From 4c06c8554c35887f8f5147783953b2b7e7c2327f Mon Sep 17 00:00:00 2001
From: Titus Wormer <tituswormer@gmail.com>
Date: Wed, 8 Jun 2022 15:52:16 +0200
Subject: .

---
 src/compiler.rs                      |  367 +++++
 src/constant.rs                      | 2561 ++++++++++++++++++++++++++++++++++
 src/construct/blank_line.rs          |   61 +
 src/construct/character_escape.rs    |   69 +
 src/construct/character_reference.rs |  237 ++++
 src/construct/code_fenced.rs         |  581 ++++++++
 src/construct/code_indented.rs       |  190 +++
 src/construct/heading_atx.rs         |  175 +++
 src/construct/html_flow.rs           | 1068 ++++++++++++++
 src/construct/mod.rs                 |   11 +
 src/construct/partial_whitespace.rs  |   66 +
 src/construct/thematic_break.rs      |  137 ++
 src/content/flow.rs                  |  258 ++++
 src/content/mod.rs                   |    4 +
 src/content/string.rs                |  120 ++
 src/lib.rs                           |   52 +
 src/parser.rs                        |   14 +
 src/tokenizer.rs                     |  580 ++++++++
 src/util.rs                          |  241 ++++
 19 files changed, 6792 insertions(+)
 create mode 100644 src/compiler.rs
 create mode 100644 src/constant.rs
 create mode 100644 src/construct/blank_line.rs
 create mode 100644 src/construct/character_escape.rs
 create mode 100644 src/construct/character_reference.rs
 create mode 100644 src/construct/code_fenced.rs
 create mode 100644 src/construct/code_indented.rs
 create mode 100644 src/construct/heading_atx.rs
 create mode 100644 src/construct/html_flow.rs
 create mode 100644 src/construct/mod.rs
 create mode 100644 src/construct/partial_whitespace.rs
 create mode 100644 src/construct/thematic_break.rs
 create mode 100644 src/content/flow.rs
 create mode 100644 src/content/mod.rs
 create mode 100644 src/content/string.rs
 create mode 100644 src/lib.rs
 create mode 100644 src/parser.rs
 create mode 100644 src/tokenizer.rs
 create mode 100644 src/util.rs

diff --git a/src/compiler.rs b/src/compiler.rs
new file mode 100644
index 0000000..166950e
--- /dev/null
+++ b/src/compiler.rs
@@ -0,0 +1,367 @@
+//! Turn events into a string of HTML.
+use crate::construct::character_reference::Kind as CharacterReferenceKind;
+use crate::tokenizer::{Code, Event, EventType, TokenType};
+use crate::util::{
+    decode_named_character_reference, decode_numeric_character_reference, encode, get_span,
+    slice_serialize,
+};
+
+/// Configuration (optional).
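+///
+/// ## Example
+///
+/// ```ignore
+/// // A sketch: dangerous HTML is off by default; opt in only for trusted input.
+/// let safe = CompileOptions::default();
+/// let trusted = CompileOptions { allow_dangerous_html: true };
+/// ```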
+#[derive(Default, Debug)]
+pub struct CompileOptions {
+    /// Whether to allow (dangerous) HTML.
+    /// The default is `false`; you can set it to `true` for trusted
+    /// content.
+    pub allow_dangerous_html: bool,
+}
+
+/// Turn events and codes into a string of HTML.
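+///
+/// A sketch of the expected wiring (assuming `parse` in `src/parser.rs`
+/// yields the `events` and `codes` taken here):
+///
+/// ```ignore
+/// let (events, codes) = parse("# hi");
+/// let html = compile(&events, &codes, &CompileOptions::default());
+/// assert_eq!(html, "<h1>hi</h1>");
+/// ```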
+#[allow(clippy::too_many_lines)]
+pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> String {
+    let mut index = 0;
+    // let mut last_was_tag = false;
+    let buffers: &mut Vec<Vec<String>> = &mut vec![vec![]];
+    let mut atx_opening_sequence_size: Option<usize> = None;
+    let mut atx_heading_buffer: Option<String> = None;
+    let mut code_flow_seen_data: Option<bool> = None;
+    let mut code_fenced_fences_count: Option<usize> = None;
+    let mut slurp_one_line_ending = false;
+    let mut ignore_encode = false;
+    let mut character_reference_kind: Option<CharacterReferenceKind> = None;
+    // let mut slurp_all_line_endings = false;
+
+    println!("events: {:#?}", events);
+
+    while index < events.len() {
+        let event = &events[index];
+        let token_type = &event.token_type;
+
+        match event.event_type {
+            EventType::Enter => match token_type {
+                TokenType::Content => {
+                    buf_tail_mut(buffers).push("<p>".to_string());
+                }
+                TokenType::CodeIndented => {
+                    code_flow_seen_data = Some(false);
+                    line_ending_if_needed(buffers);
+                    buf_tail_mut(buffers).push("<pre><code>".to_string());
+                }
+                TokenType::CodeFenced => {
+                    code_flow_seen_data = Some(false);
+                    line_ending_if_needed(buffers);
+                    // Note: no `>`, which is added later.
+                    buf_tail_mut(buffers).push("<pre><code".to_string());
+                    code_fenced_fences_count = Some(0);
+                }
+                TokenType::CodeFencedFenceInfo | TokenType::CodeFencedFenceMeta => {
+                    buffer(buffers);
+                }
+                TokenType::HtmlFlow => {
+                    line_ending_if_needed(buffers);
+                    if options.allow_dangerous_html {
+                        ignore_encode = true;
+                    }
+                }
+                TokenType::ContentPhrasing
+                | TokenType::AtxHeading
+                | TokenType::AtxHeadingSequence
+                | TokenType::AtxHeadingWhitespace
+                | TokenType::AtxHeadingText
+                | TokenType::LineEnding
+                | TokenType::ThematicBreak
+                | TokenType::ThematicBreakSequence
+                | TokenType::ThematicBreakWhitespace
+                | TokenType::CodeIndentedPrefixWhitespace
+                | TokenType::CodeFlowChunk
+                | TokenType::BlankLineEnding
+                | TokenType::BlankLineWhitespace
+                | TokenType::Whitespace
+                | TokenType::HtmlFlowData
+                | TokenType::CodeFencedFence
+                | TokenType::CodeFencedFenceSequence
+                | TokenType::ChunkString
+                | TokenType::CodeFencedFenceWhitespace
+                | TokenType::Data
+                | TokenType::CharacterEscape
+                | TokenType::CharacterEscapeMarker
+                | TokenType::CharacterEscapeValue
+                | TokenType::CharacterReference
+                | TokenType::CharacterReferenceMarker
+                | TokenType::CharacterReferenceMarkerNumeric
+                | TokenType::CharacterReferenceMarkerHexadecimal
+                | TokenType::CharacterReferenceMarkerSemi
+                | TokenType::CharacterReferenceValue => {}
+                #[allow(unreachable_patterns)]
+                _ => {
+                    unreachable!("unhandled `enter` of TokenType {:?}", token_type)
+                }
+            },
+            EventType::Exit => match token_type {
+                TokenType::ThematicBreakSequence
+                | TokenType::ThematicBreakWhitespace
+                | TokenType::CodeIndentedPrefixWhitespace
+                | TokenType::BlankLineEnding
+                | TokenType::BlankLineWhitespace
+                | TokenType::Whitespace
+                | TokenType::CodeFencedFenceSequence
+                | TokenType::CodeFencedFenceWhitespace
+                | TokenType::CharacterEscape
+                | TokenType::CharacterEscapeMarker
+                | TokenType::CharacterReference
+                | TokenType::CharacterReferenceMarkerSemi => {}
+                TokenType::HtmlFlow => {
+                    ignore_encode = false;
+                }
+                TokenType::HtmlFlowData => {
+                    let slice = slice_serialize(codes, &get_span(events, index), false);
+
+                    let res = if ignore_encode { slice } else { encode(&slice) };
+
+                    // last_was_tag = false;
+                    buf_tail_mut(buffers).push(res);
+                }
+                TokenType::Content => {
+                    buf_tail_mut(buffers).push("</p>".to_string());
+                }
+                TokenType::CodeIndented | TokenType::CodeFenced => {
+                    let seen_data =
+                        code_flow_seen_data.expect("`code_flow_seen_data` must be defined");
+
+                    // To do: containers.
+                    // One special case is if we are inside a container, and the fenced code was
+                    // not closed (meaning it runs to the end).
+                    // In that case, the following line ending, is considered *outside* the
+                    // fenced code and block quote by micromark, but CM wants to treat that
+                    // ending as part of the code.
+                    // if fenced_count != None && fenced_count < 2 && tightStack.length > 0 && !last_was_tag {
+                    //     line_ending();
+                    // }
+
+                    // But in most cases, it’s simpler: when we’ve seen some data, emit an extra
+                    // line ending when needed.
+                    if seen_data {
+                        line_ending_if_needed(buffers);
+                    }
+
+                    buf_tail_mut(buffers).push("</code></pre>".to_string());
+
+                    if let Some(count) = code_fenced_fences_count {
+                        if count < 2 {
+                            line_ending_if_needed(buffers);
+                        }
+                    }
+
+                    code_flow_seen_data = None;
+                    code_fenced_fences_count = None;
+                    slurp_one_line_ending = false;
+                }
+                TokenType::CodeFencedFence => {
+                    let count = code_fenced_fences_count.unwrap_or(0);
+
+                    if count == 0 {
+                        buf_tail_mut(buffers).push(">".to_string());
+                        // tag = true;
+                        slurp_one_line_ending = true;
+                    }
+
+                    code_fenced_fences_count = Some(count + 1);
+                }
+                TokenType::CodeFencedFenceInfo => {
+                    let value = resume(buffers);
+                    buf_tail_mut(buffers).push(format!(" class=\"language-{}\"", value));
+                    // tag = true;
+                }
+                TokenType::CodeFencedFenceMeta => {
+                    resume(buffers);
+                }
+                TokenType::CodeFlowChunk => {
+                    code_flow_seen_data = Some(true);
+                    buf_tail_mut(buffers).push(encode(&slice_serialize(
+                        codes,
+                        &get_span(events, index),
+                        false,
+                    )));
+                }
+                // `AtxHeadingWhitespace` is ignored after the opening sequence,
+                // before the closing sequence, and after the closing sequence.
+                // But it is used around intermediate sequences.
+                // `atx_heading_buffer` is set to `Some` by the first `AtxHeadingText`.
+                // `AtxHeadingSequence` is ignored as the opening and closing sequence,
+                // but not when intermediate.
+                TokenType::AtxHeadingWhitespace | TokenType::AtxHeadingSequence => {
+                    if let Some(buf) = atx_heading_buffer {
+                        atx_heading_buffer = Some(
+                            buf.to_string()
+                                + &encode(&slice_serialize(codes, &get_span(events, index), false)),
+                        );
+                    }
+
+                    // First sequence we see.
+                    if atx_opening_sequence_size.is_none() {
+                        let rank = slice_serialize(codes, &get_span(events, index), false).len();
+                        atx_opening_sequence_size = Some(rank);
+                        buf_tail_mut(buffers).push(format!("<h{}>", rank));
+                    }
+                }
+                TokenType::AtxHeadingText => {
+                    println!("text: {:?}", atx_heading_buffer);
+                    if let Some(ref buf) = atx_heading_buffer {
+                        if !buf.is_empty() {
+                            buf_tail_mut(buffers).push(encode(buf));
+                            atx_heading_buffer = Some("".to_string());
+                        }
+                    } else {
+                        atx_heading_buffer = Some("".to_string());
+                    }
+
+                    let slice = encode(&slice_serialize(codes, &get_span(events, index), false));
+                    println!("slice: {:?}", slice);
+                    buf_tail_mut(buffers).push(slice);
+                }
+                TokenType::AtxHeading => {
+                    let rank = atx_opening_sequence_size
+                        .expect("`atx_opening_sequence_size` must be set in headings");
+                    buf_tail_mut(buffers).push(format!("</h{}>", rank));
+                    atx_opening_sequence_size = None;
+                    atx_heading_buffer = None;
+                }
+                TokenType::ThematicBreak => {
+                    buf_tail_mut(buffers).push("<hr />".to_string());
+                }
+                TokenType::LineEnding => {
+                    // if slurp_all_line_endings {
+                    //     // Empty.
+                    // } else
+                    if slurp_one_line_ending {
+                        slurp_one_line_ending = false;
+                    // } else if code_text_inside {
+                    //     buf_tail_mut(buffers).push(" ".to_string());
+                    } else {
+                        buf_tail_mut(buffers).push(encode(&slice_serialize(
+                            codes,
+                            &get_span(events, index),
+                            false,
+                        )));
+                    }
+                }
+                TokenType::CharacterReferenceMarker => {
+                    character_reference_kind = Some(CharacterReferenceKind::Named);
+                }
+                TokenType::CharacterReferenceMarkerNumeric => {
+                    character_reference_kind = Some(CharacterReferenceKind::Decimal);
+                }
+                TokenType::CharacterReferenceMarkerHexadecimal => {
+                    character_reference_kind = Some(CharacterReferenceKind::Hexadecimal);
+                }
+                TokenType::CharacterReferenceValue => {
+                    let kind = character_reference_kind
+                        .expect("expected `character_reference_kind` to be set");
+                    let reference = slice_serialize(codes, &get_span(events, index), false);
+                    let ref_string = reference.as_str();
+                    let value = match kind {
+                        CharacterReferenceKind::Decimal => {
+                            decode_numeric_character_reference(ref_string, 10).to_string()
+                        }
+                        CharacterReferenceKind::Hexadecimal => {
+                            decode_numeric_character_reference(ref_string, 16).to_string()
+                        }
+                        CharacterReferenceKind::Named => {
+                            decode_named_character_reference(ref_string)
+                        }
+                    };
+
+                    buf_tail_mut(buffers).push(value);
+
+                    character_reference_kind = None;
+                }
+                // To do: `ContentPhrasing` should be parsed as phrasing first.
+                // This branch below currently acts as the resulting `data` tokens.
+                TokenType::ContentPhrasing
+                // To do: `ChunkString` does not belong here. Remove it when subtokenization is supported.
+                | TokenType::ChunkString
+                | TokenType::Data
+                | TokenType::CharacterEscapeValue => {
+                    // last_was_tag = false;
+                    buf_tail_mut(buffers).push(encode(&slice_serialize(
+                        codes,
+                        &get_span(events, index),
+                        false,
+                    )));
+                }
+                #[allow(unreachable_patterns)]
+                _ => {
+                    unreachable!("unhandled `exit` of TokenType {:?}", token_type)
+                }
+            },
+        }
+
+        index += 1;
+    }
+
+    assert_eq!(buffers.len(), 1, "expected 1 final buffer");
+    buffers.first().expect("expected 1 final buffer").concat()
+}
+
+/// Push a buffer.
+fn buffer(buffers: &mut Vec<Vec<String>>) {
+    buffers.push(vec![]);
+}
+
+/// Pop a buffer, returning its value.
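+///
+/// Pairs with `buffer()`: everything pushed after a `buffer()` call comes
+/// back here as one string, as done for fenced code info strings.
+///
+/// ```ignore
+/// buffer(buffers);                                // divert output
+/// buf_tail_mut(buffers).push("rust".to_string()); // …collect chunks…
+/// assert_eq!(resume(buffers), "rust");            // pop and concatenate
+/// ```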
+fn resume(buffers: &mut Vec<Vec<String>>) -> String {
+    let buf = buffers.pop().expect("Cannot resume w/o buffer");
+    buf.concat()
+}
+
+/// Get the last chunk of current buffer.
+fn buf_tail_slice(buffers: &mut [Vec<String>]) -> Option<&String> {
+    let tail = buf_tail(buffers);
+    tail.last()
+}
+
+/// Get the mutable last chunk of current buffer.
+fn buf_tail_mut(buffers: &mut [Vec<String>]) -> &mut Vec<String> {
+    buffers
+        .last_mut()
+        .expect("at least one buffer should exist")
+}
+
+/// Get the current buffer.
+fn buf_tail(buffers: &mut [Vec<String>]) -> &Vec<String> {
+    buffers.last().expect("at least one buffer should exist")
+}
+
+/// Add a line ending.
+fn line_ending(buffers: &mut [Vec<String>]) {
+    let tail = buf_tail_mut(buffers);
+    // To do: use inferred line ending style.
+    // lastWasTag = false
+    tail.push("\n".to_string());
+}
+
+/// Add a line ending if needed (as in, there’s no eol/eof already).
+fn line_ending_if_needed(buffers: &mut [Vec<String>]) {
+    let last_char = buf_tail_slice(buffers).and_then(|x| x.chars().last());
+
+    // Only add when there is previous output that does not already end in
+    // an eol.
+    if let Some(x) = last_char {
+        if x != '\n' && x != '\r' {
+            line_ending(buffers);
+        }
+    }
+}
diff --git a/src/constant.rs b/src/constant.rs
new file mode 100644
index 0000000..332fdaf
--- /dev/null
+++ b/src/constant.rs
@@ -0,0 +1,2561 @@
+//! Constants needed to parse markdown.
+//!
+//! Most of these constants are magic numbers, such as the number of markers
+//! needed to parse [code (fenced)][code_fenced]
+//! ([`CODE_FENCED_SEQUENCE_SIZE_MIN`][]) or the max number of allowed markers
+//! in a [heading (atx)][heading_atx]
+//! ([`HEADING_ATX_OPENING_FENCE_SIZE_MAX`][]).
+//!
+//! Some constants are instead lists of things, such as the list of tag names
+//! considered in the **raw** production of [HTML (flow)][html_flow]
+//! ([`HTML_RAW_NAMES`][]), or the list of allowed named character references
+//! ([`CHARACTER_REFERENCE_NAMES`][]).
+//!
+//! [code_fenced]: crate::construct::code_fenced
+//! [heading_atx]: crate::construct::heading_atx
+//! [html_flow]: crate::construct::html_flow
+
+/// The number of characters that form a tab stop.
+///
+/// This relates to the number of whitespace characters needed to form certain
+/// constructs in markdown, most notably the whitespace required to form
+/// [code (indented)][code_indented].
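+///
+/// To illustrate, a line indented with 4 spaces (one tab stop) generally
+/// forms code (indented), while 3 spaces do not.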
+///
+/// <!-- To do: link to somewhere that discusses virtual spaces. -->
+/// <!-- Ref: https://github.com/syntax-tree/mdast-util-to-markdown/issues/51 -->
+///
+/// [code_indented]: crate::construct::code_indented
+pub const TAB_SIZE: usize = 4;
+
+/// The number of markers needed for a [thematic break][thematic_break] to form.
+///
+/// Like many things in markdown, the number is `3`.
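+///
+/// To illustrate, `***` forms a thematic break, while `**` does not.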
+///
+/// [thematic_break]: crate::construct::thematic_break
+pub const THEMATIC_BREAK_MARKER_COUNT_MIN: usize = 3;
+
+/// The max number of markers allowed to form a [heading (atx)][heading_atx].
+///
+/// This limitation is imposed by HTML, which supports a max heading rank of
+/// `6`.
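+///
+/// To illustrate, `###### six` forms a heading of rank `6`, while
+/// `####### seven` is not a heading.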
+///
+/// [heading_atx]: crate::construct::heading_atx
+pub const HEADING_ATX_OPENING_FENCE_SIZE_MAX: usize = 6;
+
+/// The number of markers needed for [code (fenced)][code_fenced] to form.
+///
+/// Like many things in markdown, the number is `3`.
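+///
+/// To illustrate, three grave accents (or tildes) open a fenced code block,
+/// while two do not.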
+///
+/// [code_fenced]: crate::construct::code_fenced
+pub const CODE_FENCED_SEQUENCE_SIZE_MIN: usize = 3;
+
+/// List of HTML tag names that form the **raw** production of
+/// [HTML (flow)][html_flow].
+///
+/// The **raw** production allows blank lines and thus no interleaving with
+/// markdown.
+/// Tag names must be matched case-insensitively, and thus this list
+/// includes only lowercase tag names.
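+///
+/// To illustrate, a blank line inside `<script>`…`</script>` does not end
+/// the element: everything through the closing tag stays raw.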
+///
+/// The length of the longest tag name is also stored as a constant in
+/// [`HTML_RAW_SIZE_MAX`][].
+///
+/// > 👉 **Note**: `textarea` was added in `CommonMark@0.30`.
+///
+/// ## References
+///
+/// *   [*§ 4.6 HTML blocks* in `CommonMark`](https://spec.commonmark.org/0.30/#html-blocks)
+///
+/// [html_flow]: crate::construct::html_flow
+pub const HTML_RAW_NAMES: [&str; 4] = ["pre", "script", "style", "textarea"];
+
+/// The length of the longest tag name in [`HTML_RAW_NAMES`][].
+///
+/// This is currently the length of `textarea`.
+pub const HTML_RAW_SIZE_MAX: usize = 8;
+
+/// List of HTML tag names that form the **basic** production of
+/// [HTML (flow)][html_flow].
+///
+/// The **basic** production allows interleaving HTML and markdown with blank lines
+/// and allows flow (block) elements to interrupt content.
+/// Tag names must be matched case-insensitively, and thus this list
+/// includes only lowercase tag names.
+///
+/// Tag names not on this list result in the **complete** production.
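+///
+/// To illustrate, `<div>` (on this list) can interrupt a paragraph, while a
+/// tag such as `<x-div>` has to match the stricter **complete** production
+/// and cannot interrupt one.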
+///
+/// > 👉 **Note**: `source` was removed on `main` of the `CommonMark` spec and
+/// > is slated to be released in `CommonMark@0.31`.
+///
+/// ## References
+///
+/// *   [*§ 4.6 HTML blocks* in `CommonMark`](https://spec.commonmark.org/0.30/#html-blocks)
+/// *   [*Remove source element as HTML block start condition* as `commonmark/commonmark-spec#710`](https://github.com/commonmark/commonmark-spec/pull/710)
+///
+/// [html_flow]: crate::construct::html_flow
+pub const HTML_BLOCK_NAMES: [&str; 61] = [
+    "address",
+    "article",
+    "aside",
+    "base",
+    "basefont",
+    "blockquote",
+    "body",
+    "caption",
+    "center",
+    "col",
+    "colgroup",
+    "dd",
+    "details",
+    "dialog",
+    "dir",
+    "div",
+    "dl",
+    "dt",
+    "fieldset",
+    "figcaption",
+    "figure",
+    "footer",
+    "form",
+    "frame",
+    "frameset",
+    "h1",
+    "h2",
+    "h3",
+    "h4",
+    "h5",
+    "h6",
+    "head",
+    "header",
+    "hr",
+    "html",
+    "iframe",
+    "legend",
+    "li",
+    "link",
+    "main",
+    "menu",
+    "menuitem",
+    "nav",
+    "noframes",
+    "ol",
+    "optgroup",
+    "option",
+    "p",
+    "param",
+    "section",
+    "summary",
+    "table",
+    "tbody",
+    "td",
+    "tfoot",
+    "th",
+    "thead",
+    "title",
+    "tr",
+    "track",
+    "ul",
+];
+
+/// The max number of characters in a hexadecimal numeric
+/// [character reference][character_reference].
+///
+/// To illustrate, this allows `&#xff9999;` and disallows `&#xff99990;`.
+/// This limit is imposed because all bigger numbers are invalid.
+///
+/// [character_reference]: crate::construct::character_reference
+pub const CHARACTER_REFERENCE_HEXADECIMAL_SIZE_MAX: usize = 6;
+
+/// The max number of characters in a decimal numeric
+/// [character reference][character_reference].
+///
+/// To illustrate, this allows `&#9999999;` and disallows `&#99999990;`.
+/// This limit is imposed because all bigger numbers are invalid.
+///
+/// [character_reference]: crate::construct::character_reference
+pub const CHARACTER_REFERENCE_DECIMAL_SIZE_MAX: usize = 7;
+
+/// The max number of characters in a named
+/// [character reference][character_reference].
+///
+/// This is the length of the longest name in [`CHARACTER_REFERENCE_NAMES`][].
+/// It allows `&CounterClockwiseContourIntegral;` and prevents the parser from
+/// continuing for eons.
+///
+/// [character_reference]: crate::construct::character_reference
+pub const CHARACTER_REFERENCE_NAMED_SIZE_MAX: usize = 31;
+
+/// List of names that can form a named
+/// [character reference][character_reference].
+///
+/// This list is case sensitive.
+///
+/// The length of the longest name (`CounterClockwiseContourIntegral`) is also
+/// stored as a constant in [`CHARACTER_REFERENCE_NAMED_SIZE_MAX`][].
+///
+/// The corresponding values of this list are stored in
+/// [`CHARACTER_REFERENCE_VALUES`][].
+/// They correspond through their index.
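+///
+/// To illustrate, `"AElig"` in this list sits at the same index as `"Æ"` in
+/// [`CHARACTER_REFERENCE_VALUES`][].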
+///
+/// ## References
+///
+/// *   [*§ 2.5 Entity and numeric character references* in `CommonMark`](https://spec.commonmark.org/0.30/#entity-and-numeric-character-references)
+///
+/// [character_reference]: crate::construct::character_reference
+pub const CHARACTER_REFERENCE_NAMES: [&str; 2222] = [
+    "AEli",
+    "AElig",
+    "AM",
+    "AMP",
+    "Aacut",
+    "Aacute",
+    "Abreve",
+    "Acir",
+    "Acirc",
+    "Acy",
+    "Afr",
+    "Agrav",
+    "Agrave",
+    "Alpha",
+    "Amacr",
+    "And",
+    "Aogon",
+    "Aopf",
+    "ApplyFunction",
+    "Arin",
+    "Aring",
+    "Ascr",
+    "Assign",
+    "Atild",
+    "Atilde",
+    "Aum",
+    "Auml",
+    "Backslash",
+    "Barv",
+    "Barwed",
+    "Bcy",
+    "Because",
+    "Bernoullis",
+    "Beta",
+    "Bfr",
+    "Bopf",
+    "Breve",
+    "Bscr",
+    "Bumpeq",
+    "CHcy",
+    "COP",
+    "COPY",
+    "Cacute",
+    "Cap",
+    "CapitalDifferentialD",
+    "Cayleys",
+    "Ccaron",
+    "Ccedi",
+    "Ccedil",
+    "Ccirc",
+    "Cconint",
+    "Cdot",
+    "Cedilla",
+    "CenterDot",
+    "Cfr",
+    "Chi",
+    "CircleDot",
+    "CircleMinus",
+    "CirclePlus",
+    "CircleTimes",
+    "ClockwiseContourIntegral",
+    "CloseCurlyDoubleQuote",
+    "CloseCurlyQuote",
+    "Colon",
+    "Colone",
+    "Congruent",
+    "Conint",
+    "ContourIntegral",
+    "Copf",
+    "Coproduct",
+    "CounterClockwiseContourIntegral",
+    "Cross",
+    "Cscr",
+    "Cup",
+    "CupCap",
+    "DD",
+    "DDotrahd",
+    "DJcy",
+    "DScy",
+    "DZcy",
+    "Dagger",
+    "Darr",
+    "Dashv",
+    "Dcaron",
+    "Dcy",
+    "Del",
+    "Delta",
+    "Dfr",
+    "DiacriticalAcute",
+    "DiacriticalDot",
+    "DiacriticalDoubleAcute",
+    "DiacriticalGrave",
+    "DiacriticalTilde",
+    "Diamond",
+    "DifferentialD",
+    "Dopf",
+    "Dot",
+    "DotDot",
+    "DotEqual",
+    "DoubleContourIntegral",
+    "DoubleDot",
+    "DoubleDownArrow",
+    "DoubleLeftArrow",
+    "DoubleLeftRightArrow",
+    "DoubleLeftTee",
+    "DoubleLongLeftArrow",
+    "DoubleLongLeftRightArrow",
+    "DoubleLongRightArrow",
+    "DoubleRightArrow",
+    "DoubleRightTee",
+    "DoubleUpArrow",
+    "DoubleUpDownArrow",
+    "DoubleVerticalBar",
+    "DownArrow",
+    "DownArrowBar",
+    "DownArrowUpArrow",
+    "DownBreve",
+    "DownLeftRightVector",
+    "DownLeftTeeVector",
+    "DownLeftVector",
+    "DownLeftVectorBar",
+    "DownRightTeeVector",
+    "DownRightVector",
+    "DownRightVectorBar",
+    "DownTee",
+    "DownTeeArrow",
+    "Downarrow",
+    "Dscr",
+    "Dstrok",
+    "ENG",
+    "ET",
+    "ETH",
+    "Eacut",
+    "Eacute",
+    "Ecaron",
+    "Ecir",
+    "Ecirc",
+    "Ecy",
+    "Edot",
+    "Efr",
+    "Egrav",
+    "Egrave",
+    "Element",
+    "Emacr",
+    "EmptySmallSquare",
+    "EmptyVerySmallSquare",
+    "Eogon",
+    "Eopf",
+    "Epsilon",
+    "Equal",
+    "EqualTilde",
+    "Equilibrium",
+    "Escr",
+    "Esim",
+    "Eta",
+    "Eum",
+    "Euml",
+    "Exists",
+    "ExponentialE",
+    "Fcy",
+    "Ffr",
+    "FilledSmallSquare",
+    "FilledVerySmallSquare",
+    "Fopf",
+    "ForAll",
+    "Fouriertrf",
+    "Fscr",
+    "GJcy",
+    "G",
+    "GT",
+    "Gamma",
+    "Gammad",
+    "Gbreve",
+    "Gcedil",
+    "Gcirc",
+    "Gcy",
+    "Gdot",
+    "Gfr",
+    "Gg",
+    "Gopf",
+    "GreaterEqual",
+    "GreaterEqualLess",
+    "GreaterFullEqual",
+    "GreaterGreater",
+    "GreaterLess",
+    "GreaterSlantEqual",
+    "GreaterTilde",
+    "Gscr",
+    "Gt",
+    "HARDcy",
+    "Hacek",
+    "Hat",
+    "Hcirc",
+    "Hfr",
+    "HilbertSpace",
+    "Hopf",
+    "HorizontalLine",
+    "Hscr",
+    "Hstrok",
+    "HumpDownHump",
+    "HumpEqual",
+    "IEcy",
+    "IJlig",
+    "IOcy",
+    "Iacut",
+    "Iacute",
+    "Icir",
+    "Icirc",
+    "Icy",
+    "Idot",
+    "Ifr",
+    "Igrav",
+    "Igrave",
+    "Im",
+    "Imacr",
+    "ImaginaryI",
+    "Implies",
+    "Int",
+    "Integral",
+    "Intersection",
+    "InvisibleComma",
+    "InvisibleTimes",
+    "Iogon",
+    "Iopf",
+    "Iota",
+    "Iscr",
+    "Itilde",
+    "Iukcy",
+    "Ium",
+    "Iuml",
+    "Jcirc",
+    "Jcy",
+    "Jfr",
+    "Jopf",
+    "Jscr",
+    "Jsercy",
+    "Jukcy",
+    "KHcy",
+    "KJcy",
+    "Kappa",
+    "Kcedil",
+    "Kcy",
+    "Kfr",
+    "Kopf",
+    "Kscr",
+    "LJcy",
+    "L",
+    "LT",
+    "Lacute",
+    "Lambda",
+    "Lang",
+    "Laplacetrf",
+    "Larr",
+    "Lcaron",
+    "Lcedil",
+    "Lcy",
+    "LeftAngleBracket",
+    "LeftArrow",
+    "LeftArrowBar",
+    "LeftArrowRightArrow",
+    "LeftCeiling",
+    "LeftDoubleBracket",
+    "LeftDownTeeVector",
+    "LeftDownVector",
+    "LeftDownVectorBar",
+    "LeftFloor",
+    "LeftRightArrow",
+    "LeftRightVector",
+    "LeftTee",
+    "LeftTeeArrow",
+    "LeftTeeVector",
+    "LeftTriangle",
+    "LeftTriangleBar",
+    "LeftTriangleEqual",
+    "LeftUpDownVector",
+    "LeftUpTeeVector",
+    "LeftUpVector",
+    "LeftUpVectorBar",
+    "LeftVector",
+    "LeftVectorBar",
+    "Leftarrow",
+    "Leftrightarrow",
+    "LessEqualGreater",
+    "LessFullEqual",
+    "LessGreater",
+    "LessLess",
+    "LessSlantEqual",
+    "LessTilde",
+    "Lfr",
+    "Ll",
+    "Lleftarrow",
+    "Lmidot",
+    "LongLeftArrow",
+    "LongLeftRightArrow",
+    "LongRightArrow",
+    "Longleftarrow",
+    "Longleftrightarrow",
+    "Longrightarrow",
+    "Lopf",
+    "LowerLeftArrow",
+    "LowerRightArrow",
+    "Lscr",
+    "Lsh",
+    "Lstrok",
+    "Lt",
+    "Map",
+    "Mcy",
+    "MediumSpace",
+    "Mellintrf",
+    "Mfr",
+    "MinusPlus",
+    "Mopf",
+    "Mscr",
+    "Mu",
+    "NJcy",
+    "Nacute",
+    "Ncaron",
+    "Ncedil",
+    "Ncy",
+    "NegativeMediumSpace",
+    "NegativeThickSpace",
+    "NegativeThinSpace",
+    "NegativeVeryThinSpace",
+    "NestedGreaterGreater",
+    "NestedLessLess",
+    "NewLine",
+    "Nfr",
+    "NoBreak",
+    "NonBreakingSpace",
+    "Nopf",
+    "Not",
+    "NotCongruent",
+    "NotCupCap",
+    "NotDoubleVerticalBar",
+    "NotElement",
+    "NotEqual",
+    "NotEqualTilde",
+    "NotExists",
+    "NotGreater",
+    "NotGreaterEqual",
+    "NotGreaterFullEqual",
+    "NotGreaterGreater",
+    "NotGreaterLess",
+    "NotGreaterSlantEqual",
+    "NotGreaterTilde",
+    "NotHumpDownHump",
+    "NotHumpEqual",
+    "NotLeftTriangle",
+    "NotLeftTriangleBar",
+    "NotLeftTriangleEqual",
+    "NotLess",
+    "NotLessEqual",
+    "NotLessGreater",
+    "NotLessLess",
+    "NotLessSlantEqual",
+    "NotLessTilde",
+    "NotNestedGreaterGreater",
+    "NotNestedLessLess",
+    "NotPrecedes",
+    "NotPrecedesEqual",
+    "NotPrecedesSlantEqual",
+    "NotReverseElement",
+    "NotRightTriangle",
+    "NotRightTriangleBar",
+    "NotRightTriangleEqual",
+    "NotSquareSubset",
+    "NotSquareSubsetEqual",
+    "NotSquareSuperset",
+    "NotSquareSupersetEqual",
+    "NotSubset",
+    "NotSubsetEqual",
+    "NotSucceeds",
+    "NotSucceedsEqual",
+    "NotSucceedsSlantEqual",
+    "NotSucceedsTilde",
+    "NotSuperset",
+    "NotSupersetEqual",
+    "NotTilde",
+    "NotTildeEqual",
+    "NotTildeFullEqual",
+    "NotTildeTilde",
+    "NotVerticalBar",
+    "Nscr",
+    "Ntild",
+    "Ntilde",
+    "Nu",
+    "OElig",
+    "Oacut",
+    "Oacute",
+    "Ocir",
+    "Ocirc",
+    "Ocy",
+    "Odblac",
+    "Ofr",
+    "Ograv",
+    "Ograve",
+    "Omacr",
+    "Omega",
+    "Omicron",
+    "Oopf",
+    "OpenCurlyDoubleQuote",
+    "OpenCurlyQuote",
+    "Or",
+    "Oscr",
+    "Oslas",
+    "Oslash",
+    "Otild",
+    "Otilde",
+    "Otimes",
+    "Oum",
+    "Ouml",
+    "OverBar",
+    "OverBrace",
+    "OverBracket",
+    "OverParenthesis",
+    "PartialD",
+    "Pcy",
+    "Pfr",
+    "Phi",
+    "Pi",
+    "PlusMinus",
+    "Poincareplane",
+    "Popf",
+    "Pr",
+    "Precedes",
+    "PrecedesEqual",
+    "PrecedesSlantEqual",
+    "PrecedesTilde",
+    "Prime",
+    "Product",
+    "Proportion",
+    "Proportional",
+    "Pscr",
+    "Psi",
+    "QUO",
+    "QUOT",
+    "Qfr",
+    "Qopf",
+    "Qscr",
+    "RBarr",
+    "RE",
+    "REG",
+    "Racute",
+    "Rang",
+    "Rarr",
+    "Rarrtl",
+    "Rcaron",
+    "Rcedil",
+    "Rcy",
+    "Re",
+    "ReverseElement",
+    "ReverseEquilibrium",
+    "ReverseUpEquilibrium",
+    "Rfr",
+    "Rho",
+    "RightAngleBracket",
+    "RightArrow",
+    "RightArrowBar",
+    "RightArrowLeftArrow",
+    "RightCeiling",
+    "RightDoubleBracket",
+    "RightDownTeeVector",
+    "RightDownVector",
+    "RightDownVectorBar",
+    "RightFloor",
+    "RightTee",
+    "RightTeeArrow",
+    "RightTeeVector",
+    "RightTriangle",
+    "RightTriangleBar",
+    "RightTriangleEqual",
+    "RightUpDownVector",
+    "RightUpTeeVector",
+    "RightUpVector",
+    "RightUpVectorBar",
+    "RightVector",
+    "RightVectorBar",
+    "Rightarrow",
+    "Ropf",
+    "RoundImplies",
+    "Rrightarrow",
+    "Rscr",
+    "Rsh",
+    "RuleDelayed",
+    "SHCHcy",
+    "SHcy",
+    "SOFTcy",
+    "Sacute",
+    "Sc",
+    "Scaron",
+    "Scedil",
+    "Scirc",
+    "Scy",
+    "Sfr",
+    "ShortDownArrow",
+    "ShortLeftArrow",
+    "ShortRightArrow",
+    "ShortUpArrow",
+    "Sigma",
+    "SmallCircle",
+    "Sopf",
+    "Sqrt",
+    "Square",
+    "SquareIntersection",
+    "SquareSubset",
+    "SquareSubsetEqual",
+    "SquareSuperset",
+    "SquareSupersetEqual",
+    "SquareUnion",
+    "Sscr",
+    "Star",
+    "Sub",
+    "Subset",
+    "SubsetEqual",
+    "Succeeds",
+    "SucceedsEqual",
+    "SucceedsSlantEqual",
+    "SucceedsTilde",
+    "SuchThat",
+    "Sum",
+    "Sup",
+    "Superset",
+    "SupersetEqual",
+    "Supset",
+    "THOR",
+    "THORN",
+    "TRADE",
+    "TSHcy",
+    "TScy",
+    "Tab",
+    "Tau",
+    "Tcaron",
+    "Tcedil",
+    "Tcy",
+    "Tfr",
+    "Therefore",
+    "Theta",
+    "ThickSpace",
+    "ThinSpace",
+    "Tilde",
+    "TildeEqual",
+    "TildeFullEqual",
+    "TildeTilde",
+    "Topf",
+    "TripleDot",
+    "Tscr",
+    "Tstrok",
+    "Uacut",
+    "Uacute",
+    "Uarr",
+    "Uarrocir",
+    "Ubrcy",
+    "Ubreve",
+    "Ucir",
+    "Ucirc",
+    "Ucy",
+    "Udblac",
+    "Ufr",
+    "Ugrav",
+    "Ugrave",
+    "Umacr",
+    "UnderBar",
+    "UnderBrace",
+    "UnderBracket",
+    "UnderParenthesis",
+    "Union",
+    "UnionPlus",
+    "Uogon",
+    "Uopf",
+    "UpArrow",
+    "UpArrowBar",
+    "UpArrowDownArrow",
+    "UpDownArrow",
+    "UpEquilibrium",
+    "UpTee",
+    "UpTeeArrow",
+    "Uparrow",
+    "Updownarrow",
+    "UpperLeftArrow",
+    "UpperRightArrow",
+    "Upsi",
+    "Upsilon",
+    "Uring",
+    "Uscr",
+    "Utilde",
+    "Uum",
+    "Uuml",
+    "VDash",
+    "Vbar",
+    "Vcy",
+    "Vdash",
+    "Vdashl",
+    "Vee",
+    "Verbar",
+    "Vert",
+    "VerticalBar",
+    "VerticalLine",
+    "VerticalSeparator",
+    "VerticalTilde",
+    "VeryThinSpace",
+    "Vfr",
+    "Vopf",
+    "Vscr",
+    "Vvdash",
+    "Wcirc",
+    "Wedge",
+    "Wfr",
+    "Wopf",
+    "Wscr",
+    "Xfr",
+    "Xi",
+    "Xopf",
+    "Xscr",
+    "YAcy",
+    "YIcy",
+    "YUcy",
+    "Yacut",
+    "Yacute",
+    "Ycirc",
+    "Ycy",
+    "Yfr",
+    "Yopf",
+    "Yscr",
+    "Yuml",
+    "ZHcy",
+    "Zacute",
+    "Zcaron",
+    "Zcy",
+    "Zdot",
+    "ZeroWidthSpace",
+    "Zeta",
+    "Zfr",
+    "Zopf",
+    "Zscr",
+    "aacut",
+    "aacute",
+    "abreve",
+    "ac",
+    "acE",
+    "acd",
+    "acir",
+    "acirc",
+    "acut",
+    "acute",
+    "acy",
+    "aeli",
+    "aelig",
+    "af",
+    "afr",
+    "agrav",
+    "agrave",
+    "alefsym",
+    "aleph",
+    "alpha",
+    "amacr",
+    "amalg",
+    "am",
+    "amp",
+    "and",
+    "andand",
+    "andd",
+    "andslope",
+    "andv",
+    "ang",
+    "ange",
+    "angle",
+    "angmsd",
+    "angmsdaa",
+    "angmsdab",
+    "angmsdac",
+    "angmsdad",
+    "angmsdae",
+    "angmsdaf",
+    "angmsdag",
+    "angmsdah",
+    "angrt",
+    "angrtvb",
+    "angrtvbd",
+    "angsph",
+    "angst",
+    "angzarr",
+    "aogon",
+    "aopf",
+    "ap",
+    "apE",
+    "apacir",
+    "ape",
+    "apid",
+    "apos",
+    "approx",
+    "approxeq",
+    "arin",
+    "aring",
+    "ascr",
+    "ast",
+    "asymp",
+    "asympeq",
+    "atild",
+    "atilde",
+    "aum",
+    "auml",
+    "awconint",
+    "awint",
+    "bNot",
+    "backcong",
+    "backepsilon",
+    "backprime",
+    "backsim",
+    "backsimeq",
+    "barvee",
+    "barwed",
+    "barwedge",
+    "bbrk",
+    "bbrktbrk",
+    "bcong",
+    "bcy",
+    "bdquo",
+    "becaus",
+    "because",
+    "bemptyv",
+    "bepsi",
+    "bernou",
+    "beta",
+    "beth",
+    "between",
+    "bfr",
+    "bigcap",
+    "bigcirc",
+    "bigcup",
+    "bigodot",
+    "bigoplus",
+    "bigotimes",
+    "bigsqcup",
+    "bigstar",
+    "bigtriangledown",
+    "bigtriangleup",
+    "biguplus",
+    "bigvee",
+    "bigwedge",
+    "bkarow",
+    "blacklozenge",
+    "blacksquare",
+    "blacktriangle",
+    "blacktriangledown",
+    "blacktriangleleft",
+    "blacktriangleright",
+    "blank",
+    "blk12",
+    "blk14",
+    "blk34",
+    "block",
+    "bne",
+    "bnequiv",
+    "bnot",
+    "bopf",
+    "bot",
+    "bottom",
+    "bowtie",
+    "boxDL",
+    "boxDR",
+    "boxDl",
+    "boxDr",
+    "boxH",
+    "boxHD",
+    "boxHU",
+    "boxHd",
+    "boxHu",
+    "boxUL",
+    "boxUR",
+    "boxUl",
+    "boxUr",
+    "boxV",
+    "boxVH",
+    "boxVL",
+    "boxVR",
+    "boxVh",
+    "boxVl",
+    "boxVr",
+    "boxbox",
+    "boxdL",
+    "boxdR",
+    "boxdl",
+    "boxdr",
+    "boxh",
+    "boxhD",
+    "boxhU",
+    "boxhd",
+    "boxhu",
+    "boxminus",
+    "boxplus",
+    "boxtimes",
+    "boxuL",
+    "boxuR",
+    "boxul",
+    "boxur",
+    "boxv",
+    "boxvH",
+    "boxvL",
+    "boxvR",
+    "boxvh",
+    "boxvl",
+    "boxvr",
+    "bprime",
+    "breve",
+    "brvba",
+    "brvbar",
+    "bscr",
+    "bsemi",
+    "bsim",
+    "bsime",
+    "bsol",
+    "bsolb",
+    "bsolhsub",
+    "bull",
+    "bullet",
+    "bump",
+    "bumpE",
+    "bumpe",
+    "bumpeq",
+    "cacute",
+    "cap",
+    "capand",
+    "capbrcup",
+    "capcap",
+    "capcup",
+    "capdot",
+    "caps",
+    "caret",
+    "caron",
+    "ccaps",
+    "ccaron",
+    "ccedi",
+    "ccedil",
+    "ccirc",
+    "ccups",
+    "ccupssm",
+    "cdot",
+    "cedi",
+    "cedil",
+    "cemptyv",
+    "cen",
+    "cent",
+    "centerdot",
+    "cfr",
+    "chcy",
+    "check",
+    "checkmark",
+    "chi",
+    "cir",
+    "cirE",
+    "circ",
+    "circeq",
+    "circlearrowleft",
+    "circlearrowright",
+    "circledR",
+    "circledS",
+    "circledast",
+    "circledcirc",
+    "circleddash",
+    "cire",
+    "cirfnint",
+    "cirmid",
+    "cirscir",
+    "clubs",
+    "clubsuit",
+    "colon",
+    "colone",
+    "coloneq",
+    "comma",
+    "commat",
+    "comp",
+    "compfn",
+    "complement",
+    "complexes",
+    "cong",
+    "congdot",
+    "conint",
+    "copf",
+    "coprod",
+    "cop",
+    "copy",
+    "copysr",
+    "crarr",
+    "cross",
+    "cscr",
+    "csub",
+    "csube",
+    "csup",
+    "csupe",
+    "ctdot",
+    "cudarrl",
+    "cudarrr",
+    "cuepr",
+    "cuesc",
+    "cularr",
+    "cularrp",
+    "cup",
+    "cupbrcap",
+    "cupcap",
+    "cupcup",
+    "cupdot",
+    "cupor",
+    "cups",
+    "curarr",
+    "curarrm",
+    "curlyeqprec",
+    "curlyeqsucc",
+    "curlyvee",
+    "curlywedge",
+    "curre",
+    "curren",
+    "curvearrowleft",
+    "curvearrowright",
+    "cuvee",
+    "cuwed",
+    "cwconint",
+    "cwint",
+    "cylcty",
+    "dArr",
+    "dHar",
+    "dagger",
+    "daleth",
+    "darr",
+    "dash",
+    "dashv",
+    "dbkarow",
+    "dblac",
+    "dcaron",
+    "dcy",
+    "dd",
+    "ddagger",
+    "ddarr",
+    "ddotseq",
+    "de",
+    "deg",
+    "delta",
+    "demptyv",
+    "dfisht",
+    "dfr",
+    "dharl",
+    "dharr",
+    "diam",
+    "diamond",
+    "diamondsuit",
+    "diams",
+    "die",
+    "digamma",
+    "disin",
+    "div",
+    "divid",
+    "divide",
+    "divideontimes",
+    "divonx",
+    "djcy",
+    "dlcorn",
+    "dlcrop",
+    "dollar",
+    "dopf",
+    "dot",
+    "doteq",
+    "doteqdot",
+    "dotminus",
+    "dotplus",
+    "dotsquare",
+    "doublebarwedge",
+    "downarrow",
+    "downdownarrows",
+    "downharpoonleft",
+    "downharpoonright",
+    "drbkarow",
+    "drcorn",
+    "drcrop",
+    "dscr",
+    "dscy",
+    "dsol",
+    "dstrok",
+    "dtdot",
+    "dtri",
+    "dtrif",
+    "duarr",
+    "duhar",
+    "dwangle",
+    "dzcy",
+    "dzigrarr",
+    "eDDot",
+    "eDot",
+    "eacut",
+    "eacute",
+    "easter",
+    "ecaron",
+    "ecir",
+    "ecirc",
+    "ecolon",
+    "ecy",
+    "edot",
+    "ee",
+    "efDot",
+    "efr",
+    "eg",
+    "egrav",
+    "egrave",
+    "egs",
+    "egsdot",
+    "el",
+    "elinters",
+    "ell",
+    "els",
+    "elsdot",
+    "emacr",
+    "empty",
+    "emptyset",
+    "emptyv",
+    "emsp13",
+    "emsp14",
+    "emsp",
+    "eng",
+    "ensp",
+    "eogon",
+    "eopf",
+    "epar",
+    "eparsl",
+    "eplus",
+    "epsi",
+    "epsilon",
+    "epsiv",
+    "eqcirc",
+    "eqcolon",
+    "eqsim",
+    "eqslantgtr",
+    "eqslantless",
+    "equals",
+    "equest",
+    "equiv",
+    "equivDD",
+    "eqvparsl",
+    "erDot",
+    "erarr",
+    "escr",
+    "esdot",
+    "esim",
+    "eta",
+    "et",
+    "eth",
+    "eum",
+    "euml",
+    "euro",
+    "excl",
+    "exist",
+    "expectation",
+    "exponentiale",
+    "fallingdotseq",
+    "fcy",
+    "female",
+    "ffilig",
+    "fflig",
+    "ffllig",
+    "ffr",
+    "filig",
+    "fjlig",
+    "flat",
+    "fllig",
+    "fltns",
+    "fnof",
+    "fopf",
+    "forall",
+    "fork",
+    "forkv",
+    "fpartint",
+    "frac1",
+    "frac12",
+    "frac13",
+    "frac14",
+    "frac15",
+    "frac16",
+    "frac18",
+    "frac23",
+    "frac25",
+    "frac3",
+    "frac34",
+    "frac35",
+    "frac38",
+    "frac45",
+    "frac56",
+    "frac58",
+    "frac78",
+    "frasl",
+    "frown",
+    "fscr",
+    "gE",
+    "gEl",
+    "gacute",
+    "gamma",
+    "gammad",
+    "gap",
+    "gbreve",
+    "gcirc",
+    "gcy",
+    "gdot",
+    "ge",
+    "gel",
+    "geq",
+    "geqq",
+    "geqslant",
+    "ges",
+    "gescc",
+    "gesdot",
+    "gesdoto",
+    "gesdotol",
+    "gesl",
+    "gesles",
+    "gfr",
+    "gg",
+    "ggg",
+    "gimel",
+    "gjcy",
+    "gl",
+    "glE",
+    "gla",
+    "glj",
+    "gnE",
+    "gnap",
+    "gnapprox",
+    "gne",
+    "gneq",
+    "gneqq",
+    "gnsim",
+    "gopf",
+    "grave",
+    "gscr",
+    "gsim",
+    "gsime",
+    "gsiml",
+    "g",
+    "gt",
+    "gtcc",
+    "gtcir",
+    "gtdot",
+    "gtlPar",
+    "gtquest",
+    "gtrapprox",
+    "gtrarr",
+    "gtrdot",
+    "gtreqless",
+    "gtreqqless",
+    "gtrless",
+    "gtrsim",
+    "gvertneqq",
+    "gvnE",
+    "hArr",
+    "hairsp",
+    "half",
+    "hamilt",
+    "hardcy",
+    "harr",
+    "harrcir",
+    "harrw",
+    "hbar",
+    "hcirc",
+    "hearts",
+    "heartsuit",
+    "hellip",
+    "hercon",
+    "hfr",
+    "hksearow",
+    "hkswarow",
+    "hoarr",
+    "homtht",
+    "hookleftarrow",
+    "hookrightarrow",
+    "hopf",
+    "horbar",
+    "hscr",
+    "hslash",
+    "hstrok",
+    "hybull",
+    "hyphen",
+    "iacut",
+    "iacute",
+    "ic",
+    "icir",
+    "icirc",
+    "icy",
+    "iecy",
+    "iexc",
+    "iexcl",
+    "iff",
+    "ifr",
+    "igrav",
+    "igrave",
+    "ii",
+    "iiiint",
+    "iiint",
+    "iinfin",
+    "iiota",
+    "ijlig",
+    "imacr",
+    "image",
+    "imagline",
+    "imagpart",
+    "imath",
+    "imof",
+    "imped",
+    "in",
+    "incare",
+    "infin",
+    "infintie",
+    "inodot",
+    "int",
+    "intcal",
+    "integers",
+    "intercal",
+    "intlarhk",
+    "intprod",
+    "iocy",
+    "iogon",
+    "iopf",
+    "iota",
+    "iprod",
+    "iques",
+    "iquest",
+    "iscr",
+    "isin",
+    "isinE",
+    "isindot",
+    "isins",
+    "isinsv",
+    "isinv",
+    "it",
+    "itilde",
+    "iukcy",
+    "ium",
+    "iuml",
+    "jcirc",
+    "jcy",
+    "jfr",
+    "jmath",
+    "jopf",
+    "jscr",
+    "jsercy",
+    "jukcy",
+    "kappa",
+    "kappav",
+    "kcedil",
+    "kcy",
+    "kfr",
+    "kgreen",
+    "khcy",
+    "kjcy",
+    "kopf",
+    "kscr",
+    "lAarr",
+    "lArr",
+    "lAtail",
+    "lBarr",
+    "lE",
+    "lEg",
+    "lHar",
+    "lacute",
+    "laemptyv",
+    "lagran",
+    "lambda",
+    "lang",
+    "langd",
+    "langle",
+    "lap",
+    "laqu",
+    "laquo",
+    "larr",
+    "larrb",
+    "larrbfs",
+    "larrfs",
+    "larrhk",
+    "larrlp",
+    "larrpl",
+    "larrsim",
+    "larrtl",
+    "lat",
+    "latail",
+    "late",
+    "lates",
+    "lbarr",
+    "lbbrk",
+    "lbrace",
+    "lbrack",
+    "lbrke",
+    "lbrksld",
+    "lbrkslu",
+    "lcaron",
+    "lcedil",
+    "lceil",
+    "lcub",
+    "lcy",
+    "ldca",
+    "ldquo",
+    "ldquor",
+    "ldrdhar",
+    "ldrushar",
+    "ldsh",
+    "le",
+    "leftarrow",
+    "leftarrowtail",
+    "leftharpoondown",
+    "leftharpoonup",
+    "leftleftarrows",
+    "leftrightarrow",
+    "leftrightarrows",
+    "leftrightharpoons",
+    "leftrightsquigarrow",
+    "leftthreetimes",
+    "leg",
+    "leq",
+    "leqq",
+    "leqslant",
+    "les",
+    "lescc",
+    "lesdot",
+    "lesdoto",
+    "lesdotor",
+    "lesg",
+    "lesges",
+    "lessapprox",
+    "lessdot",
+    "lesseqgtr",
+    "lesseqqgtr",
+    "lessgtr",
+    "lesssim",
+    "lfisht",
+    "lfloor",
+    "lfr",
+    "lg",
+    "lgE",
+    "lhard",
+    "lharu",
+    "lharul",
+    "lhblk",
+    "ljcy",
+    "ll",
+    "llarr",
+    "llcorner",
+    "llhard",
+    "lltri",
+    "lmidot",
+    "lmoust",
+    "lmoustache",
+    "lnE",
+    "lnap",
+    "lnapprox",
+    "lne",
+    "lneq",
+    "lneqq",
+    "lnsim",
+    "loang",
+    "loarr",
+    "lobrk",
+    "longleftarrow",
+    "longleftrightarrow",
+    "longmapsto",
+    "longrightarrow",
+    "looparrowleft",
+    "looparrowright",
+    "lopar",
+    "lopf",
+    "loplus",
+    "lotimes",
+    "lowast",
+    "lowbar",
+    "loz",
+    "lozenge",
+    "lozf",
+    "lpar",
+    "lparlt",
+    "lrarr",
+    "lrcorner",
+    "lrhar",
+    "lrhard",
+    "lrm",
+    "lrtri",
+    "lsaquo",
+    "lscr",
+    "lsh",
+    "lsim",
+    "lsime",
+    "lsimg",
+    "lsqb",
+    "lsquo",
+    "lsquor",
+    "lstrok",
+    "l",
+    "lt",
+    "ltcc",
+    "ltcir",
+    "ltdot",
+    "lthree",
+    "ltimes",
+    "ltlarr",
+    "ltquest",
+    "ltrPar",
+    "ltri",
+    "ltrie",
+    "ltrif",
+    "lurdshar",
+    "luruhar",
+    "lvertneqq",
+    "lvnE",
+    "mDDot",
+    "mac",
+    "macr",
+    "male",
+    "malt",
+    "maltese",
+    "map",
+    "mapsto",
+    "mapstodown",
+    "mapstoleft",
+    "mapstoup",
+    "marker",
+    "mcomma",
+    "mcy",
+    "mdash",
+    "measuredangle",
+    "mfr",
+    "mho",
+    "micr",
+    "micro",
+    "mid",
+    "midast",
+    "midcir",
+    "middo",
+    "middot",
+    "minus",
+    "minusb",
+    "minusd",
+    "minusdu",
+    "mlcp",
+    "mldr",
+    "mnplus",
+    "models",
+    "mopf",
+    "mp",
+    "mscr",
+    "mstpos",
+    "mu",
+    "multimap",
+    "mumap",
+    "nGg",
+    "nGt",
+    "nGtv",
+    "nLeftarrow",
+    "nLeftrightarrow",
+    "nLl",
+    "nLt",
+    "nLtv",
+    "nRightarrow",
+    "nVDash",
+    "nVdash",
+    "nabla",
+    "nacute",
+    "nang",
+    "nap",
+    "napE",
+    "napid",
+    "napos",
+    "napprox",
+    "natur",
+    "natural",
+    "naturals",
+    "nbs",
+    "nbsp",
+    "nbump",
+    "nbumpe",
+    "ncap",
+    "ncaron",
+    "ncedil",
+    "ncong",
+    "ncongdot",
+    "ncup",
+    "ncy",
+    "ndash",
+    "ne",
+    "neArr",
+    "nearhk",
+    "nearr",
+    "nearrow",
+    "nedot",
+    "nequiv",
+    "nesear",
+    "nesim",
+    "nexist",
+    "nexists",
+    "nfr",
+    "ngE",
+    "nge",
+    "ngeq",
+    "ngeqq",
+    "ngeqslant",
+    "nges",
+    "ngsim",
+    "ngt",
+    "ngtr",
+    "nhArr",
+    "nharr",
+    "nhpar",
+    "ni",
+    "nis",
+    "nisd",
+    "niv",
+    "njcy",
+    "nlArr",
+    "nlE",
+    "nlarr",
+    "nldr",
+    "nle",
+    "nleftarrow",
+    "nleftrightarrow",
+    "nleq",
+    "nleqq",
+    "nleqslant",
+    "nles",
+    "nless",
+    "nlsim",
+    "nlt",
+    "nltri",
+    "nltrie",
+    "nmid",
+    "nopf",
+    "no",
+    "not",
+    "notin",
+    "notinE",
+    "notindot",
+    "notinva",
+    "notinvb",
+    "notinvc",
+    "notni",
+    "notniva",
+    "notnivb",
+    "notnivc",
+    "npar",
+    "nparallel",
+    "nparsl",
+    "npart",
+    "npolint",
+    "npr",
+    "nprcue",
+    "npre",
+    "nprec",
+    "npreceq",
+    "nrArr",
+    "nrarr",
+    "nrarrc",
+    "nrarrw",
+    "nrightarrow",
+    "nrtri",
+    "nrtrie",
+    "nsc",
+    "nsccue",
+    "nsce",
+    "nscr",
+    "nshortmid",
+    "nshortparallel",
+    "nsim",
+    "nsime",
+    "nsimeq",
+    "nsmid",
+    "nspar",
+    "nsqsube",
+    "nsqsupe",
+    "nsub",
+    "nsubE",
+    "nsube",
+    "nsubset",
+    "nsubseteq",
+    "nsubseteqq",
+    "nsucc",
+    "nsucceq",
+    "nsup",
+    "nsupE",
+    "nsupe",
+    "nsupset",
+    "nsupseteq",
+    "nsupseteqq",
+    "ntgl",
+    "ntild",
+    "ntilde",
+    "ntlg",
+    "ntriangleleft",
+    "ntrianglelefteq",
+    "ntriangleright",
+    "ntrianglerighteq",
+    "nu",
+    "num",
+    "numero",
+    "numsp",
+    "nvDash",
+    "nvHarr",
+    "nvap",
+    "nvdash",
+    "nvge",
+    "nvgt",
+    "nvinfin",
+    "nvlArr",
+    "nvle",
+    "nvlt",
+    "nvltrie",
+    "nvrArr",
+    "nvrtrie",
+    "nvsim",
+    "nwArr",
+    "nwarhk",
+    "nwarr",
+    "nwarrow",
+    "nwnear",
+    "oS",
+    "oacut",
+    "oacute",
+    "oast",
+    "ocir",
+    "ocirc",
+    "ocy",
+    "odash",
+    "odblac",
+    "odiv",
+    "odot",
+    "odsold",
+    "oelig",
+    "ofcir",
+    "ofr",
+    "ogon",
+    "ograv",
+    "ograve",
+    "ogt",
+    "ohbar",
+    "ohm",
+    "oint",
+    "olarr",
+    "olcir",
+    "olcross",
+    "oline",
+    "olt",
+    "omacr",
+    "omega",
+    "omicron",
+    "omid",
+    "ominus",
+    "oopf",
+    "opar",
+    "operp",
+    "oplus",
+    "or",
+    "orarr",
+    "ord",
+    "order",
+    "orderof",
+    "ordf",
+    "ordm",
+    "origof",
+    "oror",
+    "orslope",
+    "orv",
+    "oscr",
+    "oslas",
+    "oslash",
+    "osol",
+    "otild",
+    "otilde",
+    "otimes",
+    "otimesas",
+    "oum",
+    "ouml",
+    "ovbar",
+    "par",
+    "para",
+    "parallel",
+    "parsim",
+    "parsl",
+    "part",
+    "pcy",
+    "percnt",
+    "period",
+    "permil",
+    "perp",
+    "pertenk",
+    "pfr",
+    "phi",
+    "phiv",
+    "phmmat",
+    "phone",
+    "pi",
+    "pitchfork",
+    "piv",
+    "planck",
+    "planckh",
+    "plankv",
+    "plus",
+    "plusacir",
+    "plusb",
+    "pluscir",
+    "plusdo",
+    "plusdu",
+    "pluse",
+    "plusm",
+    "plusmn",
+    "plussim",
+    "plustwo",
+    "pm",
+    "pointint",
+    "popf",
+    "poun",
+    "pound",
+    "pr",
+    "prE",
+    "prap",
+    "prcue",
+    "pre",
+    "prec",
+    "precapprox",
+    "preccurlyeq",
+    "preceq",
+    "precnapprox",
+    "precneqq",
+    "precnsim",
+    "precsim",
+    "prime",
+    "primes",
+    "prnE",
+    "prnap",
+    "prnsim",
+    "prod",
+    "profalar",
+    "profline",
+    "profsurf",
+    "prop",
+    "propto",
+    "prsim",
+    "prurel",
+    "pscr",
+    "psi",
+    "puncsp",
+    "qfr",
+    "qint",
+    "qopf",
+    "qprime",
+    "qscr",
+    "quaternions",
+    "quatint",
+    "quest",
+    "questeq",
+    "quo",
+    "quot",
+    "rAarr",
+    "rArr",
+    "rAtail",
+    "rBarr",
+    "rHar",
+    "race",
+    "racute",
+    "radic",
+    "raemptyv",
+    "rang",
+    "rangd",
+    "range",
+    "rangle",
+    "raqu",
+    "raquo",
+    "rarr",
+    "rarrap",
+    "rarrb",
+    "rarrbfs",
+    "rarrc",
+    "rarrfs",
+    "rarrhk",
+    "rarrlp",
+    "rarrpl",
+    "rarrsim",
+    "rarrtl",
+    "rarrw",
+    "ratail",
+    "ratio",
+    "rationals",
+    "rbarr",
+    "rbbrk",
+    "rbrace",
+    "rbrack",
+    "rbrke",
+    "rbrksld",
+    "rbrkslu",
+    "rcaron",
+    "rcedil",
+    "rceil",
+    "rcub",
+    "rcy",
+    "rdca",
+    "rdldhar",
+    "rdquo",
+    "rdquor",
+    "rdsh",
+    "real",
+    "realine",
+    "realpart",
+    "reals",
+    "rect",
+    "re",
+    "reg",
+    "rfisht",
+    "rfloor",
+    "rfr",
+    "rhard",
+    "rharu",
+    "rharul",
+    "rho",
+    "rhov",
+    "rightarrow",
+    "rightarrowtail",
+    "rightharpoondown",
+    "rightharpoonup",
+    "rightleftarrows",
+    "rightleftharpoons",
+    "rightrightarrows",
+    "rightsquigarrow",
+    "rightthreetimes",
+    "ring",
+    "risingdotseq",
+    "rlarr",
+    "rlhar",
+    "rlm",
+    "rmoust",
+    "rmoustache",
+    "rnmid",
+    "roang",
+    "roarr",
+    "robrk",
+    "ropar",
+    "ropf",
+    "roplus",
+    "rotimes",
+    "rpar",
+    "rpargt",
+    "rppolint",
+    "rrarr",
+    "rsaquo",
+    "rscr",
+    "rsh",
+    "rsqb",
+    "rsquo",
+    "rsquor",
+    "rthree",
+    "rtimes",
+    "rtri",
+    "rtrie",
+    "rtrif",
+    "rtriltri",
+    "ruluhar",
+    "rx",
+    "sacute",
+    "sbquo",
+    "sc",
+    "scE",
+    "scap",
+    "scaron",
+    "sccue",
+    "sce",
+    "scedil",
+    "scirc",
+    "scnE",
+    "scnap",
+    "scnsim",
+    "scpolint",
+    "scsim",
+    "scy",
+    "sdot",
+    "sdotb",
+    "sdote",
+    "seArr",
+    "searhk",
+    "searr",
+    "searrow",
+    "sec",
+    "sect",
+    "semi",
+    "seswar",
+    "setminus",
+    "setmn",
+    "sext",
+    "sfr",
+    "sfrown",
+    "sharp",
+    "shchcy",
+    "shcy",
+    "shortmid",
+    "shortparallel",
+    "sh",
+    "shy",
+    "sigma",
+    "sigmaf",
+    "sigmav",
+    "sim",
+    "simdot",
+    "sime",
+    "simeq",
+    "simg",
+    "simgE",
+    "siml",
+    "simlE",
+    "simne",
+    "simplus",
+    "simrarr",
+    "slarr",
+    "smallsetminus",
+    "smashp",
+    "smeparsl",
+    "smid",
+    "smile",
+    "smt",
+    "smte",
+    "smtes",
+    "softcy",
+    "sol",
+    "solb",
+    "solbar",
+    "sopf",
+    "spades",
+    "spadesuit",
+    "spar",
+    "sqcap",
+    "sqcaps",
+    "sqcup",
+    "sqcups",
+    "sqsub",
+    "sqsube",
+    "sqsubset",
+    "sqsubseteq",
+    "sqsup",
+    "sqsupe",
+    "sqsupset",
+    "sqsupseteq",
+    "squ",
+    "square",
+    "squarf",
+    "squf",
+    "srarr",
+    "sscr",
+    "ssetmn",
+    "ssmile",
+    "sstarf",
+    "star",
+    "starf",
+    "straightepsilon",
+    "straightphi",
+    "strns",
+    "sub",
+    "subE",
+    "subdot",
+    "sube",
+    "subedot",
+    "submult",
+    "subnE",
+    "subne",
+    "subplus",
+    "subrarr",
+    "subset",
+    "subseteq",
+    "subseteqq",
+    "subsetneq",
+    "subsetneqq",
+    "subsim",
+    "subsub",
+    "subsup",
+    "succ",
+    "succapprox",
+    "succcurlyeq",
+    "succeq",
+    "succnapprox",
+    "succneqq",
+    "succnsim",
+    "succsim",
+    "sum",
+    "sung",
+    "sup",
+    "sup1",
+    "sup2",
+    "sup3",
+    "supE",
+    "supdot",
+    "supdsub",
+    "supe",
+    "supedot",
+    "suphsol",
+    "suphsub",
+    "suplarr",
+    "supmult",
+    "supnE",
+    "supne",
+    "supplus",
+    "supset",
+    "supseteq",
+    "supseteqq",
+    "supsetneq",
+    "supsetneqq",
+    "supsim",
+    "supsub",
+    "supsup",
+    "swArr",
+    "swarhk",
+    "swarr",
+    "swarrow",
+    "swnwar",
+    "szli",
+    "szlig",
+    "target",
+    "tau",
+    "tbrk",
+    "tcaron",
+    "tcedil",
+    "tcy",
+    "tdot",
+    "telrec",
+    "tfr",
+    "there4",
+    "therefore",
+    "theta",
+    "thetasym",
+    "thetav",
+    "thickapprox",
+    "thicksim",
+    "thinsp",
+    "thkap",
+    "thksim",
+    "thor",
+    "thorn",
+    "tilde",
+    "time",
+    "times",
+    "timesb",
+    "timesbar",
+    "timesd",
+    "tint",
+    "toea",
+    "top",
+    "topbot",
+    "topcir",
+    "topf",
+    "topfork",
+    "tosa",
+    "tprime",
+    "trade",
+    "triangle",
+    "triangledown",
+    "triangleleft",
+    "trianglelefteq",
+    "triangleq",
+    "triangleright",
+    "trianglerighteq",
+    "tridot",
+    "trie",
+    "triminus",
+    "triplus",
+    "trisb",
+    "tritime",
+    "trpezium",
+    "tscr",
+    "tscy",
+    "tshcy",
+    "tstrok",
+    "twixt",
+    "twoheadleftarrow",
+    "twoheadrightarrow",
+    "uArr",
+    "uHar",
+    "uacut",
+    "uacute",
+    "uarr",
+    "ubrcy",
+    "ubreve",
+    "ucir",
+    "ucirc",
+    "ucy",
+    "udarr",
+    "udblac",
+    "udhar",
+    "ufisht",
+    "ufr",
+    "ugrav",
+    "ugrave",
+    "uharl",
+    "uharr",
+    "uhblk",
+    "ulcorn",
+    "ulcorner",
+    "ulcrop",
+    "ultri",
+    "umacr",
+    "um",
+    "uml",
+    "uogon",
+    "uopf",
+    "uparrow",
+    "updownarrow",
+    "upharpoonleft",
+    "upharpoonright",
+    "uplus",
+    "upsi",
+    "upsih",
+    "upsilon",
+    "upuparrows",
+    "urcorn",
+    "urcorner",
+    "urcrop",
+    "uring",
+    "urtri",
+    "uscr",
+    "utdot",
+    "utilde",
+    "utri",
+    "utrif",
+    "uuarr",
+    "uum",
+    "uuml",
+    "uwangle",
+    "vArr",
+    "vBar",
+    "vBarv",
+    "vDash",
+    "vangrt",
+    "varepsilon",
+    "varkappa",
+    "varnothing",
+    "varphi",
+    "varpi",
+    "varpropto",
+    "varr",
+    "varrho",
+    "varsigma",
+    "varsubsetneq",
+    "varsubsetneqq",
+    "varsupsetneq",
+    "varsupsetneqq",
+    "vartheta",
+    "vartriangleleft",
+    "vartriangleright",
+    "vcy",
+    "vdash",
+    "vee",
+    "veebar",
+    "veeeq",
+    "vellip",
+    "verbar",
+    "vert",
+    "vfr",
+    "vltri",
+    "vnsub",
+    "vnsup",
+    "vopf",
+    "vprop",
+    "vrtri",
+    "vscr",
+    "vsubnE",
+    "vsubne",
+    "vsupnE",
+    "vsupne",
+    "vzigzag",
+    "wcirc",
+    "wedbar",
+    "wedge",
+    "wedgeq",
+    "weierp",
+    "wfr",
+    "wopf",
+    "wp",
+    "wr",
+    "wreath",
+    "wscr",
+    "xcap",
+    "xcirc",
+    "xcup",
+    "xdtri",
+    "xfr",
+    "xhArr",
+    "xharr",
+    "xi",
+    "xlArr",
+    "xlarr",
+    "xmap",
+    "xnis",
+    "xodot",
+    "xopf",
+    "xoplus",
+    "xotime",
+    "xrArr",
+    "xrarr",
+    "xscr",
+    "xsqcup",
+    "xuplus",
+    "xutri",
+    "xvee",
+    "xwedge",
+    "yacut",
+    "yacute",
+    "yacy",
+    "ycirc",
+    "ycy",
+    "ye",
+    "yen",
+    "yfr",
+    "yicy",
+    "yopf",
+    "yscr",
+    "yucy",
+    "yum",
+    "yuml",
+    "zacute",
+    "zcaron",
+    "zcy",
+    "zdot",
+    "zeetrf",
+    "zeta",
+    "zfr",
+    "zhcy",
+    "zigrarr",
+    "zopf",
+    "zscr",
+    "zwj",
+    "zwnj",
+];
+
+/// List of values corresponding to names of named
+/// [character references][character_reference].
+///
+/// The names corresponding to this list are stored in
+/// [`CHARACTER_REFERENCE_NAMES`][].
+/// The two lists correspond through their indices.
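+///
+/// For example, a minimal lookup sketch (hypothetical helper, shown with
+/// `ignore` as these are crate-internal constants):
+///
+/// ```rust,ignore
+/// fn decode_named(name: &str) -> Option<&'static str> {
+///     CHARACTER_REFERENCE_NAMES
+///         .iter()
+///         .position(|&candidate| candidate == name)
+///         .map(|index| CHARACTER_REFERENCE_VALUES[index])
+/// }
+/// ```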
+///
+/// ## References
+///
+/// *   [*§ 2.5 Entity and numeric character references* in `CommonMark`](https://spec.commonmark.org/0.30/#entity-and-numeric-character-references)
+///
+/// [character_reference]: crate::construct::character_reference
+pub const CHARACTER_REFERENCE_VALUES: [&str; 2222] = [
+    "Æ", "Æ", "&", "&", "Á", "Á", "Ă", "Â", "Â", "А", "𝔄", "À", "À", "Α", "Ā", "⩓", "Ą", "𝔸", "⁡",
+    "Å", "Å", "𝒜", "≔", "Ã", "Ã", "Ä", "Ä", "∖", "⫧", "⌆", "Б", "∵", "ℬ", "Β", "𝔅", "𝔹", "˘", "ℬ",
+    "≎", "Ч", "©", "©", "Ć", "⋒", "ⅅ", "ℭ", "Č", "Ç", "Ç", "Ĉ", "∰", "Ċ", "¸", "·", "ℭ", "Χ", "⊙",
+    "⊖", "⊕", "⊗", "∲", "”", "’", "∷", "⩴", "≡", "∯", "∮", "ℂ", "∐", "∳", "⨯", "𝒞", "⋓", "≍", "ⅅ",
+    "⤑", "Ђ", "Ѕ", "Џ", "‡", "↡", "⫤", "Ď", "Д", "∇", "Δ", "𝔇", "´", "˙", "˝", "`", "˜", "⋄", "ⅆ",
+    "𝔻", "¨", "⃜", "≐", "∯", "¨", "⇓", "⇐", "⇔", "⫤", "⟸", "⟺", "⟹", "⇒", "⊨", "⇑", "⇕", "∥", "↓",
+    "⤓", "⇵", "̑", "⥐", "⥞", "↽", "⥖", "⥟", "⇁", "⥗", "⊤", "↧", "⇓", "𝒟", "Đ", "Ŋ", "Ð", "Ð", "É",
+    "É", "Ě", "Ê", "Ê", "Э", "Ė", "𝔈", "È", "È", "∈", "Ē", "◻", "▫", "Ę", "𝔼", "Ε", "⩵", "≂", "⇌",
+    "ℰ", "⩳", "Η", "Ë", "Ë", "∃", "ⅇ", "Ф", "𝔉", "◼", "▪", "𝔽", "∀", "ℱ", "ℱ", "Ѓ", ">", ">", "Γ",
+    "Ϝ", "Ğ", "Ģ", "Ĝ", "Г", "Ġ", "𝔊", "⋙", "𝔾", "≥", "⋛", "≧", "⪢", "≷", "⩾", "≳", "𝒢", "≫", "Ъ",
+    "ˇ", "^", "Ĥ", "ℌ", "ℋ", "ℍ", "─", "ℋ", "Ħ", "≎", "≏", "Е", "IJ", "Ё", "Í", "Í", "Î", "Î", "И",
+    "İ", "ℑ", "Ì", "Ì", "ℑ", "Ī", "ⅈ", "⇒", "∬", "∫", "⋂", "⁣", "⁢", "Į", "𝕀", "Ι", "ℐ", "Ĩ", "І",
+    "Ï", "Ï", "Ĵ", "Й", "𝔍", "𝕁", "𝒥", "Ј", "Є", "Х", "Ќ", "Κ", "Ķ", "К", "𝔎", "𝕂", "𝒦", "Љ", "<",
+    "<", "Ĺ", "Λ", "⟪", "ℒ", "↞", "Ľ", "Ļ", "Л", "⟨", "←", "⇤", "⇆", "⌈", "⟦", "⥡", "⇃", "⥙", "⌊",
+    "↔", "⥎", "⊣", "↤", "⥚", "⊲", "⧏", "⊴", "⥑", "⥠", "↿", "⥘", "↼", "⥒", "⇐", "⇔", "⋚", "≦", "≶",
+    "⪡", "⩽", "≲", "𝔏", "⋘", "⇚", "Ŀ", "⟵", "⟷", "⟶", "⟸", "⟺", "⟹", "𝕃", "↙", "↘", "ℒ", "↰", "Ł",
+    "≪", "⤅", "М", " ", "ℳ", "𝔐", "∓", "𝕄", "ℳ", "Μ", "Њ", "Ń", "Ň", "Ņ", "Н", "\u{200B}",
+    "\u{200B}", "\u{200B}", "\u{200B}", "≫", "≪", "\n", "𝔑", "\u{2060}", " ", "ℕ", "⫬", "≢", "≭",
+    "∦", "∉", "≠", "≂̸", "∄", "≯", "≱", "≧̸", "≫̸", "≹", "⩾̸", "≵", "≎̸", "≏̸", "⋪", "⧏̸", "⋬", "≮", "≰",
+    "≸", "≪̸", "⩽̸", "≴", "⪢̸", "⪡̸", "⊀", "⪯̸", "⋠", "∌", "⋫", "⧐̸", "⋭", "⊏̸", "⋢", "⊐̸", "⋣", "⊂⃒", "⊈",
+    "⊁", "⪰̸", "⋡", "≿̸", "⊃⃒", "⊉", "≁", "≄", "≇", "≉", "∤", "𝒩", "Ñ", "Ñ", "Ν", "Œ", "Ó", "Ó", "Ô",
+    "Ô", "О", "Ő", "𝔒", "Ò", "Ò", "Ō", "Ω", "Ο", "𝕆", "“", "‘", "⩔", "𝒪", "Ø", "Ø", "Õ", "Õ", "⨷",
+    "Ö", "Ö", "‾", "⏞", "⎴", "⏜", "∂", "П", "𝔓", "Φ", "Π", "±", "ℌ", "ℙ", "⪻", "≺", "⪯", "≼", "≾",
+    "″", "∏", "∷", "∝", "𝒫", "Ψ", "\"", "\"", "𝔔", "ℚ", "𝒬", "⤐", "®", "®", "Ŕ", "⟫", "↠", "⤖",
+    "Ř", "Ŗ", "Р", "ℜ", "∋", "⇋", "⥯", "ℜ", "Ρ", "⟩", "→", "⇥", "⇄", "⌉", "⟧", "⥝", "⇂", "⥕", "⌋",
+    "⊢", "↦", "⥛", "⊳", "⧐", "⊵", "⥏", "⥜", "↾", "⥔", "⇀", "⥓", "⇒", "ℝ", "⥰", "⇛", "ℛ", "↱", "⧴",
+    "Щ", "Ш", "Ь", "Ś", "⪼", "Š", "Ş", "Ŝ", "С", "𝔖", "↓", "←", "→", "↑", "Σ", "∘", "𝕊", "√", "□",
+    "⊓", "⊏", "⊑", "⊐", "⊒", "⊔", "𝒮", "⋆", "⋐", "⋐", "⊆", "≻", "⪰", "≽", "≿", "∋", "∑", "⋑", "⊃",
+    "⊇", "⋑", "Þ", "Þ", "™", "Ћ", "Ц", "\t", "Τ", "Ť", "Ţ", "Т", "𝔗", "∴", "Θ", "  ", " ", "∼",
+    "≃", "≅", "≈", "𝕋", "⃛", "𝒯", "Ŧ", "Ú", "Ú", "↟", "⥉", "Ў", "Ŭ", "Û", "Û", "У", "Ű", "𝔘", "Ù",
+    "Ù", "Ū", "_", "⏟", "⎵", "⏝", "⋃", "⊎", "Ų", "𝕌", "↑", "⤒", "⇅", "↕", "⥮", "⊥", "↥", "⇑", "⇕",
+    "↖", "↗", "ϒ", "Υ", "Ů", "𝒰", "Ũ", "Ü", "Ü", "⊫", "⫫", "В", "⊩", "⫦", "⋁", "‖", "‖", "∣", "|",
+    "❘", "≀", " ", "𝔙", "𝕍", "𝒱", "⊪", "Ŵ", "⋀", "𝔚", "𝕎", "𝒲", "𝔛", "Ξ", "𝕏", "𝒳", "Я", "Ї", "Ю",
+    "Ý", "Ý", "Ŷ", "Ы", "𝔜", "𝕐", "𝒴", "Ÿ", "Ж", "Ź", "Ž", "З", "Ż", "\u{200B}", "Ζ", "ℨ", "ℤ",
+    "𝒵", "á", "á", "ă", "∾", "∾̳", "∿", "â", "â", "´", "´", "а", "æ", "æ", "⁡", "𝔞", "à", "à", "ℵ",
+    "ℵ", "α", "ā", "⨿", "&", "&", "∧", "⩕", "⩜", "⩘", "⩚", "∠", "⦤", "∠", "∡", "⦨", "⦩", "⦪", "⦫",
+    "⦬", "⦭", "⦮", "⦯", "∟", "⊾", "⦝", "∢", "Å", "⍼", "ą", "𝕒", "≈", "⩰", "⩯", "≊", "≋", "'", "≈",
+    "≊", "å", "å", "𝒶", "*", "≈", "≍", "ã", "ã", "ä", "ä", "∳", "⨑", "⫭", "≌", "϶", "‵", "∽", "⋍",
+    "⊽", "⌅", "⌅", "⎵", "⎶", "≌", "б", "„", "∵", "∵", "⦰", "϶", "ℬ", "β", "ℶ", "≬", "𝔟", "⋂", "◯",
+    "⋃", "⨀", "⨁", "⨂", "⨆", "★", "▽", "△", "⨄", "⋁", "⋀", "⤍", "⧫", "▪", "▴", "▾", "◂", "▸", "␣",
+    "▒", "░", "▓", "█", "=⃥", "≡⃥", "⌐", "𝕓", "⊥", "⊥", "⋈", "╗", "╔", "╖", "╓", "═", "╦", "╩", "╤",
+    "╧", "╝", "╚", "╜", "╙", "║", "╬", "╣", "╠", "╫", "╢", "╟", "⧉", "╕", "╒", "┐", "┌", "─", "╥",
+    "╨", "┬", "┴", "⊟", "⊞", "⊠", "╛", "╘", "┘", "└", "│", "╪", "╡", "╞", "┼", "┤", "├", "‵", "˘",
+    "¦", "¦", "𝒷", "⁏", "∽", "⋍", "\\", "⧅", "⟈", "•", "•", "≎", "⪮", "≏", "≏", "ć", "∩", "⩄", "⩉",
+    "⩋", "⩇", "⩀", "∩︀", "⁁", "ˇ", "⩍", "č", "ç", "ç", "ĉ", "⩌", "⩐", "ċ", "¸", "¸", "⦲", "¢", "¢",
+    "·", "𝔠", "ч", "✓", "✓", "χ", "○", "⧃", "ˆ", "≗", "↺", "↻", "®", "Ⓢ", "⊛", "⊚", "⊝", "≗", "⨐",
+    "⫯", "⧂", "♣", "♣", ":", "≔", "≔", ",", "@", "∁", "∘", "∁", "ℂ", "≅", "⩭", "∮", "𝕔", "∐", "©",
+    "©", "℗", "↵", "✗", "𝒸", "⫏", "⫑", "⫐", "⫒", "⋯", "⤸", "⤵", "⋞", "⋟", "↶", "⤽", "∪", "⩈", "⩆",
+    "⩊", "⊍", "⩅", "∪︀", "↷", "⤼", "⋞", "⋟", "⋎", "⋏", "¤", "¤", "↶", "↷", "⋎", "⋏", "∲", "∱", "⌭",
+    "⇓", "⥥", "†", "ℸ", "↓", "‐", "⊣", "⤏", "˝", "ď", "д", "ⅆ", "‡", "⇊", "⩷", "°", "°", "δ", "⦱",
+    "⥿", "𝔡", "⇃", "⇂", "⋄", "⋄", "♦", "♦", "¨", "ϝ", "⋲", "÷", "÷", "÷", "⋇", "⋇", "ђ", "⌞", "⌍",
+    "$", "𝕕", "˙", "≐", "≑", "∸", "∔", "⊡", "⌆", "↓", "⇊", "⇃", "⇂", "⤐", "⌟", "⌌", "𝒹", "ѕ", "⧶",
+    "đ", "⋱", "▿", "▾", "⇵", "⥯", "⦦", "џ", "⟿", "⩷", "≑", "é", "é", "⩮", "ě", "ê", "ê", "≕", "э",
+    "ė", "ⅇ", "≒", "𝔢", "⪚", "è", "è", "⪖", "⪘", "⪙", "⏧", "ℓ", "⪕", "⪗", "ē", "∅", "∅", "∅", " ",
+    " ", " ", "ŋ", " ", "ę", "𝕖", "⋕", "⧣", "⩱", "ε", "ε", "ϵ", "≖", "≕", "≂", "⪖", "⪕", "=", "≟",
+    "≡", "⩸", "⧥", "≓", "⥱", "ℯ", "≐", "≂", "η", "ð", "ð", "ë", "ë", "€", "!", "∃", "ℰ", "ⅇ", "≒",
+    "ф", "♀", "ffi", "ff", "ffl", "𝔣", "fi", "fj", "♭", "fl", "▱", "ƒ", "𝕗", "∀", "⋔", "⫙", "⨍", "¼", "½",
+    "⅓", "¼", "⅕", "⅙", "⅛", "⅔", "⅖", "¾", "¾", "⅗", "⅜", "⅘", "⅚", "⅝", "⅞", "⁄", "⌢", "𝒻", "≧",
+    "⪌", "ǵ", "γ", "ϝ", "⪆", "ğ", "ĝ", "г", "ġ", "≥", "⋛", "≥", "≧", "⩾", "⩾", "⪩", "⪀", "⪂", "⪄",
+    "⋛︀", "⪔", "𝔤", "≫", "⋙", "ℷ", "ѓ", "≷", "⪒", "⪥", "⪤", "≩", "⪊", "⪊", "⪈", "⪈", "≩", "⋧", "𝕘",
+    "`", "ℊ", "≳", "⪎", "⪐", ">", ">", "⪧", "⩺", "⋗", "⦕", "⩼", "⪆", "⥸", "⋗", "⋛", "⪌", "≷", "≳",
+    "≩︀", "≩︀", "⇔", " ", "½", "ℋ", "ъ", "↔", "⥈", "↭", "ℏ", "ĥ", "♥", "♥", "…", "⊹", "𝔥", "⤥", "⤦",
+    "⇿", "∻", "↩", "↪", "𝕙", "―", "𝒽", "ℏ", "ħ", "⁃", "‐", "í", "í", "⁣", "î", "î", "и", "е", "¡",
+    "¡", "⇔", "𝔦", "ì", "ì", "ⅈ", "⨌", "∭", "⧜", "℩", "ij", "ī", "ℑ", "ℐ", "ℑ", "ı", "⊷", "Ƶ", "∈",
+    "℅", "∞", "⧝", "ı", "∫", "⊺", "ℤ", "⊺", "⨗", "⨼", "ё", "į", "𝕚", "ι", "⨼", "¿", "¿", "𝒾", "∈",
+    "⋹", "⋵", "⋴", "⋳", "∈", "⁢", "ĩ", "і", "ï", "ï", "ĵ", "й", "𝔧", "ȷ", "𝕛", "𝒿", "ј", "є", "κ",
+    "ϰ", "ķ", "к", "𝔨", "ĸ", "х", "ќ", "𝕜", "𝓀", "⇚", "⇐", "⤛", "⤎", "≦", "⪋", "⥢", "ĺ", "⦴", "ℒ",
+    "λ", "⟨", "⦑", "⟨", "⪅", "«", "«", "←", "⇤", "⤟", "⤝", "↩", "↫", "⤹", "⥳", "↢", "⪫", "⤙", "⪭",
+    "⪭︀", "⤌", "❲", "{", "[", "⦋", "⦏", "⦍", "ľ", "ļ", "⌈", "{", "л", "⤶", "“", "„", "⥧", "⥋", "↲",
+    "≤", "←", "↢", "↽", "↼", "⇇", "↔", "⇆", "⇋", "↭", "⋋", "⋚", "≤", "≦", "⩽", "⩽", "⪨", "⩿", "⪁",
+    "⪃", "⋚︀", "⪓", "⪅", "⋖", "⋚", "⪋", "≶", "≲", "⥼", "⌊", "𝔩", "≶", "⪑", "↽", "↼", "⥪", "▄", "љ",
+    "≪", "⇇", "⌞", "⥫", "◺", "ŀ", "⎰", "⎰", "≨", "⪉", "⪉", "⪇", "⪇", "≨", "⋦", "⟬", "⇽", "⟦", "⟵",
+    "⟷", "⟼", "⟶", "↫", "↬", "⦅", "𝕝", "⨭", "⨴", "∗", "_", "◊", "◊", "⧫", "(", "⦓", "⇆", "⌟", "⇋",
+    "⥭", "‎", "⊿", "‹", "𝓁", "↰", "≲", "⪍", "⪏", "[", "‘", "‚", "ł", "<", "<", "⪦", "⩹", "⋖", "⋋",
+    "⋉", "⥶", "⩻", "⦖", "◃", "⊴", "◂", "⥊", "⥦", "≨︀", "≨︀", "∺", "¯", "¯", "♂", "✠", "✠", "↦", "↦",
+    "↧", "↤", "↥", "▮", "⨩", "м", "—", "∡", "𝔪", "℧", "µ", "µ", "∣", "*", "⫰", "·", "·", "−", "⊟",
+    "∸", "⨪", "⫛", "…", "∓", "⊧", "𝕞", "∓", "𝓂", "∾", "μ", "⊸", "⊸", "⋙̸", "≫⃒", "≫̸", "⇍", "⇎", "⋘̸",
+    "≪⃒", "≪̸", "⇏", "⊯", "⊮", "∇", "ń", "∠⃒", "≉", "⩰̸", "≋̸", "ʼn", "≉", "♮", "♮", "ℕ", " ", " ", "≎̸",
+    "≏̸", "⩃", "ň", "ņ", "≇", "⩭̸", "⩂", "н", "–", "≠", "⇗", "⤤", "↗", "↗", "≐̸", "≢", "⤨", "≂̸", "∄",
+    "∄", "𝔫", "≧̸", "≱", "≱", "≧̸", "⩾̸", "⩾̸", "≵", "≯", "≯", "⇎", "↮", "⫲", "∋", "⋼", "⋺", "∋", "њ",
+    "⇍", "≦̸", "↚", "‥", "≰", "↚", "↮", "≰", "≦̸", "⩽̸", "⩽̸", "≮", "≴", "≮", "⋪", "⋬", "∤", "𝕟", "¬",
+    "¬", "∉", "⋹̸", "⋵̸", "∉", "⋷", "⋶", "∌", "∌", "⋾", "⋽", "∦", "∦", "⫽⃥", "∂̸", "⨔", "⊀", "⋠", "⪯̸",
+    "⊀", "⪯̸", "⇏", "↛", "⤳̸", "↝̸", "↛", "⋫", "⋭", "⊁", "⋡", "⪰̸", "𝓃", "∤", "∦", "≁", "≄", "≄", "∤",
+    "∦", "⋢", "⋣", "⊄", "⫅̸", "⊈", "⊂⃒", "⊈", "⫅̸", "⊁", "⪰̸", "⊅", "⫆̸", "⊉", "⊃⃒", "⊉", "⫆̸", "≹", "ñ",
+    "ñ", "≸", "⋪", "⋬", "⋫", "⋭", "ν", "#", "№", " ", "⊭", "⤄", "≍⃒", "⊬", "≥⃒", ">⃒", "⧞", "⤂", "≤⃒",
+    "<⃒", "⊴⃒", "⤃", "⊵⃒", "∼⃒", "⇖", "⤣", "↖", "↖", "⤧", "Ⓢ", "ó", "ó", "⊛", "ô", "ô", "о", "⊝", "ő",
+    "⨸", "⊙", "⦼", "œ", "⦿", "𝔬", "˛", "ò", "ò", "⧁", "⦵", "Ω", "∮", "↺", "⦾", "⦻", "‾", "⧀", "ō",
+    "ω", "ο", "⦶", "⊖", "𝕠", "⦷", "⦹", "⊕", "∨", "↻", "º", "ℴ", "ℴ", "ª", "º", "⊶", "⩖", "⩗", "⩛",
+    "ℴ", "ø", "ø", "⊘", "õ", "õ", "⊗", "⨶", "ö", "ö", "⌽", "¶", "¶", "∥", "⫳", "⫽", "∂", "п", "%",
+    ".", "‰", "⊥", "‱", "𝔭", "φ", "ϕ", "ℳ", "☎", "π", "⋔", "ϖ", "ℏ", "ℎ", "ℏ", "+", "⨣", "⊞", "⨢",
+    "∔", "⨥", "⩲", "±", "±", "⨦", "⨧", "±", "⨕", "𝕡", "£", "£", "≺", "⪳", "⪷", "≼", "⪯", "≺", "⪷",
+    "≼", "⪯", "⪹", "⪵", "⋨", "≾", "′", "ℙ", "⪵", "⪹", "⋨", "∏", "⌮", "⌒", "⌓", "∝", "∝", "≾", "⊰",
+    "𝓅", "ψ", " ", "𝔮", "⨌", "𝕢", "⁗", "𝓆", "ℍ", "⨖", "?", "≟", "\"", "\"", "⇛", "⇒", "⤜", "⤏",
+    "⥤", "∽̱", "ŕ", "√", "⦳", "⟩", "⦒", "⦥", "⟩", "»", "»", "→", "⥵", "⇥", "⤠", "⤳", "⤞", "↪", "↬",
+    "⥅", "⥴", "↣", "↝", "⤚", "∶", "ℚ", "⤍", "❳", "}", "]", "⦌", "⦎", "⦐", "ř", "ŗ", "⌉", "}", "р",
+    "⤷", "⥩", "”", "”", "↳", "ℜ", "ℛ", "ℜ", "ℝ", "▭", "®", "®", "⥽", "⌋", "𝔯", "⇁", "⇀", "⥬", "ρ",
+    "ϱ", "→", "↣", "⇁", "⇀", "⇄", "⇌", "⇉", "↝", "⋌", "˚", "≓", "⇄", "⇌", "‏", "⎱", "⎱", "⫮", "⟭",
+    "⇾", "⟧", "⦆", "𝕣", "⨮", "⨵", ")", "⦔", "⨒", "⇉", "›", "𝓇", "↱", "]", "’", "’", "⋌", "⋊", "▹",
+    "⊵", "▸", "⧎", "⥨", "℞", "ś", "‚", "≻", "⪴", "⪸", "š", "≽", "⪰", "ş", "ŝ", "⪶", "⪺", "⋩", "⨓",
+    "≿", "с", "⋅", "⊡", "⩦", "⇘", "⤥", "↘", "↘", "§", "§", ";", "⤩", "∖", "∖", "✶", "𝔰", "⌢", "♯",
+    "щ", "ш", "∣", "∥", "\u{AD}", "\u{AD}", "σ", "ς", "ς", "∼", "⩪", "≃", "≃", "⪞", "⪠", "⪝", "⪟",
+    "≆", "⨤", "⥲", "←", "∖", "⨳", "⧤", "∣", "⌣", "⪪", "⪬", "⪬︀", "ь", "/", "⧄", "⌿", "𝕤", "♠", "♠",
+    "∥", "⊓", "⊓︀", "⊔", "⊔︀", "⊏", "⊑", "⊏", "⊑", "⊐", "⊒", "⊐", "⊒", "□", "□", "▪", "▪", "→", "𝓈",
+    "∖", "⌣", "⋆", "☆", "★", "ϵ", "ϕ", "¯", "⊂", "⫅", "⪽", "⊆", "⫃", "⫁", "⫋", "⊊", "⪿", "⥹", "⊂",
+    "⊆", "⫅", "⊊", "⫋", "⫇", "⫕", "⫓", "≻", "⪸", "≽", "⪰", "⪺", "⪶", "⋩", "≿", "∑", "♪", "⊃", "¹",
+    "²", "³", "⫆", "⪾", "⫘", "⊇", "⫄", "⟉", "⫗", "⥻", "⫂", "⫌", "⊋", "⫀", "⊃", "⊇", "⫆", "⊋", "⫌",
+    "⫈", "⫔", "⫖", "⇙", "⤦", "↙", "↙", "⤪", "ß", "ß", "⌖", "τ", "⎴", "ť", "ţ", "т", "⃛", "⌕", "𝔱",
+    "∴", "∴", "θ", "ϑ", "ϑ", "≈", "∼", " ", "≈", "∼", "þ", "þ", "˜", "×", "×", "⊠", "⨱", "⨰", "∭",
+    "⤨", "⊤", "⌶", "⫱", "𝕥", "⫚", "⤩", "‴", "™", "▵", "▿", "◃", "⊴", "≜", "▹", "⊵", "◬", "≜", "⨺",
+    "⨹", "⧍", "⨻", "⏢", "𝓉", "ц", "ћ", "ŧ", "≬", "↞", "↠", "⇑", "⥣", "ú", "ú", "↑", "ў", "ŭ", "û",
+    "û", "у", "⇅", "ű", "⥮", "⥾", "𝔲", "ù", "ù", "↿", "↾", "▀", "⌜", "⌜", "⌏", "◸", "ū", "¨", "¨",
+    "ų", "𝕦", "↑", "↕", "↿", "↾", "⊎", "υ", "ϒ", "υ", "⇈", "⌝", "⌝", "⌎", "ů", "◹", "𝓊", "⋰", "ũ",
+    "▵", "▴", "⇈", "ü", "ü", "⦧", "⇕", "⫨", "⫩", "⊨", "⦜", "ϵ", "ϰ", "∅", "ϕ", "ϖ", "∝", "↕", "ϱ",
+    "ς", "⊊︀", "⫋︀", "⊋︀", "⫌︀", "ϑ", "⊲", "⊳", "в", "⊢", "∨", "⊻", "≚", "⋮", "|", "|", "𝔳", "⊲", "⊂⃒",
+    "⊃⃒", "𝕧", "∝", "⊳", "𝓋", "⫋︀", "⊊︀", "⫌︀", "⊋︀", "⦚", "ŵ", "⩟", "∧", "≙", "℘", "𝔴", "𝕨", "℘", "≀",
+    "≀", "𝓌", "⋂", "◯", "⋃", "▽", "𝔵", "⟺", "⟷", "ξ", "⟸", "⟵", "⟼", "⋻", "⨀", "𝕩", "⨁", "⨂", "⟹",
+    "⟶", "𝓍", "⨆", "⨄", "△", "⋁", "⋀", "ý", "ý", "я", "ŷ", "ы", "¥", "¥", "𝔶", "ї", "𝕪", "𝓎", "ю",
+    "ÿ", "ÿ", "ź", "ž", "з", "ż", "ℨ", "ζ", "𝔷", "ж", "⇝", "𝕫", "𝓏", "‍", "‌",
+];
diff --git a/src/construct/blank_line.rs b/src/construct/blank_line.rs
new file mode 100644
index 0000000..7b7962b
--- /dev/null
+++ b/src/construct/blank_line.rs
@@ -0,0 +1,61 @@
+//! Blank lines are a construct that occurs in the flow content type.
+//!
+//! They’re formed with the following BNF:
+//!
+//! ```bnf
+//! blank_line ::= *(' ' '\t')
+//! ```
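+//!
+//! Outside the state machine, the grammar amounts to something like this
+//! sketch (hypothetical helper):
+//!
+//! ```rust,ignore
+//! fn is_blank(line: &str) -> bool {
+//!     // Nothing at all, or only spaces and tabs.
+//!     line.chars().all(|char| char == ' ' || char == '\t')
+//! }
+//! ```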
+//!
+//! Blank lines are sometimes needed, such as to separate one paragraph
+//! from another.
+//! In several cases, blank lines are not needed between flow constructs,
+//! such as between two headings.
+//! Sometimes, the presence of blank lines changes how the HTML is rendered,
+//! such as whether blank lines occur between the items in a list.
+//! More than one blank line is never needed in `CommonMark`.
+//!
+//! Because blank lines can be empty (line endings are not considered part of
+//! them), and events cannot be empty, blank lines are not present as a token.
+//!
+//! ## References
+//!
+//! *   [`blank-line.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/blank-line.js)
+//! *   [*§ 4.9 Blank lines* in `CommonMark`](https://spec.commonmark.org/0.30/#blank-lines)
+//!
+//! <!-- To do: link `flow`, `heading`, `list`, `paragraph` -->
+
+use crate::construct::partial_whitespace::start as whitespace;
+use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+
+/// Start of a blank line.
+///
+/// Note: `␠` represents a space character.
+///
+/// ```markdown
+/// |␠␠
+/// |
+/// ```
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    tokenizer.attempt(
+        |tokenizer, code| whitespace(tokenizer, code, TokenType::BlankLineWhitespace),
+        |_ok| Box::new(after),
+    )(tokenizer, code)
+}
+
+/// After zero or more spaces or tabs, before a line ending or EOF.
+///
+/// Note: `␠` represents a space character.
+///
+/// ```markdown
+/// |␠␠
+/// |
+/// ```
+fn after(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            (State::Ok, Some(vec![code]))
+        }
+        _ => (State::Nok, None),
+    }
+}
diff --git a/src/construct/character_escape.rs b/src/construct/character_escape.rs
new file mode 100644
index 0000000..5ea995e
--- /dev/null
+++ b/src/construct/character_escape.rs
@@ -0,0 +1,69 @@
+//! Character escapes are a construct that occurs in the string and text
+//! content types.
+//!
+//! They’re formed with the following BNF:
+//!
+//! ```bnf
+//! character_escape ::= '\\' ascii_punctuation
+//! ```
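+//!
+//! As a plain Rust sketch (hypothetical helper), the grammar checks exactly
+//! two characters:
+//!
+//! ```rust,ignore
+//! fn is_character_escape(marker: char, escaped: char) -> bool {
+//!     marker == '\\' && escaped.is_ascii_punctuation()
+//! }
+//! ```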
+//!
+//! Like much of markdown, there are no “invalid” character escapes: just a
+//! slash, or a slash followed by anything other than an ASCII punctuation
+//! character, is exactly that: just a slash.
+//! To escape (most) arbitrary characters, use a
+//! [character reference][] instead
+//! (as in, `&amp;`, `&#123;`, or say `&#x9;`).
+//! It is also possible to escape a line ending in text with a similar
+//! construct: a backslash followed by a line ending (that is part of the
+//! construct instead of ending it).
+//!
+//! ## References
+//!
+//! *   [`character-escape.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/character-escape.js)
+//! *   [*§ 2.4 Backslash escapes* in `CommonMark`](https://spec.commonmark.org/0.30/#backslash-escapes)
+//!
+//! [character reference]: crate::construct::character_reference
+//!
+//! <!-- To do: link `hard_break_escape`, `string`, `text` -->
+
+use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+
+/// Start of a character escape.
+///
+/// ```markdown
+/// a|\*b
+/// a|\b
+/// a|\ b
+/// ```
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::Char('\\') => {
+            tokenizer.enter(TokenType::CharacterEscape);
+            tokenizer.enter(TokenType::CharacterEscapeMarker);
+            tokenizer.consume(code);
+            tokenizer.exit(TokenType::CharacterEscapeMarker);
+            (State::Fn(Box::new(inside)), None)
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// Inside a character escape, after `\`.
+///
+/// ```markdown
+/// a\|*b
+/// a\|b
+/// a\| b
+/// ```
+fn inside(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::Char(char) if char.is_ascii_punctuation() => {
+            tokenizer.enter(TokenType::CharacterEscapeValue);
+            tokenizer.consume(code);
+            tokenizer.exit(TokenType::CharacterEscapeValue);
+            tokenizer.exit(TokenType::CharacterEscape);
+            (State::Ok, None)
+        }
+        _ => (State::Nok, None),
+    }
+}
diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs
new file mode 100644
index 0000000..27275d5
--- /dev/null
+++ b/src/construct/character_reference.rs
@@ -0,0 +1,237 @@
+//! Character references are a construct that occurs in the string and text
+//! content types.
+//!
+//! They’re formed with the following BNF:
+//!
+//! ```bnf
+//! character_reference ::= '&' (numeric | named) ';'
+//!
+//! numeric ::= '#' (hexadecimal | decimal)
+//! ; Note: Limit of `6` imposed as all bigger numbers are invalid:
+//! hexadecimal ::= ('x' | 'X') 1*6(ascii_hexdigit)
+//! ; Note: Limit of `7` imposed as all bigger numbers are invalid:
+//! decimal ::= 1*7(ascii_digit)
+//! ; Note: Limit of `31` imposed by `CounterClockwiseContourIntegral`:
+//! ; Note: Limited to any known named character reference (see `constants.rs`)
+//! named ::= 1*31(ascii_alphanumeric)
+//! ```
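+//!
+//! The limits named in those notes live as constants in `constant.rs`, which
+//! this module uses:
+//!
+//! ```rust,ignore
+//! assert_eq!(CHARACTER_REFERENCE_HEXADECIMAL_SIZE_MAX, 6);
+//! assert_eq!(CHARACTER_REFERENCE_DECIMAL_SIZE_MAX, 7);
+//! // `CounterClockwiseContourIntegral` is the longest known name.
+//! assert_eq!(CHARACTER_REFERENCE_NAMED_SIZE_MAX, 31);
+//! ```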
+//!
+//! Like much of markdown, there are no “invalid” character references.
+//! However, for security reasons, several numeric character references parse
+//! fine but are not rendered as their corresponding character; they are
+//! instead replaced by a U+FFFD REPLACEMENT CHARACTER (`�`).
+//! See [`decode_numeric_character_reference`][decode_numeric] for more info.
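+//!
+//! A simplified sketch of that decoding step (the real
+//! [`decode_numeric_character_reference`][decode_numeric] covers more cases):
+//!
+//! ```rust,ignore
+//! fn decode_numeric(value: &str, radix: u32) -> char {
+//!     match u32::from_str_radix(value, radix) {
+//!         // Surrogates and out-of-range code points yield `None` below;
+//!         // U+0000 is also replaced, for security.
+//!         Ok(code) if code != 0 => char::from_u32(code).unwrap_or('\u{FFFD}'),
+//!         _ => '\u{FFFD}',
+//!     }
+//! }
+//! ```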
+//!
+//! To escape ASCII punctuation characters, use the terser
+//! [character escape][character_escape] construct instead (as in, `\&`).
+//!
+//! Character references in markdown are not the same as character references
+//! in HTML.
+//! Notably, HTML allows several character references without a closing
+//! semicolon.
+//! See [*§ 13.2.5.72 Character reference state* in the HTML spec][html] for more info.
+//!
+//! Character references are parsed case-insensitively.
+//! The casing of hexadecimal numeric character references has no effect.
+//! The casing of named character references does not matter when parsing
+//! them, but does affect whether they match.
+//! Depending on the name, one or more cases are allowed: for example, `AMP`
+//! and `amp` are both allowed but other cases are not.
+//! See [`CHARACTER_REFERENCE_NAMES`][character_reference_names] for which
+//! names match.
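+//!
+//! For example:
+//!
+//! ```rust,ignore
+//! assert!(CHARACTER_REFERENCE_NAMES.contains(&"amp"));
+//! assert!(CHARACTER_REFERENCE_NAMES.contains(&"AMP"));
+//! // Other casings, such as `Amp`, are not in the list and do not match.
+//! assert!(!CHARACTER_REFERENCE_NAMES.contains(&"Amp"));
+//! ```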
+//!
+//! ## References
+//!
+//! *   [`character-reference.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/character-reference.js)
+//! *   [*§ 2.5 Entity and numeric character references* in `CommonMark`](https://spec.commonmark.org/0.30/#entity-and-numeric-character-references)
+//!
+//! [character_escape]: crate::construct::character_escape
+//! [decode_numeric]: crate::util::decode_numeric_character_reference
+//! [character_reference_names]: crate::constant::CHARACTER_REFERENCE_NAMES
+//! [html]: https://html.spec.whatwg.org/multipage/parsing.html#character-reference-state
+//!
+//! <!-- To do: link `string`, `text` -->
+
+use crate::constant::{
+    CHARACTER_REFERENCE_DECIMAL_SIZE_MAX, CHARACTER_REFERENCE_HEXADECIMAL_SIZE_MAX,
+    CHARACTER_REFERENCE_NAMED_SIZE_MAX, CHARACTER_REFERENCE_NAMES,
+};
+use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+
+/// Kind of a character reference.
+#[derive(Debug, Clone)]
+pub enum Kind {
+    /// Numeric decimal character reference (`&#123;`).
+    Decimal,
+    /// Numeric hexadecimal character reference (`&#x9;`).
+    Hexadecimal,
+    /// Named character reference (`&amp;`).
+    Named,
+}
+
+/// State needed to parse character references.
+#[derive(Debug, Clone)]
+struct Info {
+    /// All parsed characters.
+    buffer: Vec<char>,
+    /// Kind of character reference.
+    kind: Kind,
+}
+
+/// Start of a character reference.
+///
+/// ```markdown
+/// a|&amp;b
+/// a|&#123;b
+/// a|&#x9;b
+/// ```
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::Char('&') => {
+            tokenizer.enter(TokenType::CharacterReference);
+            tokenizer.enter(TokenType::CharacterReferenceMarker);
+            tokenizer.consume(code);
+            tokenizer.exit(TokenType::CharacterReferenceMarker);
+            (State::Fn(Box::new(open)), None)
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// Inside a character reference, after `&`, before `#` for numeric references
+/// or an alphanumeric for named references.
+///
+/// ```markdown
+/// a&|amp;b
+/// a&|#123;b
+/// a&|#x9;b
+/// ```
+fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    if let Code::Char('#') = code {
+        tokenizer.enter(TokenType::CharacterReferenceMarkerNumeric);
+        tokenizer.consume(code);
+        tokenizer.exit(TokenType::CharacterReferenceMarkerNumeric);
+        (State::Fn(Box::new(numeric)), None)
+    } else {
+        tokenizer.enter(TokenType::CharacterReferenceValue);
+        value(
+            tokenizer,
+            code,
+            Info {
+                buffer: vec![],
+                kind: Kind::Named,
+            },
+        )
+    }
+}
+
+/// Inside a numeric character reference, right before `x` for hexadecimals,
+/// or a digit for decimals.
+///
+/// ```markdown
+/// a&#|123;b
+/// a&#|x9;b
+/// ```
+fn numeric(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::Char(char) if char == 'x' || char == 'X' => {
+            tokenizer.enter(TokenType::CharacterReferenceMarkerHexadecimal);
+            tokenizer.consume(code);
+            tokenizer.exit(TokenType::CharacterReferenceMarkerHexadecimal);
+            tokenizer.enter(TokenType::CharacterReferenceValue);
+
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    value(
+                        tokenizer,
+                        code,
+                        Info {
+                            buffer: vec![],
+                            kind: Kind::Hexadecimal,
+                        },
+                    )
+                })),
+                None,
+            )
+        }
+        _ => {
+            tokenizer.enter(TokenType::CharacterReferenceValue);
+
+            value(
+                tokenizer,
+                code,
+                Info {
+                    buffer: vec![],
+                    kind: Kind::Decimal,
+                },
+            )
+        }
+    }
+}
+
+/// Inside a character reference value, after the markers (`&#x`, `&#`, or
+/// `&`) that define its kind, but before the `;`.
+/// The character reference kind defines what and how many characters are
+/// allowed.
+///
+/// ```markdown
+/// a&a|mp;b
+/// a&#1|23;b
+/// a&#x|9;b
+/// ```
+fn value(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
+    match code {
+        Code::Char(';') if !info.buffer.is_empty() => {
+            tokenizer.exit(TokenType::CharacterReferenceValue);
+            let value = info.buffer.iter().collect::<String>();
+
+            if let Kind::Named = info.kind {
+                if !CHARACTER_REFERENCE_NAMES.contains(&value.as_str()) {
+                    return (State::Nok, Some(vec![code]));
+                }
+            }
+
+            tokenizer.enter(TokenType::CharacterReferenceMarkerSemi);
+            tokenizer.consume(code);
+            tokenizer.exit(TokenType::CharacterReferenceMarkerSemi);
+            tokenizer.exit(TokenType::CharacterReference);
+            (State::Ok, None)
+        }
+        Code::Char(char) => {
+            let len = info.buffer.len();
+
+            let cont = match info.kind {
+                Kind::Hexadecimal
+                    if char.is_ascii_hexdigit()
+                        && len < CHARACTER_REFERENCE_HEXADECIMAL_SIZE_MAX =>
+                {
+                    true
+                }
+                Kind::Decimal
+                    if char.is_ascii_digit() && len < CHARACTER_REFERENCE_DECIMAL_SIZE_MAX =>
+                {
+                    true
+                }
+                Kind::Named
+                    if char.is_ascii_alphanumeric() && len < CHARACTER_REFERENCE_NAMED_SIZE_MAX =>
+                {
+                    true
+                }
+                _ => false,
+            };
+
+            if cont {
+                let mut clone = info;
+                clone.buffer.push(char);
+                tokenizer.consume(code);
+                (
+                    State::Fn(Box::new(|tokenizer, code| value(tokenizer, code, clone))),
+                    None,
+                )
+            } else {
+                (State::Nok, None)
+            }
+        }
+        _ => (State::Nok, None),
+    }
+}
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
new file mode 100644
index 0000000..2068a62
--- /dev/null
+++ b/src/construct/code_fenced.rs
@@ -0,0 +1,581 @@
+//! Code (fenced) is a construct that occurs in the flow content type.
+//!
+//! It forms with the following BNF:
+//!
+//! ```bnf
+//! code_fenced ::= fence_open *( eol *code ) [ eol fence_close ]
+//!
+//! fence_open ::= sequence [ 1*space_or_tab info [ 1*space_or_tab meta ] ] *space_or_tab
+//! ; Restriction: the number of markers in the closing fence sequence must be
+//! ; equal to or greater than the number of markers in the opening fence
+//! ; sequence.
+//! ; Restriction: the marker in the closing fence sequence must match the
+//! ; marker in the opening fence sequence
+//! fence_close ::= sequence *space_or_tab
+//! sequence ::= 3*'`' | 3*'~'
+//! info ::= 1*text
+//! meta ::= 1*text *( *space_or_tab 1*text )
+//!
+//! ; Restriction: the `` ` `` character cannot occur in `text` if it is the
+//! ; marker of the opening fence sequence.
+//! text ::= code - eol - space_or_tab
+//! eol ::= '\r' | '\r\n' | '\n'
+//! space_or_tab ::= ' ' | '\t'
+//! code ::= . ; any unicode code point (other than line endings).
+//! ```
+//!
+//! The above grammar does not show how whitespace is handled.
+//! To parse code (fenced), let `X` be the number of whitespace characters
+//! before the opening fence sequence.
+//! Each line of content is then allowed (not required) to be indented with up
+//! to `X` spaces or tabs, which are then ignored as an indent instead of being
+//! considered as part of the code.
+//! This indent does not affect the closing fence.
+//! The closing fence can itself be indented by up to 3 separate spaces or
+//! tabs; a bigger indent makes it part of the code instead of a fence.
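+//!
+//! A sketch of that indent rule in plain Rust (hypothetical helper; the
+//! tokenizer itself works on `Code`s, and counts tabs through virtual
+//! spaces):
+//!
+//! ```rust,ignore
+//! /// Strip up to `opening_prefix` leading spaces or tabs from a line.
+//! fn strip_content_indent(line: &str, opening_prefix: usize) -> &str {
+//!     let bytes = line.as_bytes();
+//!     let mut index = 0;
+//!     while index < opening_prefix
+//!         && index < bytes.len()
+//!         && (bytes[index] == b' ' || bytes[index] == b'\t')
+//!     {
+//!         index += 1;
+//!     }
+//!     // Spaces and tabs are single-byte, so slicing by `index` is safe.
+//!     &line[index..]
+//! }
+//! ```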
+//!
+//! Code (fenced) relates to both the `<pre>` and the `<code>` elements in
+//! HTML.
+//! See [*§ 4.4.3 The `pre` element*][html-pre] and the [*§ 4.5.15 The `code`
+//! element*][html-code] in the HTML spec for more info.
+//!
+//! The optional `meta` part is ignored: it is not used when parsing or
+//! rendering.
+//! The optional `info` part is used and is expected to specify the programming
+//! language that the code is in.
+//! Which value it holds depends on what your syntax highlighter supports, if
+//! one is used.
+//! The `info` is, when rendering to HTML, typically exposed as a class.
+//! This behavior stems from the HTML spec ([*§ 4.5.15 The `code`
+//! element*][html-code]).
+//! For example:
+//!
+//! ```markdown
+//! ~~~css
+//! * { color: tomato }
+//! ~~~
+//! ```
+//!
+//! Yields:
+//!
+//! ```html
+//! <pre><code class="language-css">* { color: tomato }
+//! </code></pre>
+//! ```
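+//!
+//! A sketch of how the class seen above is derived from the `info`
+//! (hypothetical helper):
+//!
+//! ```rust,ignore
+//! fn code_class(info: &str) -> String {
+//!     format!("language-{}", info)
+//! }
+//! ```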
+//!
+//! The `info` and `meta` parts are interpreted as the string content type.
+//! That means that character escapes and character references are allowed.
+//!
+//! In markdown, it is also possible to use code (text) in the text content
+//! type.
+//! It is also possible to create code with the
+//! [code (indented)][code-indented] construct.
+//! That construct is less explicit, different from code (text), and has no
+//! support for specifying the programming language, so it is recommended to
+//! use code (fenced) instead of code (indented).
+//!
+//! ## References
+//!
+//! *   [`code-fenced.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/code-fenced.js)
+//! *   [*§ 4.5 Fenced code blocks* in `CommonMark`](https://spec.commonmark.org/0.30/#fenced-code-blocks)
+//!
+//! [code-indented]: crate::construct::code_indented
+//! [html-pre]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-pre-element
+//! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
+//!
+//! <!-- To do: link `flow`, `text`, `code_text`, `string` -->
+
+use crate::constant::{CODE_FENCED_SEQUENCE_SIZE_MIN, TAB_SIZE};
+use crate::construct::partial_whitespace::start as whitespace;
+use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+use crate::util::get_span;
+
+/// Kind of fences.
+#[derive(Debug, Clone, PartialEq)]
+pub enum Kind {
+    /// Grave accent (tick) code.
+    GraveAccent,
+    /// Tilde code.
+    Tilde,
+}
+
+/// State needed to parse code (fenced).
+#[derive(Debug, Clone)]
+struct Info {
+    /// Number of markers on the opening fence sequence.
+    size: usize,
+    /// Number of tabs or spaces of indentation before the opening fence
+    /// sequence.
+    prefix: usize,
+    /// Kind of fences.
+    kind: Kind,
+}
+
+/// Start of fenced code.
+///
+/// ```markdown
+/// | ~~~js
+///  console.log(1);
+///  ~~~
+/// ```
+///
+/// Parsing note: normally, the prefix is already stripped.
+/// `flow.rs` makes sure that that doesn’t happen for code (fenced), as we need
+/// it.
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    tokenizer.enter(TokenType::CodeFenced);
+    tokenizer.enter(TokenType::CodeFencedFence);
+    tokenizer.attempt(
+        |tokenizer, code| whitespace(tokenizer, code, TokenType::Whitespace),
+        |_ok| Box::new(before_sequence_open),
+    )(tokenizer, code)
+}
+
+/// Inside the opening fence, after an optional prefix, before a sequence.
+///
+/// ```markdown
+/// |~~~js
+/// console.log(1);
+/// ~~~
+/// ```
+fn before_sequence_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    let tail = tokenizer.events.last();
+    let mut prefix = 0;
+
+    if let Some(event) = tail {
+        if event.token_type == TokenType::Whitespace {
+            let span = get_span(&tokenizer.events, tokenizer.events.len() - 1);
+            prefix = span.end_index - span.start_index;
+        }
+    }
+
+    match code {
+        Code::Char(char) if char == '`' || char == '~' => {
+            tokenizer.enter(TokenType::CodeFencedFenceSequence);
+            sequence_open(
+                tokenizer,
+                Info {
+                    prefix,
+                    size: 0,
+                    kind: if char == '`' {
+                        Kind::GraveAccent
+                    } else {
+                        Kind::Tilde
+                    },
+                },
+                code,
+            )
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// Inside the opening fence sequence.
+///
+/// ```markdown
+/// ~|~~js
+/// console.log(1);
+/// ~~~
+/// ```
+fn sequence_open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    let marker = if info.kind == Kind::GraveAccent {
+        '`'
+    } else {
+        '~'
+    };
+
+    match code {
+        Code::Char(char) if char == marker => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    let mut info = info;
+                    info.size += 1;
+                    sequence_open(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        _ => {
+            if info.size < CODE_FENCED_SEQUENCE_SIZE_MIN {
+                (State::Nok, None)
+            } else {
+                tokenizer.exit(TokenType::CodeFencedFenceSequence);
+                tokenizer.attempt(
+                    |tokenizer, code| {
+                        whitespace(tokenizer, code, TokenType::CodeFencedFenceWhitespace)
+                    },
+                    |_ok| Box::new(|tokenizer, code| info_before(tokenizer, info, code)),
+                )(tokenizer, code)
+            }
+        }
+    }
+}
+
+/// Inside the opening fence, after the sequence (and optional whitespace), before the info.
+///
+/// ```markdown
+/// ~~~|js
+/// console.log(1);
+/// ~~~
+/// ```
+fn info_before(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    match code {
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            tokenizer.exit(TokenType::CodeFencedFence);
+            at_break(tokenizer, info, code)
+        }
+        _ => {
+            tokenizer.enter(TokenType::CodeFencedFenceInfo);
+            tokenizer.enter(TokenType::ChunkString);
+            info_inside(tokenizer, info, code, vec![])
+        }
+    }
+}
+
+/// Inside the opening fence info.
+///
+/// ```markdown
+/// ~~~j|s
+/// console.log(1);
+/// ~~~
+/// ```
+fn info_inside(
+    tokenizer: &mut Tokenizer,
+    info: Info,
+    code: Code,
+    codes: Vec<Code>,
+) -> StateFnResult {
+    match code {
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            println!("to do: subtokenize: {:?}", codes);
+            tokenizer.exit(TokenType::ChunkString);
+            tokenizer.exit(TokenType::CodeFencedFenceInfo);
+            tokenizer.exit(TokenType::CodeFencedFence);
+            at_break(tokenizer, info, code)
+        }
+        Code::VirtualSpace | Code::Char('\t' | ' ') => {
+            println!("to do: subtokenize: {:?}", codes);
+            tokenizer.exit(TokenType::ChunkString);
+            tokenizer.exit(TokenType::CodeFencedFenceInfo);
+            tokenizer.attempt(
+                |tokenizer, code| whitespace(tokenizer, code, TokenType::CodeFencedFenceWhitespace),
+                |_ok| Box::new(|tokenizer, code| meta_before(tokenizer, info, code)),
+            )(tokenizer, code)
+        }
+        Code::Char(char) if char == '`' && info.kind == Kind::GraveAccent => (State::Nok, None),
+        Code::Char(_) => {
+            let mut codes = codes;
+            codes.push(code);
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    info_inside(tokenizer, info, code, codes)
+                })),
+                None,
+            )
+        }
+    }
+}
+
+/// Inside the opening fence, after the info and whitespace, before the meta.
+///
+/// ```markdown
+/// ~~~js |eval
+/// console.log(1);
+/// ~~~
+/// ```
+fn meta_before(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    match code {
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            tokenizer.exit(TokenType::CodeFencedFence);
+            at_break(tokenizer, info, code)
+        }
+        _ => {
+            tokenizer.enter(TokenType::CodeFencedFenceMeta);
+            tokenizer.enter(TokenType::ChunkString);
+            meta(tokenizer, info, code)
+        }
+    }
+}
+
+/// Inside the opening fence meta.
+///
+/// ```markdown
+/// ~~~js e|val
+/// console.log(1);
+/// ~~~
+/// ```
+fn meta(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    match code {
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            tokenizer.exit(TokenType::ChunkString);
+            tokenizer.exit(TokenType::CodeFencedFenceMeta);
+            tokenizer.exit(TokenType::CodeFencedFence);
+            at_break(tokenizer, info, code)
+        }
+        Code::Char(char) if char == '`' && info.kind == Kind::GraveAccent => (State::Nok, None),
+        _ => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| meta(tokenizer, info, code))),
+                None,
+            )
+        }
+    }
+}
+
+/// At an eol/eof in code, before a closing fence or before content.
+///
+/// ```markdown
+/// ~~~js|
+/// aa|
+/// ~~~
+/// ```
+fn at_break(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    let clone = info.clone();
+
+    match code {
+        Code::None => after(tokenizer, code),
+        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => tokenizer.attempt(
+            |tokenizer, code| {
+                tokenizer.enter(TokenType::LineEnding);
+                tokenizer.consume(code);
+                tokenizer.exit(TokenType::LineEnding);
+                (
+                    State::Fn(Box::new(|tokenizer, code| {
+                        close_before(tokenizer, info, code)
+                    })),
+                    None,
+                )
+            },
+            |ok| {
+                if ok {
+                    Box::new(after)
+                } else {
+                    Box::new(|tokenizer, code| {
+                        tokenizer.enter(TokenType::LineEnding);
+                        tokenizer.consume(code);
+                        tokenizer.exit(TokenType::LineEnding);
+                        (
+                            State::Fn(Box::new(|tokenizer, code| {
+                                content_start(tokenizer, clone, code)
+                            })),
+                            None,
+                        )
+                    })
+                }
+            },
+        )(tokenizer, code),
+        _ => unreachable!("unexpected non-eol/eof after `at_break` `{:?}`", code),
+    }
+}
+
+/// Before a closing fence, before optional whitespace.
+///
+/// ```markdown
+/// ~~~js
+/// console.log('1')
+/// |~~~
+///
+/// ~~~js
+/// console.log('1')
+/// |  ~~~
+/// ```
+fn close_before(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    tokenizer.enter(TokenType::CodeFencedFence);
+    tokenizer.attempt(
+        |tokenizer, code| whitespace(tokenizer, code, TokenType::Whitespace),
+        |_ok| Box::new(|tokenizer, code| close_sequence_before(tokenizer, info, code)),
+    )(tokenizer, code)
+}
+
+/// In a closing fence, after optional whitespace, before sequence.
+///
+/// ```markdown
+/// ~~~js
+/// console.log('1')
+/// |~~~
+///
+/// ~~~js
+/// console.log('1')
+///   |~~~
+/// ```
+fn close_sequence_before(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    let tail = tokenizer.events.last();
+    let mut prefix = 0;
+    let marker = if info.kind == Kind::GraveAccent {
+        '`'
+    } else {
+        '~'
+    };
+
+    if let Some(event) = tail {
+        if event.token_type == TokenType::Whitespace {
+            let span = get_span(&tokenizer.events, tokenizer.events.len() - 1);
+            prefix = span.end_index - span.start_index;
+        }
+    }
+
+    // To do: 4+ should be okay if code (indented) is turned off!
+    if prefix >= TAB_SIZE {
+        return (State::Nok, None);
+    }
+
+    match code {
+        Code::Char(char) if char == marker => {
+            tokenizer.enter(TokenType::CodeFencedFenceSequence);
+            close_sequence(tokenizer, info, code, 0)
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// In the closing fence sequence.
+///
+/// ```markdown
+/// ~~~js
+/// console.log('1')
+/// ~|~~
+/// ```
+fn close_sequence(tokenizer: &mut Tokenizer, info: Info, code: Code, size: usize) -> StateFnResult {
+    let marker = if info.kind == Kind::GraveAccent {
+        '`'
+    } else {
+        '~'
+    };
+
+    match code {
+        Code::Char(char) if char == marker => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(move |tokenizer, code| {
+                    close_sequence(tokenizer, info, code, size + 1)
+                })),
+                None,
+            )
+        }
+        _ if size >= CODE_FENCED_SEQUENCE_SIZE_MIN && size >= info.size => {
+            tokenizer.exit(TokenType::CodeFencedFenceSequence);
+            tokenizer.attempt(
+                |tokenizer, code| whitespace(tokenizer, code, TokenType::CodeFencedFenceWhitespace),
+                |_ok| Box::new(close_whitespace_after),
+            )(tokenizer, code)
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// After the closing fence sequence, after optional whitespace.
+///
+/// ```markdown
+/// ~~~js
+/// console.log('1')
+/// ~~~ |
+/// ```
+fn close_whitespace_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            tokenizer.exit(TokenType::CodeFencedFence);
+            (State::Ok, Some(vec![code]))
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// Before code content, definitely not before a closing fence.
+///
+/// ```markdown
+/// ~~~js
+/// |aa
+/// ~~~
+/// ```
+fn content_start(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    match code {
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            at_break(tokenizer, info, code)
+        }
+        Code::VirtualSpace | Code::Char('\t' | ' ') if info.prefix > 0 => {
+            tokenizer.enter(TokenType::Whitespace);
+            content_prefix(tokenizer, info, 0, code)
+        }
+        _ => {
+            tokenizer.enter(TokenType::CodeFlowChunk);
+            content_continue(tokenizer, info, code)
+        }
+    }
+}
+
+/// Before code content, in a prefix.
+///
+/// ```markdown
+///   ~~~js
+///  | aa
+///   ~~~
+/// ```
+fn content_prefix(
+    tokenizer: &mut Tokenizer,
+    info: Info,
+    prefix: usize,
+    code: Code,
+) -> StateFnResult {
+    match code {
+        Code::VirtualSpace | Code::Char('\t' | ' ') if info.prefix > prefix => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(move |tokenizer, code| {
+                    content_prefix(tokenizer, info, prefix + 1, code)
+                })),
+                None,
+            )
+        }
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            tokenizer.exit(TokenType::Whitespace);
+            at_break(tokenizer, info, code)
+        }
+        _ => {
+            tokenizer.exit(TokenType::Whitespace);
+            tokenizer.enter(TokenType::CodeFlowChunk);
+            content_continue(tokenizer, info, code)
+        }
+    }
+}
+
+/// In code content.
+///
+/// ```markdown
+/// ~~~js
+/// |ab
+/// a|b
+/// ab|
+/// ~~~
+/// ```
+fn content_continue(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    match code {
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            tokenizer.exit(TokenType::CodeFlowChunk);
+            at_break(tokenizer, info, code)
+        }
+        _ => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    content_continue(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+    }
+}
+
+/// After fenced code.
+///
+/// ```markdown
+/// ~~~js
+/// console.log('1')
+/// ~~~|
+/// ```
+fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    tokenizer.exit(TokenType::CodeFenced);
+    (State::Ok, Some(vec![code]))
+}
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs
new file mode 100644
index 0000000..6bf089b
--- /dev/null
+++ b/src/construct/code_indented.rs
@@ -0,0 +1,190 @@
+//! Code (indented) is a construct that occurs in the flow content type.
+//!
+//! It forms with the following BNF:
+//!
+//! ```bnf
+//! code_indented ::= indented_filled_line *( eol *( blank_line eol ) indented_filled_line )
+//!
+//! ; Restriction: at least one `code` must not be whitespace.
+//! indented_filled_line ::= 4space_or_tab *code
+//! blank_line ::= *space_or_tab
+//! eol ::= '\r' | '\r\n' | '\n'
+//! code ::= . ; any unicode code point (other than line endings).
+//! space_or_tab ::= ' ' | '\t'
+//! ```
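+//!
+//! As a plain Rust sketch (hypothetical helper, assuming spaces only; the
+//! tokenizer also counts tabs, through virtual spaces):
+//!
+//! ```rust,ignore
+//! fn is_indented_filled_line(line: &str) -> bool {
+//!     // At least `TAB_SIZE` (4) characters of indent, and at least one
+//!     // non-whitespace character after it.
+//!     line.starts_with("    ") && !line.trim().is_empty()
+//! }
+//! ```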
+//!
+//! Code (indented) relates to both the `<pre>` and the `<code>` elements in
+//! HTML.
+//! See [*§ 4.4.3 The `pre` element*][html-pre] and the [*§ 4.5.15 The `code`
+//! element*][html-code] in the HTML spec for more info.
+//!
+//! In markdown, it is also possible to use code (text) in the text content
+//! type.
+//! It is also possible to create code with the [code (fenced)][code-fenced]
+//! construct.
+//! That construct is more explicit, more similar to code (text), and has
+//! support for specifying the programming language that the code is in, so it
+//! is recommended to use that instead of indented code.
+//!
+//! ## References
+//!
+//! *   [`code-indented.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/code-indented.js)
+//! *   [*§ 4.4 Indented code blocks* in `CommonMark`](https://spec.commonmark.org/0.30/#indented-code-blocks)
+//!
+//! [code-fenced]: crate::construct::code_fenced
+//! [html-pre]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-pre-element
+//! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
+//!
+//! <!-- To do: link `flow`, `code_text` -->
+
+use crate::constant::TAB_SIZE;
+use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+
+/// Start of code (indented).
+///
+/// ```markdown
+/// |    asd
+/// ```
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::VirtualSpace | Code::Char(' ' | '\t') => {
+            tokenizer.enter(TokenType::CodeIndented);
+            tokenizer.enter(TokenType::CodeIndentedPrefixWhitespace);
+            indent(tokenizer, code, 0)
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// Inside the initial whitespace.
+///
+/// ```markdown
+///  |   asd
+///   |  asd
+///    | asd
+///     |asd
+/// ```
+///
+/// > **Parsing note**: there is no need to check whether this first line is a
+/// > filled line (that it has a non-whitespace character), because blank lines
+/// > are parsed already, so we never run into that case.
+fn indent(tokenizer: &mut Tokenizer, code: Code, size: usize) -> StateFnResult {
+    match code {
+        _ if size == TAB_SIZE => {
+            tokenizer.exit(TokenType::CodeIndentedPrefixWhitespace);
+            at_break(tokenizer, code)
+        }
+        Code::VirtualSpace | Code::Char(' ' | '\t') => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(move |tokenizer, code| {
+                    indent(tokenizer, code, size + 1)
+                })),
+                None,
+            )
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// At a break.
+///
+/// ```markdown
+///     |asd
+///     asd|
+/// ```
+fn at_break(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::None => after(tokenizer, code),
+        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => tokenizer
+            .attempt(further_start, |ok| {
+                Box::new(if ok { at_break } else { after })
+            })(tokenizer, code),
+        _ => {
+            tokenizer.enter(TokenType::CodeFlowChunk);
+            content(tokenizer, code)
+        }
+    }
+}
+
+/// Inside code content.
+///
+/// ```markdown
+///     |ab
+///     a|b
+///     ab|
+/// ```
+fn content(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            tokenizer.exit(TokenType::CodeFlowChunk);
+            at_break(tokenizer, code)
+        }
+        _ => {
+            tokenizer.consume(code);
+            (State::Fn(Box::new(content)), None)
+        }
+    }
+}
+
+/// After indented code.
+///
+/// ```markdown
+///     ab|
+/// ```
+fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    tokenizer.exit(TokenType::CodeIndented);
+    (State::Ok, Some(vec![code]))
+}
+
+/// Right at a line ending, trying to parse another indent.
+///
+/// ```markdown
+///     ab|
+///     cd
+/// ```
+fn further_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    // To do: `nok` if lazy line.
+    match code {
+        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            tokenizer.enter(TokenType::LineEnding);
+            tokenizer.consume(code);
+            tokenizer.exit(TokenType::LineEnding);
+            (State::Fn(Box::new(further_start)), None)
+        }
+        Code::VirtualSpace | Code::Char(' ' | '\t') => {
+            tokenizer.enter(TokenType::CodeIndentedPrefixWhitespace);
+            further_indent(tokenizer, code, 0)
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// Inside further whitespace.
+///
+/// ```markdown
+///     asd
+///   |  asd
+/// ```
+fn further_indent(tokenizer: &mut Tokenizer, code: Code, size: usize) -> StateFnResult {
+    match code {
+        _ if size == TAB_SIZE => {
+            tokenizer.exit(TokenType::CodeIndentedPrefixWhitespace);
+            (State::Ok, Some(vec![code]))
+        }
+        Code::VirtualSpace | Code::Char(' ' | '\t') => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(move |tokenizer, code| {
+                    further_indent(tokenizer, code, size + 1)
+                })),
+                None,
+            )
+        }
+        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            tokenizer.exit(TokenType::CodeIndentedPrefixWhitespace);
+            further_start(tokenizer, code)
+        }
+        _ => (State::Nok, None),
+    }
+}
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
new file mode 100644
index 0000000..b3aef1b
--- /dev/null
+++ b/src/construct/heading_atx.rs
@@ -0,0 +1,175 @@
+//! Heading (atx) is a construct that occurs in the flow content type.
+//!
+//! They’re formed with the following BNF:
+//!
+//! ```bnf
+//! heading_atx ::= 1*6'#' [ 1*space_or_tab code [ 1*space_or_tab 1*'#' ] ] *space_or_tab
+//!
+//! code ::= . ; any unicode code point (other than line endings).
+//! space_or_tab ::= ' ' | '\t'
+//! ```
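+//!
+//! A sketch of the rank rule in plain Rust (hypothetical helper):
+//!
+//! ```rust,ignore
+//! /// The rank of a heading is the size of its opening sequence: 1 to 6.
+//! fn rank(opening_sequence: &str) -> Option<usize> {
+//!     let size = opening_sequence
+//!         .chars()
+//!         .take_while(|&char| char == '#')
+//!         .count();
+//!     if (1..=6).contains(&size) {
+//!         Some(size)
+//!     } else {
+//!         None
+//!     }
+//! }
+//! ```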
+//!
+//! Headings in markdown relate to the `<h1>` through `<h6>` elements in HTML.
+//! See [*§ 4.3.6 The `h1`, `h2`, `h3`, `h4`, `h5`, and `h6` elements* in the
+//! HTML spec][html] for more info.
+//!
+//! `CommonMark` introduced the requirement that whitespace exist after the
+//! opening sequence and before the text.
+//! In older markdown versions, this was not required, and headings would form
+//! without it.
+//!
+//! In markdown, it is also possible to create headings with the setext heading
+//! construct.
+//! The benefit of setext headings is that their text can include line endings.
+//! However, their limitation is that they cannot form `<h3>` through `<h6>`
+//! headings.
+//! Due to this limitation, it is recommended to use atx headings.
+//!
+//! > 🏛 **Background**: the word *setext* originates from a small markup
+//! > language by Ian Feldman from 1991.
+//! > See [*§ Setext* on Wikipedia][wiki-setext] for more info.
+//! > The word *atx* originates from a tiny markup language by Aaron Swartz
+//! > from 2002.
+//! > See [*§ atx, the true structured text format* on `aaronsw.com`][atx] for
+//! > more info.
+//!
+//! ## References
+//!
+//! *   [`heading-atx.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/heading-atx.js)
+//! *   [*§ 4.2 ATX headings* in `CommonMark`](https://spec.commonmark.org/0.30/#atx-headings)
+//!
+//! [html]: https://html.spec.whatwg.org/multipage/sections.html#the-h1,-h2,-h3,-h4,-h5,-and-h6-elements
+//! [wiki-setext]: https://en.wikipedia.org/wiki/Setext
+//! [atx]: http://www.aaronsw.com/2002/atx/
+//!
+//! <!-- To do: link `flow`, `setext` -->
+
+use crate::constant::HEADING_ATX_OPENING_FENCE_SIZE_MAX;
+use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+
+/// Start of a heading (atx).
+///
+/// ```markdown
+/// |## alpha
+/// ```
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    if Code::Char('#') == code {
+        tokenizer.enter(TokenType::AtxHeading);
+        tokenizer.enter(TokenType::AtxHeadingSequence);
+        sequence_open(tokenizer, code, 0)
+    } else {
+        (State::Nok, None)
+    }
+}
+
+/// In the opening sequence.
+///
+/// ```markdown
+/// #|# alpha
+/// ```
+fn sequence_open(tokenizer: &mut Tokenizer, code: Code, rank: usize) -> StateFnResult {
+    match code {
+        Code::None
+        | Code::CarriageReturnLineFeed
+        | Code::VirtualSpace
+        | Code::Char('\t' | '\n' | '\r' | ' ')
+            if rank > 0 =>
+        {
+            tokenizer.exit(TokenType::AtxHeadingSequence);
+            at_break(tokenizer, code)
+        }
+        Code::Char('#') if rank < HEADING_ATX_OPENING_FENCE_SIZE_MAX => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(move |tokenizer, code| {
+                    sequence_open(tokenizer, code, rank + 1)
+                })),
+                None,
+            )
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// At a break: after the opening sequence, whitespace, text, or a further
+/// sequence, before whatever comes next.
+///
+/// ```markdown
+/// ## |alpha
+/// ## alpha| bravo
+/// ## alpha |bravo
+/// ## alpha bravo|##
+/// ## alpha bravo ##|
+/// ```
+fn at_break(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            tokenizer.exit(TokenType::AtxHeading);
+            (State::Ok, Some(vec![code]))
+        }
+        Code::VirtualSpace | Code::Char('\t' | ' ') => {
+            tokenizer.enter(TokenType::AtxHeadingWhitespace);
+            whitespace(tokenizer, code)
+        }
+        Code::Char('#') => {
+            tokenizer.enter(TokenType::AtxHeadingSequence);
+            further_sequence(tokenizer, code)
+        }
+        Code::Char(_) => {
+            tokenizer.enter(TokenType::AtxHeadingText);
+            data(tokenizer, code)
+        }
+    }
+}
+
+/// In a further sequence (after whitespace).
+/// Could be normal “visible” hashes in the heading or a final sequence.
+///
+/// ```markdown
+/// ## alpha #|#
+/// ```
+fn further_sequence(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    if let Code::Char('#') = code {
+        tokenizer.consume(code);
+        (State::Fn(Box::new(further_sequence)), None)
+    } else {
+        tokenizer.exit(TokenType::AtxHeadingSequence);
+        at_break(tokenizer, code)
+    }
+}
+
+/// In whitespace.
+///
+/// ```markdown
+/// ## alpha | bravo
+/// ```
+fn whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::VirtualSpace | Code::Char('\t' | ' ') => {
+            tokenizer.consume(code);
+            (State::Fn(Box::new(whitespace)), None)
+        }
+        _ => {
+            tokenizer.exit(TokenType::AtxHeadingWhitespace);
+            at_break(tokenizer, code)
+        }
+    }
+}
+
+/// In text.
+///
+/// ```markdown
+/// ## al|pha
+/// ```
+fn data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        // Note: `#` for closing sequence must be preceded by whitespace, otherwise it’s just text.
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\t' | '\n' | '\r' | ' ') => {
+            tokenizer.exit(TokenType::AtxHeadingText);
+            at_break(tokenizer, code)
+        }
+        _ => {
+            tokenizer.consume(code);
+            (State::Fn(Box::new(data)), None)
+        }
+    }
+}
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
new file mode 100644
index 0000000..b7d5570
--- /dev/null
+++ b/src/construct/html_flow.rs
@@ -0,0 +1,1068 @@
+//! HTML (flow) is a construct that occurs in the flow content type.
+//!
+//! It’s formed with the following BNF:
+//!
+//! ```bnf
+//! html_flow ::= raw | comment | instruction | declaration | cdata | basic | complete
+//!
+//! ; Note: the closing tag name does not need to match the opening tag name.
+//! raw ::= '<' raw_tag_name [ [ ( whitespace | '>' ) *line ] *( eol *line ) ] [ '</' raw_tag_name *line ]
+//! comment ::= '<!--' [ *'-' '>' *line | *line *( eol *line ) [ '-->' *line ] ]
+//! instruction ::= '<?' [ '>' *line | *line *( eol *line ) [ '?>' *line ] ]
+//! declaration ::= '<!' ascii_alphabetic *line *( eol *line ) [ '>' *line ]
+//! cdata ::= '<![CDATA[' *line *( eol *line ) [ ']]>' *line ]
+//! basic ::= '<' [ '/' ] basic_tag_name [ [ '/' ] '>' *line *( eol 1*line ) ]
+//! complete ::= ( opening_tag | closing_tag ) ( whitespace_optional *( eol 1*line ) | whitespace_optional )
+//!
+//! raw_tag_name ::= 'pre' | 'script' | 'style' | 'textarea' ; Note: case-insensitive.
+//! basic_tag_name ::= 'address' | 'article' | 'aside' | ... ; See `constants.rs`, and note: case-insensitive.
+//! opening_tag ::= '<' tag_name *( whitespace attribute ) [ whitespace_optional '/' ] whitespace_optional '>'
+//! closing_tag ::= '</' tag_name whitespace_optional '>'
+//! tag_name ::= ascii_alphabetic *( '-' | ascii_alphanumeric )
+//! attribute ::= attribute_name [ whitespace_optional '=' whitespace_optional attribute_value ]
+//! attribute_name ::= ( ':' | '_' | ascii_alphabetic ) *( '-' | '.' | ':' | '_' | ascii_alphanumeric )
+//! attribute_value ::= '"' *( line - '"' ) '"' | "'" *( line - "'" ) "'" | 1*( line - space_or_tab - '"' - "'" - '/' - '<' - '=' - '>' - '`')
+//!
+//! whitespace ::= 1*space_or_tab
+//! whitespace_optional ::= [ space_or_tab ]
+//! line ::= code - eol
+//! eol ::= '\r' | '\r\n' | '\n'
+//! space_or_tab ::= ' ' | '\t'
+//! ```
+//!
+//! The grammar for HTML in markdown does not resemble the rules of parsing
+//! HTML according to the [*§ 13.2 Parsing HTML documents* in the HTML
+//! spec][html-parsing].
+//! As such, HTML in markdown *resembles* HTML, but is instead a (naïve?)
+//! attempt to parse an XML-like language.
+//! By extension, another notable property of the grammar is that it can
+//! result in invalid HTML, in that it allows things that wouldn’t work or
+//! wouldn’t work well in HTML, such as mismatched tags.
+//!
+//! Because the **basic** and **complete** productions in the grammar form with
+//! a tag, followed by more stuff, and stop at a blank line, it is possible to
+//! interleave (a word for switching between languages) markdown and HTML
+//! together, by placing the opening and closing tags on their own lines,
+//! with blank lines between them and markdown.
+//! For example:
+//!
+//! ```markdown
+//! <div>This is a <code>div</code> but *this* is not emphasis.</div>
+//!
+//! <div>
+//!
+//! This is a paragraph in a `div` and *this* is emphasis.
+//!
+//! </div>
+//! ```
+//!
+//! The **complete** production of HTML (flow) is not allowed to interrupt
+//! content.
+//! That means a blank line is needed between a paragraph and it.
+//! However, HTML (text) has a similar production, which will typically kick in
+//! instead.
+//!
+//! The list of tag names allowed in the **raw** production is defined in
+//! [`HTML_RAW_NAMES`][html_raw_names].
+//! This production exists because there are a few cases where markdown
+//! *inside* some elements, and hence interleaving, does not make sense.
+//!
+//! The list of tag names allowed in the **basic** production is defined in
+//! [`HTML_BLOCK_NAMES`][html_block_names].
+//! This production exists because there are a few cases where we can decide
+//! early that something is going to be a flow (block) element instead of a
+//! phrasing (inline) element.
+//! We *can* interrupt and don’t have to care too much about it being
+//! well-formed.
+//!
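+//! ## Examples
+//!
+//! As HTML is potentially dangerous, it is only compiled when explicitly
+//! allowed (a sketch, reusing the example from the crate docs):
+//!
+//! ```rust
+//! use micromark::{micromark_with_options, CompileOptions};
+//!
+//! let result = micromark_with_options("<div>\n\n# Hello, world!\n\n</div>", &CompileOptions {
+//!     allow_dangerous_html: true,
+//! });
+//!
+//! assert_eq!(result, "<div>\n<h1>Hello, world!</h1>\n</div>");
+//! ```
+//!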
+//! ## References
+//!
+//! *   [`html-flow.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/html-flow.js)
+//! *   [*§ 4.6 HTML blocks* in `CommonMark`](https://spec.commonmark.org/0.30/#html-blocks)
+//!
+//! [html_raw_names]: crate::constant::HTML_RAW_NAMES
+//! [html_block_names]: crate::constant::HTML_BLOCK_NAMES
+//! [html-parsing]: https://html.spec.whatwg.org/multipage/parsing.html#parsing
+//!
+//! <!-- To do: link stuff -->
+
+use crate::constant::{HTML_BLOCK_NAMES, HTML_RAW_NAMES, HTML_RAW_SIZE_MAX};
+use crate::construct::{blank_line::start as blank_line, partial_whitespace::start as whitespace};
+use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+
+/// Kind of HTML (flow).
+#[derive(Debug, Clone, PartialEq)]
+enum Kind {
+    /// Not yet known.
+    Unknown,
+    /// Symbol for `<script>` (condition 1).
+    Raw,
+    /// Symbol for `<!---->` (condition 2).
+    Comment,
+    /// Symbol for `<?php?>` (condition 3).
+    Instruction,
+    /// Symbol for `<!doctype>` (condition 4).
+    Declaration,
+    /// Symbol for `<![CDATA[]]>` (condition 5).
+    Cdata,
+    /// Symbol for `<div` (condition 6).
+    Basic,
+    /// Symbol for `<x>` (condition 7).
+    Complete,
+}
+
+/// Type of quote, if we’re in an attribute, in complete (condition 7).
+#[derive(Debug, Clone, PartialEq)]
+enum QuoteKind {
+    /// Not in a quoted attribute.
+    None,
+    /// In a double quoted (`"`) attribute.
+    Double,
+    /// In a single quoted (`"`) attribute.
+    Single,
+}
+
+/// State needed to parse HTML (flow).
+#[derive(Debug, Clone)]
+struct Info {
+    /// Kind of HTML (flow).
+    kind: Kind,
+    /// Whether this is a start tag (`<` not followed by `/`).
+    start_tag: bool,
+    /// Used depending on `kind` to either collect all parsed characters, or to
+    /// store expected characters.
+    buffer: Vec<char>,
+    /// `index` into `buffer` when expecting certain characters.
+    index: usize,
+    /// Current quote, when in a double or single quoted attribute value.
+    quote: QuoteKind,
+}
+
+// To do: mark as concrete (block quotes or lists can’t “pierce” into HTML).
+
+/// Start of HTML (flow), before optional whitespace.
+///
+/// ```markdown
+/// |<x />
+/// ```
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    tokenizer.enter(TokenType::HtmlFlow);
+    tokenizer.enter(TokenType::HtmlFlowData);
+    tokenizer.attempt(
+        |tokenizer, code| whitespace(tokenizer, code, TokenType::Whitespace),
+        |_ok| Box::new(before),
+    )(tokenizer, code)
+}
+
+/// After optional whitespace, before `<`.
+///
+/// ```markdown
+/// |<x />
+/// ```
+fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    if Code::Char('<') == code {
+        tokenizer.consume(code);
+        (
+            State::Fn(Box::new(|tokenizer, code| {
+                open(
+                    tokenizer,
+                    Info {
+                        kind: Kind::Unknown,
+                        start_tag: false,
+                        buffer: vec![],
+                        index: 0,
+                        quote: QuoteKind::None,
+                    },
+                    code,
+                )
+            })),
+            None,
+        )
+    } else {
+        (State::Nok, None)
+    }
+}
+
+/// After `<`, before a tag name or other stuff.
+///
+/// ```markdown
+/// <|x />
+/// <|!doctype />
+/// <|!--xxx--/>
+/// ```
+fn open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    match code {
+        Code::Char('!') => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    declaration_start(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        Code::Char('/') => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    tag_close_start(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        Code::Char('?') => {
+            // To do: lifetimes.
+            let mut clone = info;
+            clone.kind = Kind::Instruction;
+            tokenizer.consume(code);
+            // Even though this is an instruction rather than a declaration,
+            // we’re on a `?` right now, so we need to search for `>`, just
+            // like in declarations.
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    continuation_declaration_inside(tokenizer, clone, code)
+                })),
+                None,
+            )
+        }
+        Code::Char(char) if char.is_ascii_alphabetic() => {
+            // To do: lifetimes.
+            let mut clone = info;
+            clone.start_tag = true;
+            tag_name(tokenizer, clone, code)
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// After `<!`, so inside a declaration, comment, or CDATA.
+///
+/// ```markdown
+/// <!|doctype />
+/// <!|--xxx--/>
+/// <!|[CDATA[>&<]]>
+/// ```
+fn declaration_start(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    match code {
+        Code::Char('-') => {
+            tokenizer.consume(code);
+            let mut clone = info;
+            clone.kind = Kind::Comment;
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    comment_open_inside(tokenizer, clone, code)
+                })),
+                None,
+            )
+        }
+        Code::Char('[') => {
+            tokenizer.consume(code);
+            let mut clone = info;
+            clone.kind = Kind::Cdata;
+            clone.buffer = vec!['C', 'D', 'A', 'T', 'A', '['];
+            clone.index = 0;
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    cdata_open_inside(tokenizer, clone, code)
+                })),
+                None,
+            )
+        }
+        Code::Char(char) if char.is_ascii_alphabetic() => {
+            tokenizer.consume(code);
+            // To do: lifetimes.
+            let mut clone = info;
+            clone.kind = Kind::Declaration;
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    continuation_declaration_inside(tokenizer, clone, code)
+                })),
+                None,
+            )
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// After `<!-`, inside a comment, before another `-`.
+///
+/// ```markdown
+/// <!-|-xxx--/>
+/// ```
+fn comment_open_inside(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    match code {
+        Code::Char('-') => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    continuation_declaration_inside(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// After `<![`, inside CDATA, expecting `CDATA[`.
+///
+/// ```markdown
+/// <![|CDATA[>&<]]>
+/// <![CD|ATA[>&<]]>
+/// <![CDA|TA[>&<]]>
+/// <![CDAT|A[>&<]]>
+/// <![CDATA|[>&<]]>
+/// ```
+fn cdata_open_inside(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    match code {
+        Code::Char(char) if char == info.buffer[info.index] => {
+            let mut clone = info;
+            clone.index += 1;
+            tokenizer.consume(code);
+
+            if clone.index == clone.buffer.len() {
+                clone.buffer.clear();
+                (
+                    State::Fn(Box::new(|tokenizer, code| {
+                        continuation(tokenizer, clone, code)
+                    })),
+                    None,
+                )
+            } else {
+                (
+                    State::Fn(Box::new(|tokenizer, code| {
+                        cdata_open_inside(tokenizer, clone, code)
+                    })),
+                    None,
+                )
+            }
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// After `</`, in a closing tag, before a tag name.
+///
+/// ```markdown
+/// </|x>
+/// ```
+fn tag_close_start(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    match code {
+        Code::Char(char) if char.is_ascii_alphabetic() => {
+            tokenizer.consume(code);
+            // To do: lifetimes.
+            let mut clone = info;
+            clone.buffer.push(char);
+            (
+                State::Fn(Box::new(|tokenizer, code| tag_name(tokenizer, clone, code))),
+                None,
+            )
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// In a tag name.
+///
+/// ```markdown
+/// <a|b>
+/// </a|b>
+/// ```
+fn tag_name(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    match code {
+        Code::None
+        | Code::CarriageReturnLineFeed
+        | Code::VirtualSpace
+        | Code::Char('\t' | '\n' | '\r' | ' ' | '/' | '>') => {
+            let tag_name_buffer = info.buffer.iter().collect::<String>().to_lowercase();
+            let name = tag_name_buffer.as_str();
+            let slash = if let Code::Char(char) = code {
+                char == '/'
+            } else {
+                false
+            };
+
+            if !slash && info.start_tag && HTML_RAW_NAMES.contains(&name) {
+                // To do: lifetimes.
+                let mut clone = info;
+                clone.kind = Kind::Raw;
+                clone.buffer.clear();
+                continuation(tokenizer, clone, code)
+            } else if HTML_BLOCK_NAMES.contains(&name) {
+                // To do: lifetimes.
+                let mut clone = info;
+                clone.kind = Kind::Basic;
+                clone.buffer.clear();
+
+                if slash {
+                    tokenizer.consume(code);
+                    (
+                        State::Fn(Box::new(|tokenizer, code| {
+                            basic_self_closing(tokenizer, clone, code)
+                        })),
+                        None,
+                    )
+                } else {
+                    continuation(tokenizer, clone, code)
+                }
+            } else {
+                // To do: lifetimes.
+                let mut clone = info;
+                clone.kind = Kind::Complete;
+
+                // To do: do not support complete HTML when interrupting.
+                if clone.start_tag {
+                    complete_attribute_name_before(tokenizer, clone, code)
+                } else {
+                    complete_closing_tag_after(tokenizer, clone, code)
+                }
+            }
+        }
+        Code::Char(char) if char == '-' || char.is_ascii_alphanumeric() => {
+            tokenizer.consume(code);
+            let mut clone = info;
+            clone.buffer.push(char);
+            (
+                State::Fn(Box::new(|tokenizer, code| tag_name(tokenizer, clone, code))),
+                None,
+            )
+        }
+        Code::Char(_) => (State::Nok, None),
+    }
+}
+
+/// After a closing slash of a basic tag name.
+///
+/// ```markdown
+/// <div/|>
+/// ```
+fn basic_self_closing(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    match code {
+        Code::Char('>') => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    continuation(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// After a closing slash of a complete tag name.
+///
+/// ```markdown
+/// <x/|>
+/// </x/|>
+/// ```
+fn complete_closing_tag_after(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    match code {
+        Code::VirtualSpace | Code::Char('\t' | ' ') => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    complete_closing_tag_after(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        _ => complete_end(tokenizer, info, code),
+    }
+}
+
+/// At a place where an attribute name would be valid.
+///
+/// At first, this state is used after a complete tag name, after whitespace,
+/// where it expects optional attributes or the end of the tag.
+/// It is also reused after attributes, when expecting more optional
+/// attributes.
+///
+/// ```markdown
+/// <x |/>
+/// <x |:asd>
+/// <x |_asd>
+/// <x |asd>
+/// <x | >
+/// <x |>
+/// ```
+fn complete_attribute_name_before(
+    tokenizer: &mut Tokenizer,
+    info: Info,
+    code: Code,
+) -> StateFnResult {
+    match code {
+        Code::Char('/') => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    complete_end(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        Code::Char(char) if char == ':' || char == '_' || char.is_ascii_alphabetic() => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    complete_attribute_name(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        Code::VirtualSpace | Code::Char('\t' | ' ') => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    complete_attribute_name_before(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        _ => complete_end(tokenizer, info, code),
+    }
+}
+
+/// In an attribute name.
+///
+/// ```markdown
+/// <x :|>
+/// <x _|>
+/// <x a|>
+/// ```
+fn complete_attribute_name(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    match code {
+        Code::Char(char)
+            if char == '-'
+                || char == '.'
+                || char == ':'
+                || char == '_'
+                || char.is_ascii_alphanumeric() =>
+        {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    complete_attribute_name(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        _ => complete_attribute_name_after(tokenizer, info, code),
+    }
+}
+
+/// After an attribute name, before an attribute initializer, the end of the
+/// tag, or whitespace.
+///
+/// ```markdown
+/// <x a|>
+/// <x a|=b>
+/// <x a|="c">
+/// ```
+fn complete_attribute_name_after(
+    tokenizer: &mut Tokenizer,
+    info: Info,
+    code: Code,
+) -> StateFnResult {
+    match code {
+        Code::Char('=') => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    complete_attribute_value_before(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        Code::VirtualSpace | Code::Char('\t' | ' ') => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    complete_attribute_name_after(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        _ => complete_attribute_name_before(tokenizer, info, code),
+    }
+}
+
+/// Before an unquoted, double quoted, or single quoted attribute value,
+/// allowing whitespace.
+///
+/// ```markdown
+/// <x a=|b>
+/// <x a=|"c">
+/// ```
+fn complete_attribute_value_before(
+    tokenizer: &mut Tokenizer,
+    info: Info,
+    code: Code,
+) -> StateFnResult {
+    match code {
+        Code::None | Code::Char('<' | '=' | '>' | '`') => (State::Nok, None),
+        Code::Char(char) if char == '"' || char == '\'' => {
+            tokenizer.consume(code);
+            // To do: lifetimes.
+            let mut clone = info;
+            clone.quote = if char == '"' {
+                QuoteKind::Double
+            } else {
+                QuoteKind::Single
+            };
+
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    complete_attribute_value_quoted(tokenizer, clone, code)
+                })),
+                None,
+            )
+        }
+        Code::VirtualSpace | Code::Char('\t' | ' ') => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    complete_attribute_value_before(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        _ => complete_attribute_value_unquoted(tokenizer, info, code),
+    }
+}
+
+/// In a double or single quoted attribute value.
+///
+/// ```markdown
+/// <x a="|">
+/// <x a='|'>
+/// ```
+fn complete_attribute_value_quoted(
+    tokenizer: &mut Tokenizer,
+    info: Info,
+    code: Code,
+) -> StateFnResult {
+    let marker = if info.quote == QuoteKind::Double {
+        '"'
+    } else {
+        '\''
+    };
+
+    match code {
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => (State::Nok, None),
+        Code::Char(char) if char == marker => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    complete_attribute_value_quoted_after(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        _ => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    complete_attribute_value_quoted(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+    }
+}
+
+/// In an unquoted attribute value.
+///
+/// ```markdown
+/// <x a=b|c>
+/// ```
+fn complete_attribute_value_unquoted(
+    tokenizer: &mut Tokenizer,
+    info: Info,
+    code: Code,
+) -> StateFnResult {
+    match code {
+        Code::None
+        | Code::CarriageReturnLineFeed
+        | Code::VirtualSpace
+        | Code::Char('\t' | '\n' | '\r' | ' ' | '"' | '\'' | '/' | '<' | '=' | '>' | '`') => {
+            complete_attribute_name_after(tokenizer, info, code)
+        }
+        Code::Char(_) => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    complete_attribute_value_unquoted(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+    }
+}
+
+/// After a double or single quoted attribute value, before whitespace or the
+/// end of the tag.
+///
+/// ```markdown
+/// <x a="b"|>
+/// ```
+fn complete_attribute_value_quoted_after(
+    tokenizer: &mut Tokenizer,
+    info: Info,
+    code: Code,
+) -> StateFnResult {
+    match code {
+        Code::VirtualSpace | Code::Char('\t' | ' ' | '/' | '>') => {
+            complete_attribute_name_before(tokenizer, info, code)
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// In certain circumstances of a complete tag where only a `>` is allowed.
+///
+/// ```markdown
+/// <x a="b"|>
+/// ```
+fn complete_end(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    match code {
+        Code::Char('>') => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    complete_after(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// After `>` in a complete tag.
+///
+/// ```markdown
+/// <x>|
+/// ```
+fn complete_after(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    match code {
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            continuation(tokenizer, info, code)
+        }
+        Code::VirtualSpace | Code::Char('\t' | ' ') => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    complete_after(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        Code::Char(_) => (State::Nok, None),
+    }
+}
+
+/// Inside continuation of any HTML kind.
+///
+/// ```markdown
+/// <!--x|xx-->
+/// ```
+fn continuation(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    match code {
+        Code::Char('-') if info.kind == Kind::Comment => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    continuation_comment_inside(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        Code::Char('<') if info.kind == Kind::Raw => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    continuation_raw_tag_open(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        Code::Char('>') if info.kind == Kind::Declaration => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    continuation_close(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        Code::Char('?') if info.kind == Kind::Instruction => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    continuation_declaration_inside(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        Code::Char(']') if info.kind == Kind::Cdata => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    continuation_character_data_inside(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')
+            if info.kind == Kind::Basic || info.kind == Kind::Complete =>
+        {
+            let clone = info;
+
+            tokenizer.check(blank_line_before, |ok| {
+                if ok {
+                    Box::new(|tokenizer, code| continuation_close(tokenizer, clone, code))
+                } else {
+                    Box::new(|tokenizer, code| continuation_at_line_ending(tokenizer, clone, code))
+                }
+            })(tokenizer, code)
+        }
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            continuation_at_line_ending(tokenizer, info, code)
+        }
+        _ => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    continuation(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+    }
+}
+
+/// In continuation, before an eol or eof.
+///
+/// ```markdown
+/// <x>|
+/// ```
+fn continuation_at_line_ending(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    tokenizer.exit(TokenType::HtmlFlowData);
+    html_continue_start(tokenizer, info, code)
+}
+
+/// In continuation, after an eol.
+///
+/// ```markdown
+/// <x>|
+/// asd
+/// ```
+fn html_continue_start(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    match code {
+        Code::None => {
+            tokenizer.exit(TokenType::HtmlFlow);
+            (State::Ok, Some(vec![code]))
+        }
+        // To do: do not allow lazy lines.
+        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            tokenizer.enter(TokenType::LineEnding);
+            tokenizer.consume(code);
+            tokenizer.exit(TokenType::LineEnding);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    html_continue_start(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        _ => {
+            tokenizer.enter(TokenType::HtmlFlowData);
+            continuation(tokenizer, info, code)
+        }
+    }
+}
+
+/// In comment continuation, after one `-`, expecting another.
+///
+/// ```markdown
+/// <!--xxx-|->
+/// ```
+fn continuation_comment_inside(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    match code {
+        Code::Char('-') if info.kind == Kind::Comment => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    continuation_declaration_inside(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        _ => continuation(tokenizer, info, code),
+    }
+}
+
+/// In raw continuation, after `<`, expecting a `/`.
+///
+/// ```markdown
+/// <script>console.log(1)<|/script>
+/// ```
+fn continuation_raw_tag_open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    match code {
+        Code::Char('/') => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    continuation_raw_end_tag(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        _ => continuation(tokenizer, info, code),
+    }
+}
+
+/// In raw continuation, after `</`, expecting or inside a raw tag name.
+///
+/// ```markdown
+/// <script>console.log(1)</|script>
+/// <script>console.log(1)</s|cript>
+/// <script>console.log(1)</script|>
+/// ```
+fn continuation_raw_end_tag(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    match code {
+        Code::Char('>') => {
+            let tag_name_buffer = info.buffer.iter().collect::<String>().to_lowercase();
+            // To do: lifetimes.
+            let mut clone = info;
+            clone.buffer.clear();
+
+            if HTML_RAW_NAMES.contains(&tag_name_buffer.as_str()) {
+                tokenizer.consume(code);
+                (
+                    State::Fn(Box::new(|tokenizer, code| {
+                        continuation_close(tokenizer, clone, code)
+                    })),
+                    None,
+                )
+            } else {
+                continuation(tokenizer, clone, code)
+            }
+        }
+        Code::Char(char) if char.is_ascii_alphabetic() && info.buffer.len() < HTML_RAW_SIZE_MAX => {
+            tokenizer.consume(code);
+            // To do: lifetimes.
+            let mut clone = info;
+            clone.buffer.push(char);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    continuation_raw_end_tag(tokenizer, clone, code)
+                })),
+                None,
+            )
+        }
+        _ => continuation(tokenizer, info, code),
+    }
+}
+
+/// In cdata continuation, after `]`, expecting `]>`.
+///
+/// ```markdown
+/// <![CDATA[>&<]|]>
+/// ```
+fn continuation_character_data_inside(
+    tokenizer: &mut Tokenizer,
+    info: Info,
+    code: Code,
+) -> StateFnResult {
+    match code {
+        Code::Char(']') => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    continuation_declaration_inside(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        _ => continuation(tokenizer, info, code),
+    }
+}
+
+/// In declaration or instruction continuation, waiting for `>` to close it.
+///
+/// ```markdown
+/// <!--|>
+/// <?ab?|>
+/// <?|>
+/// <!q|>
+/// <!--ab--|>
+/// <!--ab--|->
+/// <!--ab---|>
+/// <![CDATA[>&<]]|>
+/// ```
+fn continuation_declaration_inside(
+    tokenizer: &mut Tokenizer,
+    info: Info,
+    code: Code,
+) -> StateFnResult {
+    match code {
+        Code::Char('>') => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    continuation_close(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        Code::Char('-') if info.kind == Kind::Comment => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    continuation_declaration_inside(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        _ => continuation(tokenizer, info, code),
+    }
+}
+
+/// In closed continuation: everything we get until the eol/eof is part of it.
+///
+/// ```markdown
+/// <!doctype>|
+/// ```
+fn continuation_close(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    match code {
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            tokenizer.exit(TokenType::HtmlFlowData);
+            tokenizer.exit(TokenType::HtmlFlow);
+            (State::Ok, Some(vec![code]))
+        }
+        _ => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    continuation_close(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+    }
+}
+
+/// Before a line ending, expecting a blank line.
+///
+/// ```markdown
+/// <div>|
+///
+/// ```
+fn blank_line_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    tokenizer.enter(TokenType::LineEnding);
+    tokenizer.consume(code);
+    tokenizer.exit(TokenType::LineEnding);
+    (State::Fn(Box::new(blank_line)), None)
+}
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
new file mode 100644
index 0000000..d671db6
--- /dev/null
+++ b/src/construct/mod.rs
@@ -0,0 +1,11 @@
+//! Constructs found in markdown.
+
+pub mod blank_line;
+pub mod character_escape;
+pub mod character_reference;
+pub mod code_fenced;
+pub mod code_indented;
+pub mod heading_atx;
+pub mod html_flow;
+pub mod partial_whitespace;
+pub mod thematic_break;
diff --git a/src/construct/partial_whitespace.rs b/src/construct/partial_whitespace.rs
new file mode 100644
index 0000000..dd0d2b5
--- /dev/null
+++ b/src/construct/partial_whitespace.rs
@@ -0,0 +1,66 @@
+//! A little helper to parse `space_or_tab`.
+//!
+//! They’re formed with the following BNF:
+//!
+//! ```bnf
+//! space_or_tab ::= 1*(' ' | '\t')
+//! ```
+//!
+//! Depending on where whitespace can occur, it can be optional (or not),
+//! and present in the rendered result (or not).
+//!
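+//! ## Examples
+//!
+//! Constructs use this helper through `Tokenizer::attempt`, passing the token
+//! type to use for the consumed whitespace (a sketch, mirroring `html_flow`):
+//!
+//! ```rust,ignore
+//! tokenizer.attempt(
+//!     |tokenizer, code| whitespace(tokenizer, code, TokenType::Whitespace),
+//!     |_ok| Box::new(before),
+//! )(tokenizer, code)
+//! ```
+//!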
+//! ## References
+//!
+//! *   [`micromark-factory-space/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-space/dev/index.js)
+//!
+//! <!-- To do: link stuff -->
+
+use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+
+// To do: should `token_type` be a `Some`, with `None` defaulting to something?
+// To do: should `max: Some(usize)` be added?
+
+/// Before whitespace.
+///
+/// ```markdown
+/// alpha| bravo
+/// ```
+pub fn start(tokenizer: &mut Tokenizer, code: Code, token_type: TokenType) -> StateFnResult {
+    match code {
+        Code::VirtualSpace | Code::Char('\t' | ' ') => {
+            // To do: lifetimes.
+            let clone = token_type.clone();
+            tokenizer.enter(token_type);
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| inside(tokenizer, code, clone))),
+                None,
+            )
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// In whitespace.
+///
+/// ```markdown
+/// alpha |bravo
+/// alpha | bravo
+/// ```
+fn inside(tokenizer: &mut Tokenizer, code: Code, token_type: TokenType) -> StateFnResult {
+    match code {
+        Code::VirtualSpace | Code::Char('\t' | ' ') => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    inside(tokenizer, code, token_type)
+                })),
+                None,
+            )
+        }
+        _ => {
+            tokenizer.exit(token_type);
+            (State::Ok, Some(vec![code]))
+        }
+    }
+}
diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs
new file mode 100644
index 0000000..15ebac7
--- /dev/null
+++ b/src/construct/thematic_break.rs
@@ -0,0 +1,137 @@
+//! Thematic breaks, sometimes called horizontal rules, are a construct that
+//! occurs in the flow content type.
+//!
+//! They’re formed with the following BNF:
+//!
+//! ```bnf
+//! ; Restriction: all markers must be identical.
+//! ; Restriction: at least 3 markers must be used.
+//! thematic_break ::= *space_or_tab 1*(1*marker *space_or_tab)
+//!
+//! space_or_tab ::= ' ' | '\t'
+//! marker ::= '*' | '-' | '_'
+//! ```
+//!
+//! Thematic breaks in markdown typically relate to the HTML element `<hr>`.
+//! See [*§ 4.4.2 The `hr` element* in the HTML spec][html] for more info.
+//!
+//! It is recommended to use exactly three asterisks without whitespace when
+//! writing markdown: using more than three markers has no effect other than
+//! wasting space.
+//! Thematic breaks formed with asterisks or dashes can interfere with lists
+//! if there is whitespace between the markers: `* * *` and `- - -`.
+//! For these reasons, it is recommended not to use spaces or tabs between the
+//! markers.
+//! Thematic breaks formed with dashes (without whitespace) can also form
+//! setext headings.
+//! As dashes and underscores frequently occur in natural language and URLs, it
+//! is recommended to use asterisks for thematic breaks to distinguish from
+//! such use.
+//! Because asterisks can be used to form most markdown constructs, using
+//! them has the added benefit of making it easier to gloss over markdown: you
+//! can look for asterisks to find syntax while not worrying about other
+//! characters.
+//!
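+//! ## Examples
+//!
+//! A minimal sketch of the public API (micromark serializes thematic breaks
+//! as `<hr />`):
+//!
+//! ```rust
+//! use micromark::micromark;
+//!
+//! assert_eq!(micromark("***"), "<hr />");
+//! ```
+//!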
+//! ## References
+//!
+//! *   [`thematic-break.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/thematic-break.js)
+//! *   [*§ 4.1 Thematic breaks* in `CommonMark`](https://spec.commonmark.org/0.30/#thematic-breaks)
+//!
+//! [html]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-hr-element
+//!
+//! <!-- To do: link `flow` -->
+
+use crate::constant::THEMATIC_BREAK_MARKER_COUNT_MIN;
+use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+
+/// Start of a thematic break.
+///
+/// ```markdown
+/// |***
+/// ```
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::Char(char) if char == '*' || char == '-' || char == '_' => {
+            tokenizer.enter(TokenType::ThematicBreak);
+            at_break(tokenizer, code, char, 0)
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// After something but before something else.
+///
+/// ```markdown
+/// |***
+/// *| * *
+/// * |* *
+/// ```
+fn at_break(tokenizer: &mut Tokenizer, code: Code, marker: char, size: usize) -> StateFnResult {
+    match code {
+        Code::Char(char) if char == marker => {
+            tokenizer.enter(TokenType::ThematicBreakSequence);
+            sequence(tokenizer, code, marker, size)
+        }
+        Code::VirtualSpace | Code::Char('\t' | ' ') => {
+            tokenizer.enter(TokenType::ThematicBreakWhitespace);
+            whitespace(tokenizer, code, marker, size)
+        }
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')
+            if size >= THEMATIC_BREAK_MARKER_COUNT_MIN =>
+        {
+            tokenizer.exit(TokenType::ThematicBreak);
+            (State::Ok, Some(vec![code]))
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// In a sequence of markers.
+///
+/// ```markdown
+/// |***
+/// *|**
+/// **|*
+/// ```
+fn sequence(tokenizer: &mut Tokenizer, code: Code, marker: char, size: usize) -> StateFnResult {
+    match code {
+        Code::Char(char) if char == marker => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(move |tokenizer, code| {
+                    sequence(tokenizer, code, marker, size + 1)
+                })),
+                None,
+            )
+        }
+        _ => {
+            tokenizer.exit(TokenType::ThematicBreakSequence);
+            at_break(tokenizer, code, marker, size)
+        }
+    }
+}
+
+/// In whitespace.
+///
+/// ```markdown
+/// * |* *
+/// * | * *
+/// ```
+fn whitespace(tokenizer: &mut Tokenizer, code: Code, marker: char, size: usize) -> StateFnResult {
+    match code {
+        Code::VirtualSpace | Code::Char('\t' | ' ') => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(move |tokenizer, code| {
+                    whitespace(tokenizer, code, marker, size)
+                })),
+                None,
+            )
+        }
+        _ => {
+            tokenizer.exit(TokenType::ThematicBreakWhitespace);
+            at_break(tokenizer, code, marker, size)
+        }
+    }
+}
diff --git a/src/content/flow.rs b/src/content/flow.rs
new file mode 100644
index 0000000..21c5721
--- /dev/null
+++ b/src/content/flow.rs
@@ -0,0 +1,258 @@
+//! The flow content type.
+//!
+//! **Flow** represents the sections, such as headings, code, and content,
+//! which are parsed per line.
+//! An example is HTML, which has a certain starting condition (such as
+//! `<script>` on its own line), then continues for a while, until an end
+//! condition is found (such as `</style>`).
+//! If that line with an end condition is never found, the flow continues
+//! until the end of the document.
+//!
+//! The constructs found in flow are:
+//!
+//! *   [Blank line][crate::construct::blank_line]
+//! *   [Code (fenced)][crate::construct::code_fenced]
+//! *   [Code (indented)][crate::construct::code_indented]
+//! *   [Heading (atx)][crate::construct::heading_atx]
+//! *   [HTML (flow)][crate::construct::html_flow]
+//! *   [Thematic break][crate::construct::thematic_break]
+//!
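+//! The entry point is `flow`, which turns codes into events; `parser.rs` uses
+//! it like this (a sketch; `as_codes` lives in the tokenizer):
+//!
+//! ```rust,ignore
+//! let codes = as_codes("## alpha");
+//! let events = flow(codes);
+//! ```
+//!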
+//! <!-- To do: `setext` in content? Link to content. -->
+
+use crate::construct::{
+    blank_line::start as blank_line, code_fenced::start as code_fenced,
+    code_indented::start as code_indented, heading_atx::start as heading_atx,
+    html_flow::start as html_flow, partial_whitespace::start as whitespace,
+    thematic_break::start as thematic_break,
+};
+use crate::tokenizer::{Code, Event, State, StateFnResult, TokenType, Tokenizer};
+
+/// Turn `codes` as the flow content type into events.
+// To do: remove this `allow` when all the content types are glued together.
+#[allow(dead_code)]
+pub fn flow(codes: Vec<Code>) -> Vec<Event> {
+    let mut tokenizer = Tokenizer::new();
+    let (state, remainder) = tokenizer.feed(codes, Box::new(start), true);
+
+    if let Some(ref x) = remainder {
+        if !x.is_empty() {
+            unreachable!("expected no final remainder {:?}", x);
+        }
+    }
+
+    match state {
+        State::Ok => {}
+        _ => unreachable!("expected final state to be `State::Ok`"),
+    }
+
+    tokenizer.events
+}
+
+/// Before flow.
+///
+/// First we assume a blank line.
+///
+/// ```markdown
+/// |
+/// |## alpha
+/// |    bravo
+/// |***
+/// ```
+fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::None => (State::Ok, None),
+        _ => tokenizer.attempt(blank_line, |ok| {
+            Box::new(if ok { blank_line_after } else { initial_before })
+        })(tokenizer, code),
+    }
+}
+
+/// After a blank line.
+///
+/// Move to `start` afterwards.
+///
+/// ```markdown
+/// ␠␠|
+/// ```
+fn blank_line_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::None => (State::Ok, None),
+        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            tokenizer.enter(TokenType::BlankLineEnding);
+            tokenizer.consume(code);
+            tokenizer.exit(TokenType::BlankLineEnding);
+            (State::Fn(Box::new(start)), None)
+        }
+        _ => unreachable!("expected eol/eof after blank line `{:?}`", code),
+    }
+}
+
+/// Before flow (initial).
+///
+/// “Initial” flow means unprefixed flow, so right at the start of a line.
+/// Interestingly, the only flow (initial) construct is indented code.
+/// Move to `before` afterwards.
+///
+/// ```markdown
+/// |qwe
+/// |    asd
+/// ```
+fn initial_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::None => (State::Ok, None),
+        _ => tokenizer.attempt(code_indented, |ok| {
+            Box::new(if ok {
+                after
+            } else {
+                initial_before_not_code_indented
+            })
+        })(tokenizer, code),
+    }
+}
+
+/// After a flow construct.
+///
+/// ```markdown
+/// ## alpha|
+/// |
+/// ~~~js
+/// asd
+/// ~~~|
+/// ```
+fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::None => (State::Ok, None),
+        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            tokenizer.enter(TokenType::LineEnding);
+            tokenizer.consume(code);
+            tokenizer.exit(TokenType::LineEnding);
+            (State::Fn(Box::new(start)), None)
+        }
+        _ => unreachable!("unexpected non-eol/eof after flow `{:?}`", code),
+    }
+}
+
+/// Before flow (initial), but not at code (indented).
+///
+/// ```markdown
+/// |qwe
+/// ```
+fn initial_before_not_code_indented(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::None => (State::Ok, None),
+        _ => tokenizer.attempt(code_fenced, |ok| {
+            Box::new(if ok {
+                after
+            } else {
+                initial_before_not_code_fenced
+            })
+        })(tokenizer, code),
+    }
+}
+
+/// Before flow (initial), but not at code (fenced).
+///
+/// ```markdown
+/// |qwe
+/// ```
+fn initial_before_not_code_fenced(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::None => (State::Ok, None),
+        _ => tokenizer.attempt(html_flow, |ok| Box::new(if ok { after } else { before }))(
+            tokenizer, code,
+        ),
+    }
+}
+
+/// Before flow, but not at code (indented) or code (fenced).
+///
+/// Compared to flow (initial), normal flow can be arbitrarily prefixed.
+///
+/// ```markdown
+/// |qwe
+/// ```
+pub fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    tokenizer.attempt(
+        |tokenizer, code| whitespace(tokenizer, code, TokenType::Whitespace),
+        |_ok| Box::new(before_after_prefix),
+    )(tokenizer, code)
+}
+
+/// Before flow, after potential whitespace.
+///
+/// ```markdown
+/// |qwe
+/// ```
+pub fn before_after_prefix(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    tokenizer.attempt(heading_atx, |ok| {
+        Box::new(if ok { after } else { before_not_heading_atx })
+    })(tokenizer, code)
+}
+
+/// Before flow, but not before a heading (atx)
+///
+/// ```markdown
+/// |qwe
+/// ```
+pub fn before_not_heading_atx(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    tokenizer.attempt(thematic_break, |ok| {
+        Box::new(if ok { after } else { before_not_thematic_break })
+    })(tokenizer, code)
+}
+
+/// Before flow, but not before a heading (atx) or thematic break.
+///
+/// ```markdown
+/// |qwe
+/// ```
+pub fn before_not_thematic_break(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    tokenizer.attempt(html_flow, |ok| {
+        Box::new(if ok { after } else { content_before })
+    })(tokenizer, code)
+}
+
+/// Before flow, but not before a heading (atx), thematic break, or HTML (flow).
+///
+/// At this point, we’re at content (zero or more definitions and zero or one
+/// paragraph/setext heading).
+///
+/// ```markdown
+/// |qwe
+/// ```
+// To do: currently only parses a single line.
+// To do:
+// - Multiline
+// - One or more definitions.
+// - Setext heading.
+fn content_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            after(tokenizer, code)
+        }
+        _ => {
+            tokenizer.enter(TokenType::Content);
+            tokenizer.enter(TokenType::ContentPhrasing);
+            tokenizer.consume(code);
+            (State::Fn(Box::new(content)), None)
+        }
+    }
+}
+
+/// In content.
+///
+/// ```markdown
+/// al|pha
+/// ```
+// To do: lift limitations as documented above.
+fn content(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            tokenizer.exit(TokenType::ContentPhrasing);
+            tokenizer.exit(TokenType::Content);
+            after(tokenizer, code)
+        }
+        _ => {
+            tokenizer.consume(code);
+            (State::Fn(Box::new(content)), None)
+        }
+    }
+}
diff --git a/src/content/mod.rs b/src/content/mod.rs
new file mode 100644
index 0000000..d5771a3
--- /dev/null
+++ b/src/content/mod.rs
@@ -0,0 +1,4 @@
+//! Content types found in markdown.
+
+pub mod flow;
+pub mod string;
diff --git a/src/content/string.rs b/src/content/string.rs
new file mode 100644
index 0000000..a8a81b2
--- /dev/null
+++ b/src/content/string.rs
@@ -0,0 +1,120 @@
+//! The string content type.
+//!
+//! **String** is a limited **text**-like content type which only allows
+//! character escapes and character references.
+//! It exists in things such as identifiers (media references, definitions),
+//! titles, URLs, code (fenced) info and meta parts.
+//!
+//! The constructs found in string are:
+//!
+//! *   [Character escape][crate::construct::character_escape]
+//! *   [Character reference][crate::construct::character_reference]
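+//!
+//! The entry point is `string`, which turns codes into events; anything that
+//! is not an escape or reference becomes plain data (a sketch):
+//!
+//! ```rust,ignore
+//! let events = string(as_codes("a \\& b &amp; c"));
+//! ```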
+
+use crate::construct::{
+    character_escape::start as character_escape, character_reference::start as character_reference,
+};
+use crate::tokenizer::{Code, Event, State, StateFnResult, TokenType, Tokenizer};
+
+/// Turn `codes` as the string content type into events.
+// To do: remove this `allow` when all the content types are glued together.
+#[allow(dead_code)]
+pub fn string(codes: Vec<Code>) -> Vec<Event> {
+    let mut tokenizer = Tokenizer::new();
+    let (state, remainder) = tokenizer.feed(codes, Box::new(before), true);
+
+    if let Some(ref x) = remainder {
+        if !x.is_empty() {
+            unreachable!("expected no final remainder {:?}", x);
+        }
+    }
+
+    match state {
+        State::Ok => {}
+        _ => unreachable!("expected final state to be `State::Ok`"),
+    }
+
+    tokenizer.events
+}
+
+/// Before string.
+///
+/// First we assume character reference.
+///
+/// ```markdown
+/// |&amp;
+/// |\&
+/// |qwe
+/// ```
+fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::None => (State::Ok, None),
+        _ => tokenizer.attempt(character_reference, |ok| {
+            Box::new(if ok {
+                before
+            } else {
+                before_not_character_reference
+            })
+        })(tokenizer, code),
+    }
+}
+
+/// Before string, not at a character reference.
+///
+/// Assume character escape.
+///
+/// ```markdown
+/// |\&
+/// |qwe
+/// ```
+fn before_not_character_reference(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::None => (State::Ok, None),
+        _ => tokenizer.attempt(character_escape, |ok| {
+            Box::new(if ok {
+                before
+            } else {
+                before_not_character_escape
+            })
+        })(tokenizer, code),
+    }
+}
+
+/// Before string, not at a character reference or character escape.
+///
+/// We’re at data.
+///
+/// ```markdown
+/// |qwe
+/// ```
+fn before_not_character_escape(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    if let Code::None = code {
+        (State::Ok, None)
+    } else {
+        tokenizer.enter(TokenType::Data);
+        tokenizer.consume(code);
+        (State::Fn(Box::new(in_data)), None)
+    }
+}
+
+/// In data.
+///
+/// ```markdown
+/// q|w|e
+/// ```
+fn in_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::None => {
+            tokenizer.exit(TokenType::Data);
+            (State::Ok, None)
+        }
+        // To do: somehow get these markers from constructs.
+        Code::Char('&' | '\\') => {
+            tokenizer.exit(TokenType::Data);
+            before(tokenizer, code)
+        }
+        _ => {
+            tokenizer.consume(code);
+            (State::Fn(Box::new(in_data)), None)
+        }
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..1624a22
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,52 @@
+//! Public API of micromark.
+//!
+//! This module exposes [`micromark`][] (and [`micromark_with_options`][]).
+//! `micromark` is a safe way to transform (untrusted?) markdown into HTML.
+//! `micromark_with_options` allows you to configure how markdown is turned into
+//! HTML, such as by allowing dangerous HTML when you trust it.
+
+mod compiler;
+mod constant;
+mod construct;
+mod content;
+mod parser;
+mod tokenizer;
+mod util;
+
+use crate::compiler::compile;
+pub use crate::compiler::CompileOptions;
+use crate::parser::parse;
+
+/// Turn markdown into HTML.
+///
+/// ## Examples
+///
+/// ```rust
+/// use micromark::micromark;
+///
+/// let result = micromark("# Hello, world!");
+///
+/// assert_eq!(result, "<h1>Hello, world!</h1>");
+/// ```
+#[must_use]
+pub fn micromark(value: &str) -> String {
+    micromark_with_options(value, &CompileOptions::default())
+}
+
+/// Turn markdown into HTML, with configuration.
+///
+/// ## Examples
+///
+/// ```rust
+/// use micromark::{micromark_with_options, CompileOptions};
+///
+/// let result = micromark_with_options("<div>\n\n# Hello, world!\n\n</div>", &CompileOptions {
+///     allow_dangerous_html: true,
+/// });
+///
+/// assert_eq!(result, "<div>\n<h1>Hello, world!</h1>\n</div>");
+/// ```
+#[must_use]
+pub fn micromark_with_options(value: &str, options: &CompileOptions) -> String {
+    let (events, codes) = parse(value);
+    compile(&events, &codes, options)
+}
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..10c6e7a
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,14 @@
+//! Turn a string of markdown into events.
+// To do: this should start with `containers`, when they’re done.
+// To do: definitions and such will mean more data has to be passed around.
+use crate::content::flow::flow;
+use crate::tokenizer::{as_codes, Code, Event};
+
+/// Turn a string of markdown into events.
+/// Passes the codes back so the compiler can access the source.
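+///
+/// ## Examples
+///
+/// A sketch of how this is used (illustrative only, since `parser` is a
+/// private module):
+///
+/// ```rust ignore
+/// use micromark::parser::parse;
+///
+/// let (events, codes) = parse("# asd");
+/// // `events` describes the heading; `codes` holds the source characters.
+/// ```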
+pub fn parse(value: &str) -> (Vec<Event>, Vec<Code>) {
+    let codes = as_codes(value);
+    // To do: pass a reference to this around, and slices in the (back)feeding. Might be tough.
+    let events = flow(codes.clone());
+    (events, codes)
+}
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
new file mode 100644
index 0000000..c8b1440
--- /dev/null
+++ b/src/tokenizer.rs
@@ -0,0 +1,580 @@
+//! The tokenizer glues states from the state machine together.
+//!
+//! It facilitates everything needed to turn codes into tokens and events with
+//! a state machine.
+//! It also enables logic needed for parsing markdown, such as an [`attempt`][]
+//! to parse something, which can succeed or, when unsuccessful, revert the
+//! attempt.
+//! Similarly, a [`check`][] exists, which does the same as an `attempt` but
+//! reverts even if successful.
+//!
+//! [`attempt`]: Tokenizer::attempt
+//! [`check`]: Tokenizer::check
+
+use crate::constant::TAB_SIZE;
+
+/// Semantic label of a span.
+// To do: figure out how to share this so extensions can add their own stuff,
+// though perhaps that’s impossible and we should inline all extensions?
+// To do: document each variant.
+#[derive(Debug, Clone, PartialEq)]
+pub enum TokenType {
+    AtxHeading,
+    AtxHeadingSequence,
+    AtxHeadingWhitespace,
+    AtxHeadingText,
+
+    CharacterEscape,
+    CharacterEscapeMarker,
+    CharacterEscapeValue,
+
+    CharacterReference,
+    CharacterReferenceMarker,
+    CharacterReferenceMarkerNumeric,
+    CharacterReferenceMarkerHexadecimal,
+    CharacterReferenceMarkerSemi,
+    CharacterReferenceValue,
+
+    CodeFenced,
+    CodeFencedFence,
+    CodeFencedFenceSequence,
+    CodeFencedFenceWhitespace,
+    CodeFencedFenceInfo,
+    CodeFencedFenceMeta,
+
+    CodeIndented,
+    CodeIndentedPrefixWhitespace,
+
+    CodeFlowChunk,
+
+    Data,
+
+    HtmlFlow,
+    HtmlFlowData,
+
+    ThematicBreak,
+    ThematicBreakSequence,
+    ThematicBreakWhitespace,
+
+    Whitespace,
+    LineEnding,
+    BlankLineEnding,
+    BlankLineWhitespace,
+
+    Content,
+    ContentPhrasing,
+    ChunkString,
+}
+
+/// Enum representing a character code.
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub enum Code {
+    /// End of the input stream (called eof).
+    None,
+    /// Used to make parsing line endings easier: it represents the sequence
+    /// `Code::Char('\r')` followed by `Code::Char('\n')` as one code.
+    CarriageReturnLineFeed,
+    /// The expansion of a tab (`Code::Char('\t')`): depending on where the
+    /// tab occurred, it’s followed by 0 to 3 (both inclusive)
+    /// `Code::VirtualSpace`s.
+    VirtualSpace,
+    /// The most frequent variant of this enum is `Code::Char(char)`, which just
+    /// represents a char, but micromark adds meaning to certain other values.
+    Char(char),
+}
+
+/// A location in the document (`line`/`column`/`offset`).
+///
+/// The interface for the location in the document comes from unist `Point`:
+/// <https://github.com/syntax-tree/unist#point>.
+#[derive(Debug, Clone, PartialEq)]
+pub struct Point {
+    /// 1-indexed line number.
+    pub line: usize,
+    /// 1-indexed column number.
+    /// Note that this increases up to a tab stop for tabs.
+    /// Some editors count tabs as 1 character, so this position is not always
+    /// the same as in editors.
+    pub column: usize,
+    /// 0-indexed position in the document.
+    pub offset: usize,
+}
+
+/// Possible event types.
+#[derive(Debug, PartialEq)]
+pub enum EventType {
+    /// The start of something.
+    Enter,
+    /// The end of something.
+    Exit,
+}
+
+/// Something semantic happening somewhere.
+#[derive(Debug)]
+pub struct Event {
+    pub event_type: EventType,
+    pub token_type: TokenType,
+    pub point: Point,
+    pub index: usize,
+}
+
+/// The essence of the state machine is the `StateFn`: a function that
+/// deals with a single passed [`Code`][] and yields a [`StateFnResult`][].
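+///
+/// ```rust ignore
+/// // A sketch of two state functions (the names are illustrative): `bang`
+/// // consumes one `!` and moves on to `done`, which accepts the EOF.
+/// fn bang(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+///     match code {
+///         Code::Char('!') => {
+///             tokenizer.consume(code);
+///             (State::Fn(Box::new(done)), None)
+///         }
+///         _ => (State::Nok, None),
+///     }
+/// }
+///
+/// fn done(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+///     match code {
+///         Code::None => (State::Ok, None),
+///         _ => (State::Nok, None),
+///     }
+/// }
+/// ```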
+pub type StateFn = dyn FnOnce(&mut Tokenizer, Code) -> StateFnResult;
+/// Each [`StateFn`][] yields something back: primarily the state.
+/// In certain cases, it can also yield back codes that were passed down but not consumed.
+pub type StateFnResult = (State, Option<Vec<Code>>);
+
+/// The result of a state.
+pub enum State {
+    /// There is a future state: a boxed [`StateFn`][] to pass the next code to.
+    Fn(Box<StateFn>),
+    /// The state is successful.
+    Ok,
+    /// The state is not successful.
+    Nok,
+}
+
+/// The internal state of a tokenizer.
+///
+/// Not to be confused with states from the state machine: this is all the
+/// information about where we currently are and what’s going on.
+#[derive(Debug, Clone)]
+struct InternalState {
+    /// Length of `events`. We only add to events, so reverting will just pop stuff off.
+    events_len: usize,
+    /// Length of the stack. It’s not allowed to decrease the stack in a check or an attempt.
+    stack_len: usize,
+    /// Current code.
+    current: Code,
+    /// `index` in codes of the current code.
+    index: usize,
+    /// Current relative and absolute position in the file.
+    point: Point,
+}
+
+/// A tokenizer itself.
+#[derive(Debug)]
+pub struct Tokenizer {
+    /// Track whether a character is expected to be consumed, and whether it’s
+    /// actually consumed.
+    ///
+    /// Tracked to make sure everything’s valid.
+    consumed: bool,
+    /// Semantic labels of one or more codes in `codes`.
+    pub events: Vec<Event>,
+    /// Hierarchy of semantic labels.
+    ///
+    /// Tracked to make sure everything’s valid.
+    stack: Vec<TokenType>,
+    /// Current character code.
+    current: Code,
+    /// `index` in codes of the current code.
+    index: usize,
+    /// Current relative and absolute place in the file.
+    point: Point,
+}
+
+impl Tokenizer {
+    /// Create a new tokenizer.
+    pub fn new() -> Tokenizer {
+        Tokenizer {
+            current: Code::None,
+            index: 0,
+            consumed: true,
+            point: Point {
+                line: 1,
+                column: 1,
+                offset: 0,
+            },
+            stack: vec![],
+            events: vec![],
+        }
+    }
+
+    /// Prepare for the next code to be consumed.
+    fn expect(&mut self, code: Code) {
+        assert!(self.consumed, "expected previous character to be consumed");
+        self.consumed = false;
+        self.current = code;
+    }
+
+    /// Consume the current character.
+    /// Each [`StateFn`][] is expected to either call this to signal that the
+    /// code is used, or to pass the code on to a next `StateFn`.
+    pub fn consume(&mut self, code: Code) {
+        assert_eq!(
+            code, self.current,
+            "expected given code to equal expected code"
+        );
+        log::debug!("consume: `{:?}` ({:?})", code, self.point);
+        assert!(!self.consumed, "expected code to not have been consumed: this might be because `x(code)` instead of `x` was returned");
+
+        match code {
+            Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+                self.point.line += 1;
+                self.point.column = 1;
+                self.point.offset += if code == Code::CarriageReturnLineFeed {
+                    2
+                } else {
+                    1
+                };
+                // To do: accountForPotentialSkip()
+                log::debug!("position: after eol: `{:?}`", self.point);
+            }
+            Code::VirtualSpace => {
+                // Empty.
+            }
+            _ => {
+                self.point.column += 1;
+                self.point.offset += 1;
+            }
+        }
+
+        self.index += 1;
+        // Mark as consumed.
+        self.consumed = true;
+    }
+
+    /// Mark the start of a semantic label.
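+    ///
+    /// ```rust ignore
+    /// // A sketch: tokens are entered and exited in balanced pairs.
+    /// tokenizer.enter(TokenType::Data);
+    /// // … consume one or more codes …
+    /// tokenizer.exit(TokenType::Data);
+    /// ```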
+    pub fn enter(&mut self, token_type: TokenType) {
+        log::debug!("enter `{:?}` ({:?})", token_type, self.point);
+        let event = Event {
+            event_type: EventType::Enter,
+            token_type: token_type.clone(),
+            point: self.point.clone(),
+            index: self.index,
+        };
+
+        self.events.push(event);
+        self.stack.push(token_type);
+    }
+
+    /// Mark the end of a semantic label.
+    pub fn exit(&mut self, token_type: TokenType) {
+        let token_on_stack = self.stack.pop().expect("cannot close w/o open tokens");
+
+        assert_eq!(
+            token_on_stack, token_type,
+            "expected exit TokenType to match current TokenType"
+        );
+
+        let ev = self.events.last().expect("cannot close w/o open event");
+
+        let point = self.point.clone();
+
+        assert!(
+            token_on_stack != ev.token_type || ev.point != point,
+            "expected non-empty TokenType"
+        );
+
+        log::debug!("exit `{:?}` ({:?})", token_type, self.point);
+        let event = Event {
+            event_type: EventType::Exit,
+            token_type,
+            point,
+            index: self.index,
+        };
+
+        self.events.push(event);
+    }
+
+    /// Capture the internal state.
+    fn capture(&mut self) -> InternalState {
+        InternalState {
+            index: self.index,
+            current: self.current,
+            point: self.point.clone(),
+            events_len: self.events.len(),
+            stack_len: self.stack.len(),
+        }
+    }
+
+    /// Apply a previously captured internal state.
+    fn free(&mut self, previous: InternalState) {
+        self.index = previous.index;
+        self.current = previous.current;
+        self.point = previous.point;
+        assert!(
+            self.events.len() >= previous.events_len,
+            "expected to restore fewer events than before"
+        );
+        self.events.truncate(previous.events_len);
+        assert!(
+            self.stack.len() >= previous.stack_len,
+            "expected to restore fewer stack items than before"
+        );
+        self.stack.truncate(previous.stack_len);
+    }
+
+    /// Check if `state` and its future states are successful or not.
+    ///
+    /// This captures the current state of the tokenizer, returns a wrapped
+    /// state that captures all codes and feeds them to `state` and its future
+    /// states until it yields [`State::Ok`][] or [`State::Nok`][].
+    /// It then applies the captured state, calls `done`, and feeds all
+    /// captured codes to its future states.
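+    ///
+    /// ```rust ignore
+    /// // A sketch: peek whether a blank line follows, without consuming
+    /// // anything (`blank_line` is the blank line construct; the `after_*`
+    /// // names are illustrative).
+    /// tokenizer.check(blank_line, |ok| {
+    ///     Box::new(if ok { after_blank } else { after_content })
+    /// })(tokenizer, code)
+    /// ```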
+    pub fn check(
+        &mut self,
+        state: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
+        done: impl FnOnce(bool) -> Box<StateFn> + 'static,
+    ) -> Box<StateFn> {
+        let previous = self.capture();
+
+        attempt_impl(
+            state,
+            vec![],
+            |result: (Vec<Code>, Vec<Code>), ok, tokenizer: &mut Tokenizer| {
+                let codes = result.0;
+                tokenizer.free(previous);
+                log::debug!(
+                    "check: {:?}, codes: {:?}, at {:?}",
+                    ok,
+                    codes,
+                    tokenizer.point
+                );
+                let result = done(ok);
+                tokenizer.feed(codes, result, false)
+            },
+        )
+    }
+
+    /// Attempt to parse with `state` and its future states, reverting if
+    /// unsuccessful.
+    ///
+    /// This captures the current state of the tokenizer, returns a wrapped
+    /// state that captures all codes and feeds them to `state` and its future
+    /// states until it yields [`State::Ok`][], at which point it calls `done`
+    /// and yields its result.
+    /// If instead [`State::Nok`][] was yielded, the captured state is applied,
+    /// `done` is called, and all captured codes are fed to its future states.
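+    ///
+    /// ```rust ignore
+    /// // A sketch: try a thematic break first; if that fails, revert and
+    /// // try an ATX heading instead (`after` is illustrative).
+    /// tokenizer.attempt(thematic_break, |ok| {
+    ///     Box::new(if ok { after } else { heading_atx })
+    /// })(tokenizer, code)
+    /// ```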
+    pub fn attempt(
+        &mut self,
+        state: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
+        done: impl FnOnce(bool) -> Box<StateFn> + 'static,
+    ) -> Box<StateFn> {
+        let previous = self.capture();
+
+        attempt_impl(
+            state,
+            vec![],
+            |result: (Vec<Code>, Vec<Code>), ok, tokenizer: &mut Tokenizer| {
+                let codes = if ok {
+                    result.1
+                } else {
+                    tokenizer.free(previous);
+                    result.0
+                };
+
+                log::debug!(
+                    "attempt: {:?}, codes: {:?}, at {:?}",
+                    ok,
+                    codes,
+                    tokenizer.point
+                );
+                let result = done(ok);
+                tokenizer.feed(codes, result, false)
+            },
+        )
+    }
+
+    /// Feed a list of `codes` into `start`.
+    ///
+    /// This is set up to support repeatedly calling `feed`, and thus streaming
+    /// markdown into the state machine, and normally pauses after feeding.
+    /// When `drain: true` is passed, the EOF is fed.
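+    ///
+    /// ```rust ignore
+    /// // A sketch: feed all codes into some initial state function `start`
+    /// // (illustrative), and drain with an EOF at the end.
+    /// let mut tokenizer = Tokenizer::new();
+    /// tokenizer.feed(as_codes("asd"), start, true);
+    /// ```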
+    pub fn feed(
+        &mut self,
+        codes: Vec<Code>,
+        start: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
+        drain: bool,
+    ) -> StateFnResult {
+        let mut codes = codes;
+        let mut state = State::Fn(Box::new(start));
+        let mut index = 0;
+
+        self.consumed = true;
+
+        while index < codes.len() {
+            let code = codes[index];
+
+            match state {
+                State::Nok | State::Ok => {
+                    break;
+                }
+                State::Fn(func) => {
+                    log::debug!("main: passing `{:?}`", code);
+                    self.expect(code);
+                    let (next, remainder) = check_statefn_result(func(self, code));
+                    state = next;
+                    index = index + 1 - remainder.as_ref().map_or(0, |x| x.len());
+                }
+            }
+        }
+
+        // Yield to a higher loop if we shouldn’t feed EOFs.
+        if !drain {
+            return (state, Some(codes.split_off(index)));
+        }
+
+        loop {
+            // Feed EOF.
+            match state {
+                State::Ok | State::Nok => break,
+                State::Fn(func) => {
+                    let code = Code::None;
+                    log::debug!("main: passing eof");
+                    self.expect(code);
+                    let (next, remainder) = check_statefn_result(func(self, code));
+
+                    if let Some(ref x) = remainder {
+                        if !x.is_empty() {
+                            // To do: handle?
+                            unreachable!("drain:remainder {:?}", x);
+                        }
+                    }
+
+                    state = next;
+                }
+            }
+        }
+
+        check_statefn_result((state, None))
+    }
+}
+
+/// Internal utility to wrap states to also capture codes.
+///
+/// Recurses into itself.
+/// Used in [`Tokenizer::attempt`][Tokenizer::attempt] and [`Tokenizer::check`][Tokenizer::check].
+fn attempt_impl(
+    state: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
+    codes: Vec<Code>,
+    done: impl FnOnce((Vec<Code>, Vec<Code>), bool, &mut Tokenizer) -> StateFnResult + 'static,
+) -> Box<StateFn> {
+    Box::new(|tokenizer, code| {
+        let mut codes = codes;
+
+        let (next, remainder) = check_statefn_result(state(tokenizer, code));
+
+        match code {
+            Code::None => {}
+            _ => {
+                codes.push(code);
+            }
+        }
+
+        // To do: `remainder` must never be bigger than codes I guess?
+        // To do: `remainder` probably has to be taken *from* `codes`, in a similar vein to the `Ok` handling below.
+        match next {
+            State::Ok => {
+                let remaining = remainder.unwrap_or_default();
+                check_statefn_result(done((codes, remaining), true, tokenizer))
+            }
+            State::Nok => check_statefn_result(done((codes, vec![]), false, tokenizer)),
+            State::Fn(func) => {
+                check_statefn_result((State::Fn(attempt_impl(func, codes, done)), None))
+            }
+        }
+    })
+}
+
+/// Turn a string into codes.
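+///
+/// ## Examples
+///
+/// A sketch of the expected mapping (illustrative only, since this module is
+/// private):
+///
+/// ```rust ignore
+/// use micromark::tokenizer::{as_codes, Code};
+///
+/// assert_eq!(
+///     as_codes("a\r\nb"),
+///     vec![Code::Char('a'), Code::CarriageReturnLineFeed, Code::Char('b')]
+/// );
+/// ```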
+// To do: handle BOM at start?
+pub fn as_codes(value: &str) -> Vec<Code> {
+    let mut codes: Vec<Code> = vec![];
+    let mut at_carriage_return = false;
+    let mut column = 1;
+
+    for char in value.chars() {
+        // Send a CRLF.
+        if at_carriage_return && '\n' == char {
+            at_carriage_return = false;
+            codes.push(Code::CarriageReturnLineFeed);
+        } else {
+            // Send the previous CR: we’re not at a next `\n`.
+            if at_carriage_return {
+                at_carriage_return = false;
+                codes.push(Code::Char('\r'));
+            }
+
+            match char {
+                // Send a replacement character.
+                '\0' => {
+                    column += 1;
+                    codes.push(Code::Char('�'));
+                }
+                // Send a tab and virtual spaces.
+                '\t' => {
+                    // A tab advances to the next tab stop (columns are
+                    // 1-indexed), so the tab plus its virtual spaces must
+                    // together cover `TAB_SIZE - ((column - 1) % TAB_SIZE)`
+                    // columns.
+                    let virtual_spaces = TAB_SIZE - ((column - 1) % TAB_SIZE) - 1;
+                    codes.push(Code::Char(char));
+                    column += 1;
+                    let mut index = 0;
+                    while index < virtual_spaces {
+                        codes.push(Code::VirtualSpace);
+                        column += 1;
+                        index += 1;
+                    }
+                }
+                // Send an LF.
+                '\n' => {
+                    column = 1;
+                    codes.push(Code::Char(char));
+                }
+                // Don’t send anything yet.
+                '\r' => {
+                    column = 1;
+                    at_carriage_return = true;
+                }
+                // Send the char.
+                _ => {
+                    column += 1;
+                    codes.push(Code::Char(char));
+                }
+            }
+        };
+    }
+
+    // Send a final CR: it wasn’t followed by an LF.
+    if at_carriage_return {
+        codes.push(Code::Char('\r'));
+    }
+
+    codes
+}
+
+/// Check a [`StateFnResult`][], make sure it’s valid (that there are no bugs),
+/// and clean a final eof passed back in `remainder`.
+fn check_statefn_result(result: StateFnResult) -> StateFnResult {
+    let (state, mut remainder) = result;
+
+    match state {
+        State::Nok | State::Fn(_) => {
+            if let Some(ref x) = remainder {
+                assert_eq!(
+                    x.len(),
+                    0,
+                    "expected `None` to be passed back as remainder from `State::Nok`, `State::Fn`"
+                );
+            }
+        }
+        State::Ok => {}
+    }
+
+    // Remove an eof.
+    // For convenience, feeding back an eof is allowed, but cleaned here.
+    // Most states handle eof and eol in the same branch, and hence pass
+    // all back.
+    // This might not be needed, because if EOF is passed back, we’re at the EOF.
+    // But they’re not supposed to be in codes, so here we remove them.
+    if let Some(ref mut list) = remainder {
+        if Some(&Code::None) == list.last() {
+            list.pop();
+        }
+    }
+
+    (state, remainder)
+}
diff --git a/src/util.rs b/src/util.rs
new file mode 100644
index 0000000..47359a3
--- /dev/null
+++ b/src/util.rs
@@ -0,0 +1,241 @@
+//! Some utilities helpful when parsing and compiling markdown.
+
+use crate::constant::{CHARACTER_REFERENCE_NAMES, CHARACTER_REFERENCE_VALUES};
+use crate::tokenizer::{Code, Event, EventType};
+
+/// Encode dangerous html characters.
+///
+/// This ensures that certain characters which have special meaning in HTML are
+/// dealt with.
+/// Technically, we can skip `>` and `"` in many cases, but CM includes them.
+///
+/// This behavior is not explained in prose in `CommonMark` but can be inferred
+/// from the input/output test cases.
+///
+/// ## Examples
+///
+/// ```rust ignore
+/// use micromark::util::encode;
+///
+/// assert_eq!(encode("I <3 🦀"), "I &lt;3 🦀");
+/// ```
+///
+/// ## References
+///
+/// *   [`micromark-util-encode` in `micromark`](https://github.com/micromark/micromark/tree/main/packages/micromark-util-encode)
+pub fn encode(value: &str) -> String {
+    value
+        .replace('&', "&amp;")
+        .replace('"', "&quot;")
+        .replace('<', "&lt;")
+        .replace('>', "&gt;")
+}
+
+/// Decode numeric character references.
+///
+/// Turn the number (in string form as either hexadecimal or decimal) coming
+/// from a numeric character reference into a character.
+/// Whether the base of the string form is `10` (decimal) or `16` (hexadecimal)
+/// must be passed as the `radix` parameter.
+///
+/// This returns the `char` associated with that number or a replacement
+/// character for C0 control characters (except for ASCII whitespace), C1
+/// control characters, lone surrogates, noncharacters, and out of range
+/// characters.
+///
+/// ## Examples
+///
+/// ```rust ignore
+/// use micromark::util::decode_numeric_character_reference;
+///
+/// assert_eq!(decode_numeric_character_reference("123", 10), '{');
+/// assert_eq!(decode_numeric_character_reference("9", 16), '\t');
+/// assert_eq!(decode_numeric_character_reference("0", 10), '�'); // Not allowed.
+/// ```
+///
+/// ## Panics
+///
+/// This function panics if an invalid string or an out-of-bounds valid string
+/// is given.
+/// It is expected that figuring out whether a number is allowed is handled in
+/// the parser.
+/// When `micromark` is used, this function never panics.
+///
+/// ## References
+///
+/// *   [`micromark-util-decode-numeric-character-reference` in `micromark`](https://github.com/micromark/micromark/tree/main/packages/micromark-util-decode-numeric-character-reference)
+/// *   [*§ 2.5 Entity and numeric character references* in `CommonMark`](https://spec.commonmark.org/0.30/#entity-and-numeric-character-references)
+pub fn decode_numeric_character_reference(value: &str, radix: u32) -> char {
+    let code = u32::from_str_radix(value, radix).expect("expected `value` to be an int");
+
+    if
+    // C0 except for HT, LF, FF, CR, space
+    code < 0x09 ||
+    code == 0x0B ||
+    (code > 0x0D && code < 0x20) ||
+    // Control character (DEL) of the basic block and C1 controls.
+    (code > 0x7E && code < 0xA0) ||
+    // Lone high surrogates and low surrogates.
+    (code > 0xd7ff && code < 0xe000) ||
+    // Noncharacters.
+    (code > 0xfdcf && code < 0xfdf0) ||
+    ((code & 0xffff) == 0xffff) ||
+    ((code & 0xffff) == 0xfffe) ||
+    // Out of range
+    code > 0x0010_ffff
+    {
+        '�'
+    } else {
+        char::from_u32(code).expect("expected valid `code`")
+    }
+}
+
+/// Decode named character references.
+///
+/// Turn the name coming from a named character reference (without the `&` or
+/// `;`) into a string.
+/// This looks the given string up in [`CHARACTER_REFERENCE_NAMES`][] and then
+/// takes the corresponding value from [`CHARACTER_REFERENCE_VALUES`][].
+///
+/// The result is `String` instead of `char` because named character references
+/// can expand into multiple characters.
+///
+/// ## Examples
+///
+/// ```rust ignore
+/// use micromark::util::decode_named_character_reference;
+///
+/// assert_eq!(decode_named_character_reference("amp"), "&");
+/// assert_eq!(decode_named_character_reference("AElig"), "Æ");
+/// assert_eq!(decode_named_character_reference("aelig"), "æ");
+/// ```
+///
+/// ## Panics
+///
+/// This function panics if a name not in [`CHARACTER_REFERENCE_NAMES`][] is
+/// given.
+/// It is expected that figuring out whether a name is allowed is handled in
+/// the parser.
+/// When `micromark` is used, this function never panics.
+///
+/// ## References
+///
+/// *   [`wooorm/decode-named-character-reference`](https://github.com/wooorm/decode-named-character-reference)
+/// *   [*§ 2.5 Entity and numeric character references* in `CommonMark`](https://spec.commonmark.org/0.30/#entity-and-numeric-character-references)
+pub fn decode_named_character_reference(value: &str) -> String {
+    let position = CHARACTER_REFERENCE_NAMES.iter().position(|&x| x == value);
+    if let Some(index) = position {
+        CHARACTER_REFERENCE_VALUES[index].to_string()
+    } else {
+        unreachable!("expected valid `name`")
+    }
+}
+
+/// A struct representing the span of an opening and closing event of a token.
+#[derive(Debug)]
+pub struct Span {
+    // To do: probably needed in the future.
+    // start: Point,
+    /// Absolute offset (and `index` in `codes`) of where this span starts.
+    pub start_index: usize,
+    // To do: probably needed in the future.
+    // end: Point,
+    /// Absolute offset (and `index` in `codes`) of where this span ends.
+    pub end_index: usize,
+    // To do: probably needed in the future.
+    // token_type: TokenType,
+}
+
+/// Get a span from an event.
+///
+/// Get the span of an `exit` event by looking backwards through the events
+/// to find the corresponding `enter` event.
+/// This assumes that tokens of the same type are not nested.
+///
+/// ## Panics
+///
+/// This function panics if an enter event is given.
+/// When `micromark` is used, this function never panics.
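+///
+/// ## Examples
+///
+/// ```rust ignore
+/// use micromark::util::{get_span, slice_serialize};
+///
+/// // A sketch: given an `exit` event at `index`, get the matching span and
+/// // serialize the source it covers (`events`, `codes`, and `index` are
+/// // assumed to come from the parser).
+/// let span = get_span(&events, index);
+/// let text = slice_serialize(&codes, &span, false);
+/// ```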
+pub fn get_span(events: &[Event], index: usize) -> Span {
+    let exit = &events[index];
+    // let end = exit.point.clone();
+    let end_index = exit.index;
+    let token_type = exit.token_type.clone();
+    // To do: support `enter` events if needed and walk forwards?
+    assert_eq!(
+        exit.event_type,
+        EventType::Exit,
+        "expected get_span to be called on `exit` event"
+    );
+    let mut start_index = index - 1;
+
+    loop {
+        let enter = &events[start_index];
+        if enter.event_type == EventType::Enter && enter.token_type == token_type {
+            return Span {
+                // start: enter.point.clone(),
+                start_index: enter.index,
+                // end,
+                end_index,
+                // token_type,
+            };
+        }
+
+        start_index -= 1;
+    }
+}
+
+/// Serialize a span, optionally expanding tabs.
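+///
+/// ```rust ignore
+/// use micromark::util::{slice_serialize, Span};
+///
+/// // A sketch, assuming `codes` holds the codes for `"asd"`.
+/// let span = Span { start_index: 0, end_index: 3 };
+/// assert_eq!(slice_serialize(&codes, &span, false), "asd");
+/// ```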
+pub fn slice_serialize(codes: &[Code], span: &Span, expand_tabs: bool) -> String {
+    serialize_chunks(slice_codes(codes, span), expand_tabs)
+}
+
+/// Get a slice of codes from a span.
+pub fn slice_codes<'a>(codes: &'a [Code], span: &Span) -> &'a [Code] {
+    &codes[span.start_index..span.end_index]
+}
+
+/// Serialize a slice of codes, optionally expanding tabs.
+pub fn serialize_chunks(codes: &[Code], expand_tabs: bool) -> String {
+    let mut at_tab = false;
+    let mut index = 0;
+    let mut value: Vec<char> = vec![];
+
+    while index < codes.len() {
+        let code = codes[index];
+        let mut at_tab_next = false;
+
+        match code {
+            Code::CarriageReturnLineFeed => {
+                value.push('\r');
+                value.push('\n');
+            }
+            Code::Char(char) if char == '\n' || char == '\r' => {
+                value.push(char);
+            }
+            Code::Char(char) if char == '\t' => {
+                at_tab_next = true;
+                value.push(if expand_tabs { ' ' } else { char });
+            }
+            Code::VirtualSpace => {
+                if !expand_tabs && at_tab {
+                    index += 1;
+                    continue;
+                }
+                value.push(' ');
+            }
+            Code::Char(char) => {
+                value.push(char);
+            }
+            Code::None => {
+                unreachable!("unexpected EOF code in codes");
+            }
+        }
+
+        at_tab = at_tab_next;
+
+        index += 1;
+    }
+
+    value.into_iter().collect()
+}
-- 
cgit