.

author: Titus Wormer <tituswormer@gmail.com> 2022-06-08 15:52:16 +0200
committer: Titus Wormer <tituswormer@gmail.com> 2022-06-08 15:52:16 +0200
commit: 4c06c8554c35887f8f5147783953b2b7e7c2327f (patch)
tree: 1b2463848a3ae4c645f7f1a325877ee829ab65c5 /src/construct
download: markdown-rs-4c06c8554c35887f8f5147783953b2b7e7c2327f.tar.gz
markdown-rs-4c06c8554c35887f8f5147783953b2b7e7c2327f.tar.bz2
markdown-rs-4c06c8554c35887f8f5147783953b2b7e7c2327f.zip
10 files changed, 2595 insertions, 0 deletions
diff --git a/src/construct/blank_line.rs b/src/construct/blank_line.rs
new file mode 100644
index 0000000..7b7962b
--- /dev/null
+++ b/src/construct/blank_line.rs
@@ -0,0 +1,61 @@
+//! Blank lines are a construct that occurs in the flow content type.
+//!
+//! They’re formed with the following BNF:
+//!
+//! ```bnf
+//! blank_line ::= *(' ' | '\t')
+//! ```
+//!
+//! Blank lines are sometimes needed, such as to differentiate a paragraph
+//! from another paragraph.
+//! In several cases, blank lines are not needed between flow constructs,
+//! such as between two headings.
+//! Sometimes, whether blank lines are present changes how HTML is
+//! rendered, such as whether blank lines are present between list
+//! items in a list.
+//! More than one blank line is never needed in `CommonMark`.
+//!
+//! Because blank lines can be empty (line endings are not considered part of
+//! it), and events cannot be empty, blank lines are not present as a token.
+//!
+//! ## References
+//!
+//! *   [`blank-line.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/blank-line.js)
+//! *   [*§ 4.9 Blank lines* in `CommonMark`](https://spec.commonmark.org/0.30/#blank-lines)
+//!
+//! <!-- To do: link `flow`, `heading`, `list`, `paragraph` -->
+
+use crate::construct::partial_whitespace::start as whitespace;
+use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+
+/// Entry point of a blank line.
+///
+/// Attempts to parse optional whitespace (as `BlankLineWhitespace`) and
+/// continues in `after`, whether or not that attempt succeeded.
+///
+/// Note: `␠` represents a space character.
+///
+/// ```markdown
+/// |␠␠
+/// |
+/// ```
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    let attempt_whitespace = tokenizer.attempt(
+        |t, c| whitespace(t, c, TokenType::BlankLineWhitespace),
+        |_found| Box::new(after),
+    );
+    attempt_whitespace(tokenizer, code)
+}
+
+/// Past the optional spaces or tabs: the line is only blank if a line
+/// ending or the end of the file follows.
+///
+/// On success the current code is handed back unconsumed.
+///
+/// Note: `␠` represents a space character.
+///
+/// ```markdown
+/// |␠␠
+/// |
+/// ```
+fn after(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    let at_eol_or_eof = matches!(
+        code,
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')
+    );
+
+    if at_eol_or_eof {
+        (State::Ok, Some(vec![code]))
+    } else {
+        (State::Nok, None)
+    }
+}
diff --git a/src/construct/character_escape.rs b/src/construct/character_escape.rs
new file mode 100644
index 0000000..5ea995e
--- /dev/null
+++ b/src/construct/character_escape.rs
@@ -0,0 +1,69 @@
+//! Character escapes are a construct that occurs in the string and text
+//! content types.
+//!
+//! They’re formed with the following BNF:
+//!
+//! ```bnf
+//! character_escape ::= '\\' ascii_punctuation
+//! ```
+//!
+//! Like much of markdown, there are no “invalid” character escapes: just a
+//! slash, or a slash followed by anything other than an ASCII punctuation
+//! character, is exactly that: just a slash.
+//! To escape (most) arbitrary characters, use a
+//! [character reference][] instead
+//! (as in, `&amp;`, `&#123;`, or say `&#x9;`).
+//! It is also possible to escape a line ending in text with a similar
+//! construct: a backslash followed by a line ending (that is part of the
+//! construct instead of ending it).
+//!
+//! ## References
+//!
+//! *   [`character-escape.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/character-escape.js)
+//! *   [*§ 2.4 Backslash escapes* in `CommonMark`](https://spec.commonmark.org/0.30/#backslash-escapes)
+//!
+//! [character reference]: crate::construct::character_reference
+//!
+//! <!-- To do: link `hard_break_escape`, `string`, `text` -->
+
+use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+
+/// Entry point of a character escape: only a backslash is accepted.
+///
+/// The backslash is consumed as a `CharacterEscapeMarker` inside a
+/// freshly opened `CharacterEscape`.
+///
+/// ```markdown
+/// a|\*b
+/// a|\b
+/// a|\ b
+/// ```
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    if !matches!(code, Code::Char('\\')) {
+        return (State::Nok, None);
+    }
+
+    tokenizer.enter(TokenType::CharacterEscape);
+    tokenizer.enter(TokenType::CharacterEscapeMarker);
+    tokenizer.consume(code);
+    tokenizer.exit(TokenType::CharacterEscapeMarker);
+    (State::Fn(Box::new(inside)), None)
+}
+
+/// Right after the `\` of a character escape.
+///
+/// Only an ASCII punctuation character completes the escape; anything else
+/// fails the construct.
+///
+/// ```markdown
+/// a\|*b
+/// a\|b
+/// a\| b
+/// ```
+fn inside(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    let escapable = matches!(code, Code::Char(ch) if ch.is_ascii_punctuation());
+
+    if escapable {
+        tokenizer.enter(TokenType::CharacterEscapeValue);
+        tokenizer.consume(code);
+        tokenizer.exit(TokenType::CharacterEscapeValue);
+        tokenizer.exit(TokenType::CharacterEscape);
+        (State::Ok, None)
+    } else {
+        (State::Nok, None)
+    }
+}
diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs
new file mode 100644
index 0000000..27275d5
--- /dev/null
+++ b/src/construct/character_reference.rs
@@ -0,0 +1,237 @@
+//! Character references are a construct that occurs in the string and text
+//! content types.
+//!
+//! They’re formed with the following BNF:
+//!
+//! ```bnf
+//! character_reference ::= '&' (numeric | named) ';'
+//!
+//! numeric ::= '#' (hexadecimal | decimal)
+//! ; Note: Limit of `6` imposed as all bigger numbers are invalid:
+//! hexadecimal ::= ('x' | 'X') 1*6(ascii_hexdigit)
+//! ; Note: Limit of `7` imposed as all bigger numbers are invalid:
+//! decimal ::= 1*7(ascii_digit)
+//! ; Note: Limit of `31` imposed by `CounterClockwiseContourIntegral`:
+//! ; Note: Limited to any known named character reference (see `constants.rs`)
+//! named ::= 1*31(ascii_alphanumeric)
+//! ```
+//!
+//! Like much of markdown, there are no “invalid” character references.
+//! However, for security reasons, several numeric character references parse
+//! fine but are not rendered as their corresponding character and they are
+//! instead replaced by a U+FFFD REPLACEMENT CHARACTER (`�`).
+//! See [`decode_numeric_character_reference`][decode_numeric] for more info.
+//!
+//! To escape ASCII punctuation characters, use the terser
+//! [character escape][character_escape] construct instead (as in, `\&`).
+//!
+//! Character references in markdown are not the same as character references
+//! in HTML.
+//! Notably, HTML allows several character references without a closing
+//! semicolon.
+//! See [*§ 13.2.5.72 Character reference state* in the HTML spec][html] for more info.
+//!
+//! Character references are parsed insensitive to casing.
+//! The casing of hexadecimal numeric character references has no effect.
+//! The casing of named character references does not matter when parsing them,
+//! but does affect whether they match.
+//! Depending on the name, one or more cases are allowed, such as that `AMP`
+//! and `amp` are both allowed but other cases are not.
+//! See [`CHARACTER_REFERENCE_NAMES`][character_reference_names] for which
+//! names match.
+//!
+//! ## References
+//!
+//! *   [`character-reference.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/character-reference.js)
+//! *   [*§ 2.5 Entity and numeric character references* in `CommonMark`](https://spec.commonmark.org/0.30/#entity-and-numeric-character-references)
+//!
+//! [character_escape]: crate::construct::character_escape
+//! [decode_numeric]: crate::util::decode_numeric_character_reference
+//! [character_reference_names]: crate::constant::CHARACTER_REFERENCE_NAMES
+//! [html]: https://html.spec.whatwg.org/multipage/parsing.html#character-reference-state
+//!
+//! <!-- To do: link `string`, `text` -->
+
+use crate::constant::{
+    CHARACTER_REFERENCE_DECIMAL_SIZE_MAX, CHARACTER_REFERENCE_HEXADECIMAL_SIZE_MAX,
+    CHARACTER_REFERENCE_NAMED_SIZE_MAX, CHARACTER_REFERENCE_NAMES,
+};
+use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+
+/// Kind of a character reference.
+#[derive(Debug, Clone)]
+pub enum Kind {
+    /// Numeric decimal character reference (`&#123;`).
+    Decimal,
+    /// Numeric hexadecimal character reference (`&#x9;`).
+    Hexadecimal,
+    /// Named character reference (`&amp;`).
+    Named,
+}
+
+/// State needed to parse character references.
+///
+/// Passed by value from one `value` step to the next.
+#[derive(Debug, Clone)]
+struct Info {
+    /// All parsed characters of the value so far (the `&`, `#`, `x`, and `;`
+    /// markers are consumed separately and not stored here).
+    buffer: Vec<char>,
+    /// Kind of character reference; decides which characters, and how many,
+    /// are accepted into `buffer`.
+    kind: Kind,
+}
+
+/// Entry point of a character reference: only an ampersand is accepted.
+///
+/// The `&` is consumed as a `CharacterReferenceMarker` inside a freshly
+/// opened `CharacterReference`.
+///
+/// ```markdown
+/// a|&amp;b
+/// a|&#123;b
+/// a|&#x9;b
+/// ```
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    if !matches!(code, Code::Char('&')) {
+        return (State::Nok, None);
+    }
+
+    tokenizer.enter(TokenType::CharacterReference);
+    tokenizer.enter(TokenType::CharacterReferenceMarker);
+    tokenizer.consume(code);
+    tokenizer.exit(TokenType::CharacterReferenceMarker);
+    (State::Fn(Box::new(open)), None)
+}
+
+/// Right after the `&`: a `#` switches to numeric references, anything else
+/// is treated as the first character of a named reference value.
+///
+/// ```markdown
+/// a&|amp;b
+/// a&|#123;b
+/// a&|#x9;b
+/// ```
+fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::Char('#') => {
+            tokenizer.enter(TokenType::CharacterReferenceMarkerNumeric);
+            tokenizer.consume(code);
+            tokenizer.exit(TokenType::CharacterReferenceMarkerNumeric);
+            (State::Fn(Box::new(numeric)), None)
+        }
+        _ => {
+            tokenizer.enter(TokenType::CharacterReferenceValue);
+            let named = Info {
+                buffer: vec![],
+                kind: Kind::Named,
+            };
+            value(tokenizer, code, named)
+        }
+    }
+}
+
+/// Right after `&#`: an `x` or `X` marks a hexadecimal reference, anything
+/// else is treated as the first character of a decimal value.
+///
+/// ```markdown
+/// a&#|123;b
+/// a&#|x9;b
+/// ```
+fn numeric(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    if matches!(code, Code::Char('x' | 'X')) {
+        tokenizer.enter(TokenType::CharacterReferenceMarkerHexadecimal);
+        tokenizer.consume(code);
+        tokenizer.exit(TokenType::CharacterReferenceMarkerHexadecimal);
+        tokenizer.enter(TokenType::CharacterReferenceValue);
+
+        let hexadecimal = Info {
+            buffer: vec![],
+            kind: Kind::Hexadecimal,
+        };
+
+        (
+            State::Fn(Box::new(move |tokenizer, code| {
+                value(tokenizer, code, hexadecimal)
+            })),
+            None,
+        )
+    } else {
+        tokenizer.enter(TokenType::CharacterReferenceValue);
+
+        let decimal = Info {
+            buffer: vec![],
+            kind: Kind::Decimal,
+        };
+
+        value(tokenizer, code, decimal)
+    }
+}
+
+/// In the value of a character reference: past the markers (`&#x`, `&#`, or
+/// `&`) that decided its kind, and before the closing `;`.
+///
+/// A `;` after at least one value character closes the reference; for named
+/// references the collected name must additionally be listed in
+/// `CHARACTER_REFERENCE_NAMES`.
+/// Any other character is only accepted when the kind allows it and the
+/// kind’s size limit has not been reached yet.
+///
+/// ```markdown
+/// a&a|mp;b
+/// a&#1|23;b
+/// a&#x|9;b
+/// ```
+fn value(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
+    let ch = match code {
+        Code::Char(ch) => ch,
+        _ => return (State::Nok, None),
+    };
+
+    if ch == ';' && !info.buffer.is_empty() {
+        tokenizer.exit(TokenType::CharacterReferenceValue);
+
+        if let Kind::Named = info.kind {
+            let name: String = info.buffer.iter().collect();
+
+            if !CHARACTER_REFERENCE_NAMES.contains(&name.as_str()) {
+                return (State::Nok, Some(vec![code]));
+            }
+        }
+
+        tokenizer.enter(TokenType::CharacterReferenceMarkerSemi);
+        tokenizer.consume(code);
+        tokenizer.exit(TokenType::CharacterReferenceMarkerSemi);
+        tokenizer.exit(TokenType::CharacterReference);
+        return (State::Ok, None);
+    }
+
+    // Which characters are allowed, and how many, depends on the kind.
+    let (allowed, size_max) = match info.kind {
+        Kind::Hexadecimal => (
+            ch.is_ascii_hexdigit(),
+            CHARACTER_REFERENCE_HEXADECIMAL_SIZE_MAX,
+        ),
+        Kind::Decimal => (ch.is_ascii_digit(), CHARACTER_REFERENCE_DECIMAL_SIZE_MAX),
+        Kind::Named => (
+            ch.is_ascii_alphanumeric(),
+            CHARACTER_REFERENCE_NAMED_SIZE_MAX,
+        ),
+    };
+
+    if allowed && info.buffer.len() < size_max {
+        let mut next = info;
+        next.buffer.push(ch);
+        tokenizer.consume(code);
+        (
+            State::Fn(Box::new(move |tokenizer, code| value(tokenizer, code, next))),
+            None,
+        )
+    } else {
+        (State::Nok, None)
+    }
+}
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
new file mode 100644
index 0000000..2068a62
--- /dev/null
+++ b/src/construct/code_fenced.rs
@@ -0,0 +1,581 @@
+//! Code (fenced) is a construct that occurs in the flow content type.
+//!
+//! It forms with the following BNF:
+//!
+//! ```bnf
+//! code_fenced ::= fence_open *( eol *code ) [ eol fence_close ]
+//!
+//! fence_open ::= sequence [ 1*space_or_tab info [ 1*space_or_tab meta ] ] *space_or_tab
+//! ; Restriction: the number of markers in the closing fence sequence must be
+//! ; equal to or greater than the number of markers in the opening fence
+//! ; sequence.
+//! ; Restriction: the marker in the closing fence sequence must match the
+//! ; marker in the opening fence sequence
+//! fence_close ::= sequence *space_or_tab
+//! sequence ::= 3*'`' | 3*'~'
+//! info ::= 1*text
+//! meta ::= 1*text *( *space_or_tab 1*text )
+//!
+//! ; Restriction: the `` ` `` character cannot occur in `text` if it is the
+//! ; marker of the opening fence sequence.
+//! text ::= code - eol - space_or_tab
+//! eol ::= '\r' | '\r\n' | '\n'
+//! space_or_tab ::= ' ' | '\t'
+//! code ::= . ; any unicode code point (other than line endings).
+//! ```
+//!
+//! The above grammar does not show how whitespace is handled.
+//! To parse code (fenced), let `X` be the number of whitespace characters
+//! before the opening fence sequence.
+//! Each line of content is then allowed (not required) to be indented with up
+//! to `X` spaces or tabs, which are then ignored as an indent instead of being
+//! considered as part of the code.
+//! This indent does not affect the closing fence.
+//! It can be indented up to a separate 3 spaces or tabs.
+//! A bigger indent makes it part of the code instead of a fence.
+//!
+//! Code (fenced) relates to both the `<pre>` and the `<code>` elements in
+//! HTML.
+//! See [*§ 4.4.3 The `pre` element*][html-pre] and the [*§ 4.5.15 The `code`
+//! element*][html-code] in the HTML spec for more info.
+//!
+//! The optional `meta` part is ignored: it is not used when parsing or
+//! rendering.
+//! The optional `info` part is used and is expected to specify the programming
+//! language that the code is in.
+//! Which value it holds depends on what your syntax highlighter supports, if
+//! one is used.
+//! The `info` is, when rendering to HTML, typically exposed as a class.
+//! This behavior stems from the HTML spec ([*§ 4.5.15 The `code`
+//! element*][html-code]).
+//! For example:
+//!
+//! ```markdown
+//! ~~~css
+//! * { color: tomato }
+//! ~~~
+//! ```
+//!
+//! Yields:
+//!
+//! ```html
+//! <pre><code class="language-css">* { color: tomato }
+//! </code></pre>
+//! ```
+//!
+//! The `info` and `meta` parts are interpreted as the string content type.
+//! That means that character escapes and character references are allowed.
+//!
+//! In markdown, it is also possible to use code (text) in the text content
+//! type.
+//! It is also possible to create code with the
+//! [code (indented)][code-indented] construct.
+//! That construct is less explicit, different from code (text), and has no
+//! support for specifying the programming language, so it is recommended to
+//! use code (fenced) instead of code (indented).
+//!
+//! ## References
+//!
+//! *   [`code-fenced.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/code-fenced.js)
+//! *   [*§ 4.5 Fenced code blocks* in `CommonMark`](https://spec.commonmark.org/0.30/#fenced-code-blocks)
+//!
+//! [code-indented]: crate::construct::code_indented
+//! [html-pre]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-pre-element
+//! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
+//!
+//! <!-- To do: link `flow`, `text`, `code_text`, `string` -->
+
+use crate::constant::{CODE_FENCED_SEQUENCE_SIZE_MIN, TAB_SIZE};
+use crate::construct::partial_whitespace::start as whitespace;
+use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+use crate::util::get_span;
+
+/// Kind of fences.
+///
+/// Determined by the first marker of the opening fence sequence.
+#[derive(Debug, Clone, PartialEq)]
+pub enum Kind {
+    /// Grave accent (tick) code: fences made of `` ` ``.
+    GraveAccent,
+    /// Tilde code: fences made of `~`.
+    Tilde,
+}
+
+/// State needed to parse code (fenced).
+///
+/// Built once the first marker of the opening fence is seen and then passed
+/// by value through the remaining states.
+#[derive(Debug, Clone)]
+struct Info {
+    /// Number of markers on the opening fence sequence.
+    /// A closing fence sequence must be at least this long.
+    size: usize,
+    /// Number of tabs or spaces of indentation before the opening fence
+    /// sequence.
+    /// Content lines may start with up to this many spaces or tabs that are
+    /// tokenized as whitespace instead of code.
+    prefix: usize,
+    /// Kind of fences.
+    kind: Kind,
+}
+
+/// Entry point of fenced code.
+///
+/// Opens `CodeFenced` and the first `CodeFencedFence`, then attempts to
+/// parse an optional whitespace prefix before moving on to the opening
+/// sequence.
+///
+/// ```markdown
+/// | ~~~js
+///  console.log(1);
+///  ~~~
+/// ```
+///
+/// Parsing note: normally, the prefix is already stripped.
+/// `flow.rs` makes sure that that doesn’t happen for code (fenced), as we need
+/// it.
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    tokenizer.enter(TokenType::CodeFenced);
+    tokenizer.enter(TokenType::CodeFencedFence);
+
+    let attempt_prefix = tokenizer.attempt(
+        |t, c| whitespace(t, c, TokenType::Whitespace),
+        |_found| Box::new(before_sequence_open),
+    );
+    attempt_prefix(tokenizer, code)
+}
+
+/// In the opening fence, past the optional prefix, at the first marker of
+/// the sequence.
+///
+/// The size of the prefix is read back from the last event when that event
+/// is whitespace; the first marker decides the kind of fence.
+///
+/// ```markdown
+/// |~~~js
+/// console.log(1);
+/// ~~~
+/// ```
+fn before_sequence_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    let prefix = match tokenizer.events.last() {
+        Some(event) if event.token_type == TokenType::Whitespace => {
+            let span = get_span(&tokenizer.events, tokenizer.events.len() - 1);
+            span.end_index - span.start_index
+        }
+        _ => 0,
+    };
+
+    let kind = match code {
+        Code::Char('`') => Kind::GraveAccent,
+        Code::Char('~') => Kind::Tilde,
+        _ => return (State::Nok, None),
+    };
+
+    tokenizer.enter(TokenType::CodeFencedFenceSequence);
+    sequence_open(
+        tokenizer,
+        Info {
+            size: 0,
+            prefix,
+            kind,
+        },
+        code,
+    )
+}
+
+/// In the opening fence sequence, counting markers.
+///
+/// Once a non-marker is seen, the sequence must contain at least
+/// `CODE_FENCED_SEQUENCE_SIZE_MIN` markers, after which optional whitespace
+/// is attempted before the info.
+///
+/// ```markdown
+/// ~|~~js
+/// console.log(1);
+/// ~~~
+/// ```
+fn sequence_open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    let marker = match info.kind {
+        Kind::GraveAccent => '`',
+        Kind::Tilde => '~',
+    };
+
+    if matches!(code, Code::Char(ch) if ch == marker) {
+        tokenizer.consume(code);
+        let next = Info {
+            size: info.size + 1,
+            ..info
+        };
+        return (
+            State::Fn(Box::new(move |tokenizer, code| {
+                sequence_open(tokenizer, next, code)
+            })),
+            None,
+        );
+    }
+
+    if info.size < CODE_FENCED_SEQUENCE_SIZE_MIN {
+        return (State::Nok, None);
+    }
+
+    tokenizer.exit(TokenType::CodeFencedFenceSequence);
+    let attempt_whitespace = tokenizer.attempt(
+        |t, c| whitespace(t, c, TokenType::CodeFencedFenceWhitespace),
+        |_found| Box::new(|tokenizer, code| info_before(tokenizer, info, code)),
+    );
+    attempt_whitespace(tokenizer, code)
+}
+
+/// In the opening fence, past the sequence and optional whitespace, where
+/// the info may start.
+///
+/// A line ending or the end of the file closes the fence without info.
+///
+/// ```markdown
+/// ~~~|js
+/// console.log(1);
+/// ~~~
+/// ```
+fn info_before(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    let at_eol_or_eof = matches!(
+        code,
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')
+    );
+
+    if at_eol_or_eof {
+        tokenizer.exit(TokenType::CodeFencedFence);
+        at_break(tokenizer, info, code)
+    } else {
+        tokenizer.enter(TokenType::CodeFencedFenceInfo);
+        tokenizer.enter(TokenType::ChunkString);
+        info_inside(tokenizer, info, code, Vec::new())
+    }
+}
+
+/// Inside the opening fence info.
+///
+/// * A line ending or the end of the file closes the info and the fence.
+/// * A space or tab closes the info, after which whitespace and an optional
+///   meta follow.
+/// * A grave accent fails the construct when the fence itself is made of
+///   grave accents.
+/// * Any other character is consumed as part of the info and collected in
+///   `codes`.
+///
+/// `codes` holds the characters of the info seen so far.
+/// It is not used yet: it is kept for when the info is subtokenized as
+/// string content.
+///
+/// ```markdown
+/// ~~~j|s
+/// console.log(1);
+/// ~~~
+/// ```
+fn info_inside(
+    tokenizer: &mut Tokenizer,
+    info: Info,
+    code: Code,
+    codes: Vec<Code>,
+) -> StateFnResult {
+    match code {
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            // To do: subtokenize `codes`.
+            tokenizer.exit(TokenType::ChunkString);
+            tokenizer.exit(TokenType::CodeFencedFenceInfo);
+            tokenizer.exit(TokenType::CodeFencedFence);
+            at_break(tokenizer, info, code)
+        }
+        Code::VirtualSpace | Code::Char('\t' | ' ') => {
+            // To do: subtokenize `codes`.
+            tokenizer.exit(TokenType::ChunkString);
+            tokenizer.exit(TokenType::CodeFencedFenceInfo);
+            tokenizer.attempt(
+                |tokenizer, code| whitespace(tokenizer, code, TokenType::CodeFencedFenceWhitespace),
+                |_ok| Box::new(|tokenizer, code| meta_before(tokenizer, info, code)),
+            )(tokenizer, code)
+        }
+        Code::Char(char) if char == '`' && info.kind == Kind::GraveAccent => (State::Nok, None),
+        Code::Char(_) => {
+            let mut codes = codes;
+            codes.push(code);
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    info_inside(tokenizer, info, code, codes)
+                })),
+                None,
+            )
+        }
+    }
+}
+
+/// In the opening fence, past the info and whitespace, where the meta may
+/// start.
+///
+/// A line ending or the end of the file closes the fence without meta.
+///
+/// ```markdown
+/// ~~~js |eval
+/// console.log(1);
+/// ~~~
+/// ```
+fn meta_before(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    let at_eol_or_eof = matches!(
+        code,
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')
+    );
+
+    if at_eol_or_eof {
+        tokenizer.exit(TokenType::CodeFencedFence);
+        at_break(tokenizer, info, code)
+    } else {
+        tokenizer.enter(TokenType::CodeFencedFenceMeta);
+        tokenizer.enter(TokenType::ChunkString);
+        meta(tokenizer, info, code)
+    }
+}
+
+/// In the meta of the opening fence.
+///
+/// Runs until a line ending or the end of the file; a grave accent fails
+/// the construct when the fence itself is made of grave accents.
+///
+/// ```markdown
+/// ~~~js e|val
+/// console.log(1);
+/// ~~~
+/// ```
+fn meta(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    let at_eol_or_eof = matches!(
+        code,
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')
+    );
+
+    if at_eol_or_eof {
+        tokenizer.exit(TokenType::ChunkString);
+        tokenizer.exit(TokenType::CodeFencedFenceMeta);
+        tokenizer.exit(TokenType::CodeFencedFence);
+        return at_break(tokenizer, info, code);
+    }
+
+    let forbidden_tick = matches!(code, Code::Char('`')) && info.kind == Kind::GraveAccent;
+
+    if forbidden_tick {
+        return (State::Nok, None);
+    }
+
+    tokenizer.consume(code);
+    (
+        State::Fn(Box::new(move |tokenizer, code| meta(tokenizer, info, code))),
+        None,
+    )
+}
+
+/// At an eol/eof in code, before a closing fence or before content.
+///
+/// At the end of the file, the code is done.
+/// At a line ending, an attempt is made to parse the line ending followed by
+/// a closing fence: if that works, the code is done, otherwise the line
+/// ending is tokenized (again) and the next line is parsed as content.
+///
+/// Must only be called at a line ending or the end of the file: any other
+/// code panics.
+///
+/// ```markdown
+/// ~~~js|
+/// aa|
+/// ~~~
+/// ```
+fn at_break(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    // `info` is moved into the attempted state below; the fallback state
+    // needs a copy of its own.
+    let clone = info.clone();
+
+    match code {
+        Code::None => after(tokenizer, code),
+        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => tokenizer.attempt(
+            // Attempt: line ending, then a closing fence.
+            |tokenizer, code| {
+                tokenizer.enter(TokenType::LineEnding);
+                tokenizer.consume(code);
+                tokenizer.exit(TokenType::LineEnding);
+                (
+                    State::Fn(Box::new(|tokenizer, code| {
+                        close_before(tokenizer, info, code)
+                    })),
+                    None,
+                )
+            },
+            |ok| {
+                if ok {
+                    // A closing fence was found: the code is done.
+                    Box::new(after)
+                } else {
+                    // No closing fence: tokenize the line ending here and go
+                    // on with content.
+                    // NOTE(review): presumably a failed attempt rewinds to
+                    // before the line ending — confirm in `tokenizer.rs`.
+                    Box::new(|tokenizer, code| {
+                        tokenizer.enter(TokenType::LineEnding);
+                        tokenizer.consume(code);
+                        tokenizer.exit(TokenType::LineEnding);
+                        (
+                            State::Fn(Box::new(|tokenizer, code| {
+                                content_start(tokenizer, clone, code)
+                            })),
+                            None,
+                        )
+                    })
+                }
+            },
+        )(tokenizer, code),
+        _ => unreachable!("unexpected non-eol/eof after `at_break` `{:?}`", code),
+    }
+}
+
+/// At the start of a potential closing fence, where optional whitespace may
+/// come first.
+///
+/// ```markdown
+/// ~~~js
+/// console.log('1')
+/// |~~~
+///
+/// ~~~js
+/// console.log('1')
+/// |  ~~~
+/// ```
+fn close_before(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    tokenizer.enter(TokenType::CodeFencedFence);
+
+    let attempt_prefix = tokenizer.attempt(
+        |t, c| whitespace(t, c, TokenType::Whitespace),
+        |_found| Box::new(|tokenizer, code| close_sequence_before(tokenizer, info, code)),
+    );
+    attempt_prefix(tokenizer, code)
+}
+
+/// In a potential closing fence, past the optional whitespace, at the first
+/// marker of the sequence.
+///
+/// Fails when the whitespace before the sequence is `TAB_SIZE` or more, or
+/// when the current code is not the marker used by the opening fence.
+///
+/// ```markdown
+/// ~~~js
+/// console.log('1')
+/// |~~~
+///
+/// ~~~js
+/// console.log('1')
+///   |~~~
+/// ```
+fn close_sequence_before(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    let prefix = match tokenizer.events.last() {
+        Some(event) if event.token_type == TokenType::Whitespace => {
+            let span = get_span(&tokenizer.events, tokenizer.events.len() - 1);
+            span.end_index - span.start_index
+        }
+        _ => 0,
+    };
+
+    // To do: 4+ should be okay if code (indented) is turned off!
+    if prefix >= TAB_SIZE {
+        return (State::Nok, None);
+    }
+
+    let marker = match info.kind {
+        Kind::GraveAccent => '`',
+        Kind::Tilde => '~',
+    };
+
+    if matches!(code, Code::Char(ch) if ch == marker) {
+        tokenizer.enter(TokenType::CodeFencedFenceSequence);
+        close_sequence(tokenizer, info, code, 0)
+    } else {
+        (State::Nok, None)
+    }
+}
+
+/// In the closing fence sequence, counting markers in `size`.
+///
+/// Once a non-marker is seen, the sequence is accepted only when it has at
+/// least `CODE_FENCED_SEQUENCE_SIZE_MIN` markers and at least as many as
+/// the opening sequence; optional trailing whitespace is attempted next.
+///
+/// ```markdown
+/// ~~~js
+/// console.log('1')
+/// ~|~~
+/// ```
+fn close_sequence(tokenizer: &mut Tokenizer, info: Info, code: Code, size: usize) -> StateFnResult {
+    let marker = match info.kind {
+        Kind::GraveAccent => '`',
+        Kind::Tilde => '~',
+    };
+
+    if matches!(code, Code::Char(ch) if ch == marker) {
+        tokenizer.consume(code);
+        let next_size = size + 1;
+        return (
+            State::Fn(Box::new(move |tokenizer, code| {
+                close_sequence(tokenizer, info, code, next_size)
+            })),
+            None,
+        );
+    }
+
+    let long_enough = size >= CODE_FENCED_SEQUENCE_SIZE_MIN && size >= info.size;
+
+    if !long_enough {
+        return (State::Nok, None);
+    }
+
+    tokenizer.exit(TokenType::CodeFencedFenceSequence);
+    let attempt_whitespace = tokenizer.attempt(
+        |t, c| whitespace(t, c, TokenType::CodeFencedFenceWhitespace),
+        |_found| Box::new(close_whitespace_after),
+    );
+    attempt_whitespace(tokenizer, code)
+}
+
+/// Past the closing fence sequence and its optional trailing whitespace.
+///
+/// Only a line ending or the end of the file may follow; that code is
+/// handed back unconsumed.
+///
+/// ```markdown
+/// ~~~js
+/// console.log('1')
+/// ~~~ |
+/// ```
+fn close_whitespace_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    let at_eol_or_eof = matches!(
+        code,
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')
+    );
+
+    if at_eol_or_eof {
+        tokenizer.exit(TokenType::CodeFencedFence);
+        (State::Ok, Some(vec![code]))
+    } else {
+        (State::Nok, None)
+    }
+}
+
+/// At the start of a content line (a closing fence was already ruled out).
+///
+/// An empty line goes straight back to `at_break`; leading whitespace is
+/// treated as a prefix only when the opening fence was indented.
+///
+/// ```markdown
+/// ~~~js
+/// |aa
+/// ~~~
+/// ```
+fn content_start(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    let at_eol_or_eof = matches!(
+        code,
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')
+    );
+    let at_space_or_tab = matches!(code, Code::VirtualSpace | Code::Char('\t' | ' '));
+
+    if at_eol_or_eof {
+        at_break(tokenizer, info, code)
+    } else if at_space_or_tab && info.prefix > 0 {
+        tokenizer.enter(TokenType::Whitespace);
+        content_prefix(tokenizer, info, 0, code)
+    } else {
+        tokenizer.enter(TokenType::CodeFlowChunk);
+        content_continue(tokenizer, info, code)
+    }
+}
+
+/// In the whitespace prefix of a content line.
+///
+/// Consumes spaces or tabs until as many as the prefix of the opening fence
+/// (`info.prefix`) were taken; `prefix` counts how many were consumed so
+/// far.
+///
+/// ```markdown
+///   ~~~js
+///  | aa
+///   ~~~
+/// ```
+fn content_prefix(
+    tokenizer: &mut Tokenizer,
+    info: Info,
+    prefix: usize,
+    code: Code,
+) -> StateFnResult {
+    let at_space_or_tab = matches!(code, Code::VirtualSpace | Code::Char('\t' | ' '));
+
+    if at_space_or_tab && info.prefix > prefix {
+        tokenizer.consume(code);
+        let consumed = prefix + 1;
+        return (
+            State::Fn(Box::new(move |tokenizer, code| {
+                content_prefix(tokenizer, info, consumed, code)
+            })),
+            None,
+        );
+    }
+
+    // The prefix ends here, either at the end of the line or at content.
+    tokenizer.exit(TokenType::Whitespace);
+
+    let at_eol_or_eof = matches!(
+        code,
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')
+    );
+
+    if at_eol_or_eof {
+        at_break(tokenizer, info, code)
+    } else {
+        tokenizer.enter(TokenType::CodeFlowChunk);
+        content_continue(tokenizer, info, code)
+    }
+}
+
+/// In a chunk of code content: everything up to a line ending or the end of
+/// the file is consumed.
+///
+/// ```markdown
+/// ~~~js
+/// |ab
+/// a|b
+/// ab|
+/// ~~~
+/// ```
+fn content_continue(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    let at_eol_or_eof = matches!(
+        code,
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')
+    );
+
+    if at_eol_or_eof {
+        tokenizer.exit(TokenType::CodeFlowChunk);
+        return at_break(tokenizer, info, code);
+    }
+
+    tokenizer.consume(code);
+    (
+        State::Fn(Box::new(move |tokenizer, code| {
+            content_continue(tokenizer, info, code)
+        })),
+        None,
+    )
+}
+
+/// Fenced code is done: closes `CodeFenced` and hands the current code back
+/// unconsumed.
+///
+/// ```markdown
+/// ~~~js
+/// console.log('1')
+/// ~~~|
+/// ```
+fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    tokenizer.exit(TokenType::CodeFenced);
+    let remainder = vec![code];
+    (State::Ok, Some(remainder))
+}
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs
new file mode 100644
index 0000000..6bf089b
--- /dev/null
+++ b/src/construct/code_indented.rs
@@ -0,0 +1,190 @@
+//! Code (indented) is a construct that occurs in the flow content type.
+//!
+//! It forms with the following BNF:
+//!
+//! ```bnf
+//! code_indented ::= indented_filled_line *( eol *( blank_line eol ) indented_filled_line )
+//!
+//! ; Restriction: at least one `code` must not be whitespace.
+//! indented_filled_line ::= 4space_or_tab *code
+//! blank_line ::= *space_or_tab
+//! eol ::= '\r' | '\r\n' | '\n'
+//! code ::= . ; any unicode code point (other than line endings).
+//! space_or_tab ::= ' ' | '\t'
+//! ```
+//!
+//! Code (indented) relates to both the `<pre>` and the `<code>` elements in
+//! HTML.
+//! See [*§ 4.4.3 The `pre` element*][html-pre] and the [*§ 4.5.15 The `code`
+//! element*][html-code] in the HTML spec for more info.
+//!
+//! In markdown, it is also possible to use code (text) in the text content
+//! type.
+//! It is also possible to create code with the [code (fenced)][code-fenced]
+//! construct.
+//! That construct is more explicit, more similar to code (text), and has
+//! support for specifying the programming language that the code is in, so it
+//! is recommended to use that instead of indented code.
+//!
+//! ## References
+//!
+//! *   [`code-indented.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/code-indented.js)
+//! *   [*§ 4.4 Indented code blocks* in `CommonMark`](https://spec.commonmark.org/0.30/#indented-code-blocks)
+//!
+//! [code-fenced]: crate::construct::code_fenced
+//! [html-pre]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-pre-element
+//! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
+//!
+//! <!-- To do: link `flow`, `code_text` -->
+
+use crate::constant::TAB_SIZE;
+use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+
+/// Start of code (indented): on a space, tab, or virtual space, opens the tokens and counts indent.
+///
+/// ```markdown
+/// |    asd
+/// ```
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::VirtualSpace | Code::Char(' ' | '\t') => {
+            tokenizer.enter(TokenType::CodeIndented);
+            tokenizer.enter(TokenType::CodeIndentedPrefixWhitespace);
+            indent(tokenizer, code, 0)
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// Inside the initial whitespace, counting `size` up to `TAB_SIZE`; anything else first is `Nok`.
+///
+/// ```markdown
+///  |   asd
+///   |  asd
+///    | asd
+///     |asd
+/// ```
+///
+/// > **Parsing note**: it is not needed to check if this first line is a
+/// > filled line (that it has a non-whitespace character), because blank lines
+/// > are parsed already, so we never run into that.
+fn indent(tokenizer: &mut Tokenizer, code: Code, size: usize) -> StateFnResult {
+    match code {
+        _ if size == TAB_SIZE => {
+            tokenizer.exit(TokenType::CodeIndentedPrefixWhitespace);
+            at_break(tokenizer, code)
+        }
+        Code::VirtualSpace | Code::Char(' ' | '\t') => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(move |tokenizer, code| {
+                    indent(tokenizer, code, size + 1)
+                })),
+                None,
+            )
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// At a break: at EOF, at a line ending (attempts `further_start`), or at the start of a chunk.
+///
+/// ```markdown
+///     |asd
+///     asd|
+/// ```
+fn at_break(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::None => after(tokenizer, code),
+        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => tokenizer
+            .attempt(further_start, |ok| {
+                Box::new(if ok { at_break } else { after })
+            })(tokenizer, code),
+        _ => {
+            tokenizer.enter(TokenType::CodeFlowChunk);
+            content(tokenizer, code)
+        }
+    }
+}
+
+/// Inside code content: consumes up to EOF or a line ending, then returns to `at_break`.
+///
+/// ```markdown
+///     |ab
+///     a|b
+///     ab|
+/// ```
+fn content(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            tokenizer.exit(TokenType::CodeFlowChunk);
+            at_break(tokenizer, code)
+        }
+        _ => {
+            tokenizer.consume(code);
+            (State::Fn(Box::new(content)), None)
+        }
+    }
+}
+
+/// After indented code: closes the `CodeIndented` token and returns `State::Ok` together with `code`.
+///
+/// ```markdown
+///     ab|
+/// ```
+fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    tokenizer.exit(TokenType::CodeIndented);
+    (State::Ok, Some(vec![code]))
+}
+
+/// At a line ending (consumed as `LineEnding`) or right after it, trying to parse another indent.
+///
+/// ```markdown
+///     ab|
+///     cd
+/// ```
+fn further_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    // To do: `nok` if lazy line.
+    match code {
+        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            tokenizer.enter(TokenType::LineEnding);
+            tokenizer.consume(code);
+            tokenizer.exit(TokenType::LineEnding);
+            (State::Fn(Box::new(further_start)), None)
+        }
+        Code::VirtualSpace | Code::Char(' ' | '\t') => {
+            tokenizer.enter(TokenType::CodeIndentedPrefixWhitespace);
+            further_indent(tokenizer, code, 0)
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// Inside further whitespace: `Ok` at `TAB_SIZE`, back to `further_start` on a line ending, else `Nok`.
+///
+/// ```markdown
+///     asd
+///   |  asd
+/// ```
+fn further_indent(tokenizer: &mut Tokenizer, code: Code, size: usize) -> StateFnResult {
+    match code {
+        _ if size == TAB_SIZE => {
+            tokenizer.exit(TokenType::CodeIndentedPrefixWhitespace);
+            (State::Ok, Some(vec![code]))
+        }
+        Code::VirtualSpace | Code::Char(' ' | '\t') => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(move |tokenizer, code| {
+                    further_indent(tokenizer, code, size + 1)
+                })),
+                None,
+            )
+        }
+        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            tokenizer.exit(TokenType::CodeIndentedPrefixWhitespace);
+            further_start(tokenizer, code)
+        }
+        _ => (State::Nok, None),
+    }
+}
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
new file mode 100644
index 0000000..b3aef1b
--- /dev/null
+++ b/src/construct/heading_atx.rs
@@ -0,0 +1,175 @@
+//! Heading (atx) is a construct that occurs in the flow content type.
+//!
+//! They’re formed with the following BNF:
+//!
+//! ```bnf
+//! heading_atx ::= 1*6'#' [ 1*space_or_tab code [ 1*space_or_tab 1*'#' ] ] *space_or_tab
+//!
+//! code ::= . ; any unicode code point (other than line endings).
+//! space_or_tab ::= ' ' | '\t'
+//! ```
+//!
+//! Headings in markdown relate to the `<h1>` through `<h6>` elements in HTML.
+//! See [*§ 4.3.6 The `h1`, `h2`, `h3`, `h4`, `h5`, and `h6` elements* in the
+//! HTML spec][html] for more info.
+//!
+//! `CommonMark` introduced the requirement on whitespace existing after the
+//! opening sequence and before text.
+//! In older markdown versions, this was not required, and headings would form
+//! without it.
+//!
+//! In markdown, it is also possible to create headings with the setext heading
+//! construct.
+//! The benefit of setext headings is that their text can include line endings.
+//! However, their limit is that they cannot form `<h3>` through `<h6>`
+//! headings.
+//! Due to this limitation, it is recommended to use atx headings.
+//!
+//! > 🏛 **Background**: the word *setext* originates from a small markup
+//! > language by Ian Feldman from 1991.
+//! > See [*§ Setext* on Wikipedia][wiki-setext] for more info.
+//! > The word *atx* originates from a tiny markup language by Aaron Swartz
+//! > from 2002.
+//! > See [*§ atx, the true structured text format* on `aaronsw.com`][atx] for
+//! > more info.
+//!
+//! ## References
+//!
+//! *   [`heading-atx.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/heading-atx.js)
+//! *   [*§ 4.2 ATX headings* in `CommonMark`](https://spec.commonmark.org/0.30/#atx-headings)
+//!
+//! [html]: https://html.spec.whatwg.org/multipage/sections.html#the-h1,-h2,-h3,-h4,-h5,-and-h6-elements
+//! [wiki-setext]: https://en.wikipedia.org/wiki/Setext
+//! [atx]: http://www.aaronsw.com/2002/atx/
+//!
+//! <!-- To do: link `flow`, `setext` -->
+
+use crate::constant::HEADING_ATX_OPENING_FENCE_SIZE_MAX;
+use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+
+/// Start of a heading (atx): on `#`, opens the heading and sequence tokens; anything else is `Nok`.
+///
+/// ```markdown
+/// |## alpha
+/// ```
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    if Code::Char('#') == code {
+        tokenizer.enter(TokenType::AtxHeading);
+        tokenizer.enter(TokenType::AtxHeadingSequence);
+        sequence_open(tokenizer, code, 0)
+    } else {
+        (State::Nok, None)
+    }
+}
+
+/// In the opening sequence: up to `HEADING_ATX_OPENING_FENCE_SIZE_MAX` `#`s, then whitespace/eol/EOF.
+///
+/// ```markdown
+/// #|# alpha
+/// ```
+fn sequence_open(tokenizer: &mut Tokenizer, code: Code, rank: usize) -> StateFnResult {
+    match code {
+        Code::None
+        | Code::CarriageReturnLineFeed
+        | Code::VirtualSpace
+        | Code::Char('\t' | '\n' | '\r' | ' ')
+            if rank > 0 =>
+        {
+            tokenizer.exit(TokenType::AtxHeadingSequence);
+            at_break(tokenizer, code)
+        }
+        Code::Char('#') if rank < HEADING_ATX_OPENING_FENCE_SIZE_MAX => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(move |tokenizer, code| {
+                    sequence_open(tokenizer, code, rank + 1)
+                })),
+                None,
+            )
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// Between parts of the heading: dispatches on the end, whitespace, a `#` sequence, or text.
+///
+/// ```markdown
+/// ## |alpha
+/// ## alpha| bravo
+/// ## alpha |bravo
+/// ## alpha bravo|##
+/// ## alpha bravo ##|
+/// ```
+fn at_break(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            tokenizer.exit(TokenType::AtxHeading);
+            (State::Ok, Some(vec![code]))
+        }
+        Code::VirtualSpace | Code::Char('\t' | ' ') => {
+            tokenizer.enter(TokenType::AtxHeadingWhitespace);
+            whitespace(tokenizer, code)
+        }
+        Code::Char('#') => {
+            tokenizer.enter(TokenType::AtxHeadingSequence);
+            further_sequence(tokenizer, code)
+        }
+        Code::Char(_) => {
+            tokenizer.enter(TokenType::AtxHeadingText);
+            data(tokenizer, code)
+        }
+    }
+}
+
+/// In a further sequence (after whitespace): consumes `#`s, then returns to `at_break`.
+/// Could be normal “visible” hashes in the heading or a final sequence; both use the same token.
+///
+/// ```markdown
+/// ## alpha #|#
+/// ```
+fn further_sequence(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    if let Code::Char('#') = code {
+        tokenizer.consume(code);
+        (State::Fn(Box::new(further_sequence)), None)
+    } else {
+        tokenizer.exit(TokenType::AtxHeadingSequence);
+        at_break(tokenizer, code)
+    }
+}
+
+/// In whitespace: consumes spaces, tabs, and virtual spaces, then returns to `at_break`.
+///
+/// ```markdown
+/// ## alpha | bravo
+/// ```
+fn whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::VirtualSpace | Code::Char('\t' | ' ') => {
+            tokenizer.consume(code);
+            (State::Fn(Box::new(whitespace)), None)
+        }
+        _ => {
+            tokenizer.exit(TokenType::AtxHeadingWhitespace);
+            at_break(tokenizer, code)
+        }
+    }
+}
+
+/// In text: consumes up to a tab, a space, a line ending, or EOF, then returns to `at_break`.
+///
+/// ```markdown
+/// ## al|pha
+/// ```
+fn data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        // Note: `#` for closing sequence must be preceded by whitespace, otherwise it’s just text.
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\t' | '\n' | '\r' | ' ') => {
+            tokenizer.exit(TokenType::AtxHeadingText);
+            at_break(tokenizer, code)
+        }
+        _ => {
+            tokenizer.consume(code);
+            (State::Fn(Box::new(data)), None)
+        }
+    }
+}
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
new file mode 100644
index 0000000..b7d5570
--- /dev/null
+++ b/src/construct/html_flow.rs
@@ -0,0 +1,1068 @@
+//! HTML (flow) is a construct that occurs in the flow content type.
+//!
+//! It forms with the following BNF:
+//!
+//! ```bnf
+//! html_flow ::= raw | comment | instruction | declaration | cdata | basic | complete
+//!
+//! ; Note: closing tag name needs to match opening tag name.
+//! raw ::= '<' raw_tag_name [ [ ( whitespace | '>' ) *line ] *( eol *line ) ] [ '</' raw_tag_name *line ]
+//! comment ::= '<!--' [ *'-' '>' *line | *line *( eol *line ) [ '-->' *line ] ]
+//! instruction ::= '<?' [ '>' *line | *line *( eol *line ) [ '?>' *line ] ]
+//! declaration ::= '<!' ascii_alphabetic *line *( eol *line ) [ '>' *line ]
+//! cdata ::= '<![CDATA[' *line *( eol *line ) [ ']]>' *line ]
+//! basic ::= '<' [ '/' ] basic_tag_name [ [ '/' ] '>' *line *( eol 1*line ) ]
+//! complete ::= ( opening_tag | closing_tag ) ( whitespace_optional *( eol 1*line ) | whitespace_optional )
+//!
+//! raw_tag_name ::= 'pre' | 'script' | 'style' | 'textarea' ; Note: case-insensitive.
+//! basic_tag_name ::= 'address' | 'article' | 'aside' | ... ; See `constants.rs`, and note: case-insensitive.
+//! opening_tag ::= '<' tag_name *( whitespace attribute ) [ whitespace_optional '/' ] whitespace_optional '>'
+//! closing_tag ::= '</' tag_name whitespace_optional '>'
+//! tag_name ::= ascii_alphabetic *( '-' | ascii_alphanumeric )
+//! attribute ::= attribute_name [ whitespace_optional '=' whitespace_optional attribute_value ]
+//! attribute_name ::= ( ':' | '_' | ascii_alphabetic ) *( '-' | '.' | ':' | '_' | ascii_alphanumeric )
+//! attribute_value ::= '"' *( line - '"' ) '"' | "'" *( line - "'" )  "'" | 1*( line - space_or_tab - '"' - "'" - '/' - '<' - '=' - '>' - '`')
+//!
+//! whitespace ::= 1*space_or_tab
+//! whitespace_optional ::= [ whitespace ]
+//! line ::= code - eol
+//! eol ::= '\r' | '\r\n' | '\n'
+//! space_or_tab ::= ' ' | '\t'
+//! ```
+//!
+//! The grammar for HTML in markdown does not resemble the rules of parsing
+//! HTML according to the [*§ 13.2 Parsing HTML documents* in the HTML
+//! spec][html-parsing].
+//! As such, HTML in markdown *resembles* HTML, but is instead a (naïve?)
+//! attempt to parse an XML-like language.
+//! By extension, another notable property of the grammar is that it can
+//! result in invalid HTML, in that it allows things that wouldn’t work or
+//! wouldn’t work well in HTML, such as mismatched tags.
+//!
+//! Because the **basic** and **complete** productions in the grammar form with
+//! a tag, followed by more stuff, and stop at a blank line, it is possible to
+//! interleave (a word for switching between languages) markdown and HTML
+//! together, by placing the opening and closing tags on their own lines,
+//! with blank lines between them and markdown.
+//! For example:
+//!
+//! ```markdown
+//! <div>This is a <code>div</code> but *this* is not emphasis.</div>
+//!
+//! <div>
+//!
+//! This is a paragraph in a `div` and *this* is emphasis.
+//!
+//! </div>
+//! ```
+//!
+//! The **complete** production of HTML (flow) is not allowed to interrupt
+//! content.
+//! That means that a blank line is needed between a paragraph and it.
+//! However, HTML (text) has a similar production, which will typically kick-in
+//! instead.
+//!
+//! The list of tag names allowed in the **raw** production are defined in
+//! [`HTML_RAW_NAMES`][html_raw_names].
+//! This production exists because there are a few cases where markdown
+//! *inside* some elements, and hence interleaving, does not make sense.
+//!
+//! The list of tag names allowed in the **basic** production are defined in
+//! [`HTML_BLOCK_NAMES`][html_block_names].
+//! This production exists because there are a few cases where we can decide
+//! early that something is going to be a flow (block) element instead of a
+//! phrasing (inline) element.
+//! We *can* interrupt and don’t have to care too much about it being
+//! well-formed.
+//!
+//! ## References
+//!
+//! *   [`html-flow.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/html-flow.js)
+//! *   [*§ 4.6 HTML blocks* in `CommonMark`](https://spec.commonmark.org/0.30/#html-blocks)
+//!
+//! [html_raw_names]: crate::constant::HTML_RAW_NAMES
+//! [html_block_names]: crate::constant::HTML_BLOCK_NAMES
+//! [html-parsing]: https://html.spec.whatwg.org/multipage/parsing.html#parsing
+//!
+//! <!-- To do: link stuff -->
+
+use crate::constant::{HTML_BLOCK_NAMES, HTML_RAW_NAMES, HTML_RAW_SIZE_MAX};
+use crate::construct::{blank_line::start as blank_line, partial_whitespace::start as whitespace};
+use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+
+/// Kind of HTML (flow); “condition N” presumably is the matching `CommonMark` HTML block condition.
+#[derive(Debug, Clone, PartialEq)]
+enum Kind {
+    /// Not yet known.
+    Unknown,
+    /// Symbol for `<script>` (condition 1).
+    Raw,
+    /// Symbol for `<!---->` (condition 2).
+    Comment,
+    /// Symbol for `<?php?>` (condition 3).
+    Instruction,
+    /// Symbol for `<!doctype>` (condition 4).
+    Declaration,
+    /// Symbol for `<![CDATA[]]>` (condition 5).
+    Cdata,
+    /// Symbol for `<div` (condition 6).
+    Basic,
+    /// Symbol for `<x>` (condition 7).
+    Complete,
+}
+
+/// Type of quote, if we’re in an attribute, in complete (condition 7).
+#[derive(Debug, Clone, PartialEq)]
+enum QuoteKind {
+    /// Not in a quoted attribute.
+    None,
+    /// In a double quoted (`"`) attribute.
+    Double,
+    /// In a single quoted (`'`) attribute.
+    Single,
+}
+
+/// State needed to parse HTML (flow), passed by value from one state function to the next.
+#[derive(Debug, Clone)]
+struct Info {
+    /// Kind of HTML (flow).
+    kind: Kind,
+    /// Whether this is a start tag (`<` not followed by `/`).
+    start_tag: bool,
+    /// Depending on `kind`: collects parsed characters (such as a tag name), or
+    /// stores the characters that are expected next (such as `CDATA[`).
+    buffer: Vec<char>,
+    /// `index` into `buffer` when expecting certain characters.
+    index: usize,
+    /// Current quote, when in a double or single quoted attribute value.
+    quote: QuoteKind,
+}
+
+// To do: mark as concrete (block quotes or lists can’t “pierce” into HTML).
+
+/// Start of HTML (flow), before optional whitespace: opens `HtmlFlow` and `HtmlFlowData`.
+///
+/// ```markdown
+/// |<x />
+/// ```
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    tokenizer.enter(TokenType::HtmlFlow);
+    tokenizer.enter(TokenType::HtmlFlowData);
+    tokenizer.attempt(
+        |tokenizer, code| whitespace(tokenizer, code, TokenType::Whitespace),
+        |_ok| Box::new(before),
+    )(tokenizer, code)
+}
+
+/// After optional whitespace, at `<`: consumes it and continues in `open` with a fresh `Info`.
+///
+/// ```markdown
+/// |<x />
+/// ```
+fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    if Code::Char('<') == code {
+        tokenizer.consume(code);
+        (
+            State::Fn(Box::new(|tokenizer, code| {
+                open(
+                    tokenizer,
+                    Info {
+                        kind: Kind::Unknown,
+                        start_tag: false,
+                        buffer: vec![],
+                        index: 0,
+                        quote: QuoteKind::None,
+                    },
+                    code,
+                )
+            })),
+            None,
+        )
+    } else {
+        (State::Nok, None)
+    }
+}
+
+/// After `<`, before a tag name or other stuff (`!`, `/`, or `?`); anything else is `Nok`.
+///
+/// ```markdown
+/// <|x />
+/// <|!doctype />
+/// <|!--xxx-->
+/// ```
+fn open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    match code {
+        Code::Char('!') => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    declaration_start(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        Code::Char('/') => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    tag_close_start(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        Code::Char('?') => {
+            // To do: life times.
+            let mut clone = info;
+            clone.kind = Kind::Instruction;
+            tokenizer.consume(code);
+            // While we’re in an instruction instead of a declaration, we’re on a `?`
+            // right now, so we do need to search for `>`, similar to declarations.
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    continuation_declaration_inside(tokenizer, clone, code)
+                })),
+                None,
+            )
+        }
+        Code::Char(char) if char.is_ascii_alphabetic() => {
+            // To do: life times.
+            let mut clone = info;
+            clone.start_tag = true;
+            tag_name(tokenizer, clone, code)
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// After `<!`, so inside a declaration, comment, or CDATA; sets `kind` based on the next character.
+///
+/// ```markdown
+/// <!|doctype />
+/// <!|--xxx-->
+/// <!|[CDATA[>&<]]>
+/// ```
+fn declaration_start(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    match code {
+        Code::Char('-') => {
+            tokenizer.consume(code);
+            let mut clone = info;
+            clone.kind = Kind::Comment;
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    comment_open_inside(tokenizer, clone, code)
+                })),
+                None,
+            )
+        }
+        Code::Char('[') => {
+            tokenizer.consume(code);
+            let mut clone = info;
+            clone.kind = Kind::Cdata;
+            clone.buffer = vec!['C', 'D', 'A', 'T', 'A', '['];
+            clone.index = 0;
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    cdata_open_inside(tokenizer, clone, code)
+                })),
+                None,
+            )
+        }
+        Code::Char(char) if char.is_ascii_alphabetic() => {
+            tokenizer.consume(code);
+            // To do: life times.
+            let mut clone = info;
+            clone.kind = Kind::Declaration;
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    continuation_declaration_inside(tokenizer, clone, code)
+                })),
+                None,
+            )
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// After `<!-`, inside a comment, before another `-`; anything but `-` is `Nok`.
+///
+/// ```markdown
+/// <!-|-xxx-->
+/// ```
+fn comment_open_inside(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    match code {
+        Code::Char('-') => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    continuation_declaration_inside(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// After `<![`, inside CDATA, matching `CDATA[` one character at a time against `info.buffer`.
+///
+/// ```markdown
+/// <![|CDATA[>&<]]>
+/// <![CD|ATA[>&<]]>
+/// <![CDA|TA[>&<]]>
+/// <![CDAT|A[>&<]]>
+/// <![CDATA|[>&<]]>
+/// ```
+fn cdata_open_inside(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    match code {
+        Code::Char(char) if char == info.buffer[info.index] => {
+            let mut clone = info;
+            clone.index += 1;
+            tokenizer.consume(code);
+
+            if clone.index == clone.buffer.len() {
+                clone.buffer.clear();
+                (
+                    State::Fn(Box::new(|tokenizer, code| {
+                        continuation(tokenizer, clone, code)
+                    })),
+                    None,
+                )
+            } else {
+                (
+                    State::Fn(Box::new(|tokenizer, code| {
+                        cdata_open_inside(tokenizer, clone, code)
+                    })),
+                    None,
+                )
+            }
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// After `</`, in a closing tag, before a tag name: buffers the first ASCII letter, else `Nok`.
+///
+/// ```markdown
+/// </|x>
+/// ```
+fn tag_close_start(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    match code {
+        Code::Char(char) if char.is_ascii_alphabetic() => {
+            tokenizer.consume(code);
+            // To do: life times.
+            let mut clone = info;
+            clone.buffer.push(char);
+            (
+                State::Fn(Box::new(|tokenizer, code| tag_name(tokenizer, clone, code))),
+                None,
+            )
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// In a tag name: buffers name characters; at the end of the name, picks raw, basic, or complete.
+///
+/// ```markdown
+/// <a|b>
+/// </a|b>
+/// ```
+fn tag_name(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    match code {
+        Code::None
+        | Code::CarriageReturnLineFeed
+        | Code::VirtualSpace
+        | Code::Char('\t' | '\n' | '\r' | ' ' | '/' | '>') => {
+            let tag_name_buffer = info.buffer.iter().collect::<String>().to_lowercase();
+            let name = tag_name_buffer.as_str();
+            let slash = if let Code::Char(char) = code {
+                char == '/'
+            } else {
+                false
+            };
+
+            if !slash && info.start_tag && HTML_RAW_NAMES.contains(&name) {
+                // To do: life times.
+                let mut clone = info;
+                clone.kind = Kind::Raw;
+                clone.buffer.clear();
+                continuation(tokenizer, clone, code)
+            } else if HTML_BLOCK_NAMES.contains(&name) {
+                // To do: life times.
+                let mut clone = info;
+                clone.kind = Kind::Basic;
+                clone.buffer.clear();
+
+                if slash {
+                    tokenizer.consume(code);
+                    (
+                        State::Fn(Box::new(|tokenizer, code| {
+                            basic_self_closing(tokenizer, clone, code)
+                        })),
+                        None,
+                    )
+                } else {
+                    continuation(tokenizer, clone, code)
+                }
+            } else {
+                // To do: life times.
+                let mut clone = info;
+                clone.kind = Kind::Complete;
+
+                // To do: do not support complete HTML when interrupting.
+                if clone.start_tag {
+                    complete_attribute_name_before(tokenizer, clone, code)
+                } else {
+                    complete_closing_tag_after(tokenizer, clone, code)
+                }
+            }
+        }
+        Code::Char(char) if char == '-' || char.is_ascii_alphanumeric() => {
+            tokenizer.consume(code);
+            let mut clone = info;
+            clone.buffer.push(char);
+            (
+                State::Fn(Box::new(|tokenizer, code| tag_name(tokenizer, clone, code))),
+                None,
+            )
+        }
+        Code::Char(_) => (State::Nok, None),
+    }
+}
+
+/// After a closing slash of a basic tag name: only `>` is accepted, anything else is `Nok`.
+///
+/// ```markdown
+/// <div/|>
+/// ```
+fn basic_self_closing(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    match code {
+        Code::Char('>') => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    continuation(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// After the tag name of a complete closing tag: skips whitespace, then moves on to `complete_end`.
+///
+/// ```markdown
+/// </x|>
+/// </x |>
+/// ```
+fn complete_closing_tag_after(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    match code {
+        Code::VirtualSpace | Code::Char('\t' | ' ') => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    complete_closing_tag_after(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        _ => complete_end(tokenizer, info, code),
+    }
+}
+
+/// At a place where an attribute name, a self-closing `/`, or the end of the tag would be valid.
+///
+/// At first, this state is used after a complete tag name, after whitespace,
+/// where it expects optional attributes or the end of the tag.
+/// It is also reused after attributes, when expecting more optional
+/// attributes.
+///
+/// ```markdown
+/// <x |/>
+/// <x |:asd>
+/// <x |_asd>
+/// <x |asd>
+/// <x | >
+/// <x |>
+/// ```
+fn complete_attribute_name_before(
+    tokenizer: &mut Tokenizer,
+    info: Info,
+    code: Code,
+) -> StateFnResult {
+    match code {
+        Code::Char('/') => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    complete_end(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        Code::Char(char) if char == ':' || char == '_' || char.is_ascii_alphabetic() => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    complete_attribute_name(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        Code::VirtualSpace | Code::Char('\t' | ' ') => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    complete_attribute_name_before(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        _ => complete_end(tokenizer, info, code),
+    }
+}
+
+/// In an attribute name: consumes `-`, `.`, `:`, `_`, and ASCII alphanumerics.
+///
+/// ```markdown
+/// <x :|>
+/// <x _|>
+/// <x a|>
+/// ```
+fn complete_attribute_name(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    match code {
+        Code::Char(char)
+            if char == '-'
+                || char == '.'
+                || char == ':'
+                || char == '_'
+                || char.is_ascii_alphanumeric() =>
+        {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    complete_attribute_name(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        _ => complete_attribute_name_after(tokenizer, info, code),
+    }
+}
+
+/// After an attribute name, before an attribute initializer (`=`), whitespace, or
+/// whatever `complete_attribute_name_before` accepts (such as the end of the tag).
+///
+/// ```markdown
+/// <x a|>
+/// <x a|=b>
+/// <x a|="c">
+/// ```
+fn complete_attribute_name_after(
+    tokenizer: &mut Tokenizer,
+    info: Info,
+    code: Code,
+) -> StateFnResult {
+    match code {
+        Code::Char('=') => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    complete_attribute_value_before(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        Code::VirtualSpace | Code::Char('\t' | ' ') => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    complete_attribute_name_after(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        _ => complete_attribute_name_before(tokenizer, info, code),
+    }
+}
+
+/// Before an unquoted, double quoted, or single quoted attribute value,
+/// allowing whitespace; EOF, `<`, `=`, `>`, and a grave accent are `Nok`.
+///
+/// ```markdown
+/// <x a=|b>
+/// <x a=|"c">
+/// ```
+fn complete_attribute_value_before(
+    tokenizer: &mut Tokenizer,
+    info: Info,
+    code: Code,
+) -> StateFnResult {
+    match code {
+        Code::None | Code::Char('<' | '=' | '>' | '`') => (State::Nok, None),
+        Code::Char(char) if char == '"' || char == '\'' => {
+            tokenizer.consume(code);
+            // To do: life times.
+            let mut clone = info;
+            clone.quote = if char == '"' {
+                QuoteKind::Double
+            } else {
+                QuoteKind::Single
+            };
+
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    complete_attribute_value_quoted(tokenizer, clone, code)
+                })),
+                None,
+            )
+        }
+        Code::VirtualSpace | Code::Char('\t' | ' ') => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    complete_attribute_value_before(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        _ => complete_attribute_value_unquoted(tokenizer, info, code),
+    }
+}
+
+/// Inside a double or single quoted attribute value.
+///
+/// ```markdown
+/// <x a="|">
+/// <x a='|'>
+/// ```
+///
+/// Fails at an eof or a line ending; every other code is consumed.
+/// The quote matching `info.quote` ends the value.
+fn complete_attribute_value_quoted(
+    tokenizer: &mut Tokenizer,
+    info: Info,
+    code: Code,
+) -> StateFnResult {
+    if matches!(
+        code,
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n')
+    ) {
+        return (State::Nok, None);
+    }
+
+    let closing = if info.quote == QuoteKind::Double {
+        '"'
+    } else {
+        '\''
+    };
+    let at_closing = matches!(code, Code::Char(ch) if ch == closing);
+
+    tokenizer.consume(code);
+    let next = if at_closing {
+        State::Fn(Box::new(|t, c| complete_attribute_value_quoted_after(t, info, c)))
+    } else {
+        State::Fn(Box::new(|t, c| complete_attribute_value_quoted(t, info, c)))
+    };
+    (next, None)
+}
+
+/// Inside an unquoted attribute value.
+///
+/// ```markdown
+/// <x a=b|c>
+/// ```
+///
+/// The value runs until an eof, a line ending, whitespace, or one of
+/// `"`, `'`, `/`, `<`, `=`, `>`, and `` ` ``; that code is not consumed here but
+/// passed to `complete_attribute_name_after`.
+fn complete_attribute_value_unquoted(
+    tokenizer: &mut Tokenizer,
+    info: Info,
+    code: Code,
+) -> StateFnResult {
+    let at_end = matches!(
+        code,
+        Code::None
+            | Code::CarriageReturnLineFeed
+            | Code::VirtualSpace
+            | Code::Char('\t' | '\n' | '\r' | ' ' | '"' | '\'' | '/' | '<' | '=' | '>' | '`')
+    );
+
+    if at_end {
+        return complete_attribute_name_after(tokenizer, info, code);
+    }
+
+    tokenizer.consume(code);
+    let next = State::Fn(Box::new(|t, c| complete_attribute_value_unquoted(t, info, c)));
+    (next, None)
+}
+
+/// Right after the closing quote of an attribute value, where only
+/// whitespace, `/`, or `>` may follow.
+///
+/// ```markdown
+/// <x a="b"|>
+/// ```
+fn complete_attribute_value_quoted_after(
+    tokenizer: &mut Tokenizer,
+    info: Info,
+    code: Code,
+) -> StateFnResult {
+    if matches!(code, Code::VirtualSpace | Code::Char('\t' | ' ' | '/' | '>')) {
+        complete_attribute_name_before(tokenizer, info, code)
+    } else {
+        (State::Nok, None)
+    }
+}
+
+/// At a place in a complete tag where nothing but `>` is allowed.
+///
+/// ```markdown
+/// <x a="b"|>
+/// ```
+fn complete_end(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    if let Code::Char('>') = code {
+        tokenizer.consume(code);
+        let next = State::Fn(Box::new(|t, c| complete_after(t, info, c)));
+        (next, None)
+    } else {
+        (State::Nok, None)
+    }
+}
+
+/// After the `>` of a complete tag: only whitespace may follow on this line.
+///
+/// ```markdown
+/// <x>|
+/// ```
+fn complete_after(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    if matches!(code, Code::VirtualSpace | Code::Char('\t' | ' ')) {
+        tokenizer.consume(code);
+        let next = State::Fn(Box::new(|t, c| complete_after(t, info, c)));
+        return (next, None);
+    }
+
+    if matches!(
+        code,
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')
+    ) {
+        // The line ended cleanly after the tag: move on to continuation.
+        return continuation(tokenizer, info, code);
+    }
+
+    // Any other character after the tag is not allowed.
+    (State::Nok, None)
+}
+
+/// Inside continuation of any HTML kind.
+///
+/// ```markdown
+/// <!--x|xx-->
+/// ```
+///
+/// Looks for the first character of the closing sequence that belongs to
+/// `info.kind` and moves to the state that checks the rest of that sequence.
+/// Line endings and eof are handed to `continuation_at_line_ending` (or, for
+/// basic and complete HTML followed by a blank line, `continuation_close`).
+/// Every other code is consumed as part of the current line.
+///
+/// The order of the arms matters: the guarded line ending arm must come before
+/// the unguarded one.
+fn continuation(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    match code {
+        // Comment: a `-` could start the closing dashes.
+        Code::Char('-') if info.kind == Kind::Comment => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    continuation_comment_inside(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        // Raw: a `<` could start a closing tag.
+        Code::Char('<') if info.kind == Kind::Raw => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    continuation_raw_tag_open(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        // Declaration: a single `>` closes it.
+        Code::Char('>') if info.kind == Kind::Declaration => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    continuation_close(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        // Instruction: a `?` could be followed by the closing `>`.
+        Code::Char('?') if info.kind == Kind::Instruction => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    continuation_declaration_inside(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        // CDATA: a `]` could start the closing `]]>`.
+        Code::Char(']') if info.kind == Kind::Cdata => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    continuation_character_data_inside(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+        // Basic and complete HTML: at a line ending, try `blank_line_before`.
+        // If it succeeds, continue in `continuation_close`; otherwise continue
+        // in `continuation_at_line_ending`.
+        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')
+            if info.kind == Kind::Basic || info.kind == Kind::Complete =>
+        {
+            let clone = info;
+
+            tokenizer.check(blank_line_before, |ok| {
+                if ok {
+                    Box::new(|tokenizer, code| continuation_close(tokenizer, clone, code))
+                } else {
+                    Box::new(|tokenizer, code| continuation_at_line_ending(tokenizer, clone, code))
+                }
+            })(tokenizer, code)
+        }
+        // Eof for every kind, and line endings for the kinds not handled above.
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            continuation_at_line_ending(tokenizer, info, code)
+        }
+        // Anything else is data on the current line.
+        _ => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    continuation(tokenizer, info, code)
+                })),
+                None,
+            )
+        }
+    }
+}
+
+/// In continuation, before an eol or eof.
+///
+/// ```markdown
+/// <x>|
+/// ```
+///
+/// Closes the `HtmlFlowData` token of the current line and lets
+/// `html_continue_start` deal with the eol or eof itself.
+fn continuation_at_line_ending(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    tokenizer.exit(TokenType::HtmlFlowData);
+    html_continue_start(tokenizer, info, code)
+}
+
+/// In continuation, at an eol or eof, or at the start of the line after an eol.
+///
+/// ```markdown
+/// <x>|
+/// asd
+/// ```
+///
+/// At eof the `HtmlFlow` token is closed and the construct is done.
+/// A line ending is wrapped in a `LineEnding` token, after which this state
+/// runs again.
+/// On any other code a new `HtmlFlowData` token is opened and `continuation`
+/// takes over.
+fn html_continue_start(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    if matches!(code, Code::None) {
+        tokenizer.exit(TokenType::HtmlFlow);
+        return (State::Ok, Some(vec![code]));
+    }
+
+    // To do: do not allow lazy lines.
+    if matches!(code, Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')) {
+        tokenizer.enter(TokenType::LineEnding);
+        tokenizer.consume(code);
+        tokenizer.exit(TokenType::LineEnding);
+        let next = State::Fn(Box::new(|t, c| html_continue_start(t, info, c)));
+        return (next, None);
+    }
+
+    tokenizer.enter(TokenType::HtmlFlowData);
+    continuation(tokenizer, info, code)
+}
+
+/// In comment continuation, after a first `-`, hoping for a second one.
+///
+/// ```markdown
+/// <!--xxx-|->
+/// ```
+fn continuation_comment_inside(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    let second_dash = matches!(code, Code::Char('-')) && info.kind == Kind::Comment;
+
+    if !second_dash {
+        // Not a closing sequence: treat the code as regular continuation.
+        return continuation(tokenizer, info, code);
+    }
+
+    tokenizer.consume(code);
+    let next = State::Fn(Box::new(|t, c| continuation_declaration_inside(t, info, c)));
+    (next, None)
+}
+
+/// In raw continuation, after `<`, where a `/` would start a closing tag.
+///
+/// ```markdown
+/// <script>console.log(1)<|/script>
+/// ```
+fn continuation_raw_tag_open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    if let Code::Char('/') = code {
+        tokenizer.consume(code);
+        let next = State::Fn(Box::new(|t, c| continuation_raw_end_tag(t, info, c)));
+        (next, None)
+    } else {
+        continuation(tokenizer, info, code)
+    }
+}
+
+/// In raw continuation, after `</`, expecting or inside a raw tag name.
+///
+/// ```markdown
+/// <script>console.log(1)</|script>
+/// <script>console.log(1)</s|cript>
+/// <script>console.log(1)</script|>
+/// ```
+///
+/// ASCII letters are collected in `info.buffer` for as long as it is shorter
+/// than `HTML_RAW_SIZE_MAX`.
+/// At `>`, the lowercased buffer is compared against `HTML_RAW_NAMES`: on a
+/// match the `>` is consumed and `continuation_close` takes over.
+/// In every case where this turns out not to be a raw closing tag, the buffer
+/// is cleared before going back to `continuation`, so that letters of an
+/// aborted end tag (such as `</x `) cannot leak into the name of a later one.
+fn continuation_raw_end_tag(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    match code {
+        Code::Char('>') => {
+            let tag_name_buffer = info.buffer.iter().collect::<String>().to_lowercase();
+            // To do: life times.
+            let mut clone = info;
+            clone.buffer.clear();
+
+            if HTML_RAW_NAMES.contains(&tag_name_buffer.as_str()) {
+                tokenizer.consume(code);
+                (
+                    State::Fn(Box::new(|tokenizer, code| {
+                        continuation_close(tokenizer, clone, code)
+                    })),
+                    None,
+                )
+            } else {
+                continuation(tokenizer, clone, code)
+            }
+        }
+        Code::Char(char) if char.is_ascii_alphabetic() && info.buffer.len() < HTML_RAW_SIZE_MAX => {
+            tokenizer.consume(code);
+            // To do: life times.
+            let mut clone = info;
+            clone.buffer.push(char);
+            (
+                State::Fn(Box::new(|tokenizer, code| {
+                    continuation_raw_end_tag(tokenizer, clone, code)
+                })),
+                None,
+            )
+        }
+        _ => {
+            // Not a closing tag after all (or the name got too long): forget
+            // the partial name so the next `</` starts with an empty buffer.
+            // To do: life times.
+            let mut clone = info;
+            clone.buffer.clear();
+            continuation(tokenizer, clone, code)
+        }
+    }
+}
+
+/// In CDATA continuation, after a first `]`, where `]>` would close it.
+///
+/// ```markdown
+/// <![CDATA[>&<]|]>
+/// ```
+fn continuation_character_data_inside(
+    tokenizer: &mut Tokenizer,
+    info: Info,
+    code: Code,
+) -> StateFnResult {
+    if let Code::Char(']') = code {
+        tokenizer.consume(code);
+        let next = State::Fn(Box::new(|t, c| continuation_declaration_inside(t, info, c)));
+        (next, None)
+    } else {
+        continuation(tokenizer, info, code)
+    }
+}
+
+/// In continuation, at the point where a `>` would close the HTML.
+///
+/// Reached after `?` in an instruction, `--` in a comment, and `]]` in CDATA.
+/// In a comment, additional dashes keep this state alive.
+///
+/// ```markdown
+/// <!--|>
+/// <?ab?|>
+/// <?|>
+/// <!q|>
+/// <!--ab--|>
+/// <!--ab--|->
+/// <!--ab---|>
+/// <![CDATA[>&<]]|>
+/// ```
+fn continuation_declaration_inside(
+    tokenizer: &mut Tokenizer,
+    info: Info,
+    code: Code,
+) -> StateFnResult {
+    let closes = matches!(code, Code::Char('>'));
+    let extra_dash = matches!(code, Code::Char('-')) && info.kind == Kind::Comment;
+
+    if !closes && !extra_dash {
+        return continuation(tokenizer, info, code);
+    }
+
+    tokenizer.consume(code);
+    let next = if closes {
+        State::Fn(Box::new(|t, c| continuation_close(t, info, c)))
+    } else {
+        State::Fn(Box::new(|t, c| continuation_declaration_inside(t, info, c)))
+    };
+    (next, None)
+}
+
+/// In closed continuation: the rest of the line still belongs to the HTML.
+///
+/// ```markdown
+/// <!doctype>|
+/// ```
+///
+/// At the eol or eof both `HtmlFlowData` and `HtmlFlow` are closed and the
+/// construct is done; the eol/eof code itself is given back unconsumed.
+fn continuation_close(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+    let at_line_end = matches!(
+        code,
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')
+    );
+
+    if at_line_end {
+        tokenizer.exit(TokenType::HtmlFlowData);
+        tokenizer.exit(TokenType::HtmlFlow);
+        return (State::Ok, Some(vec![code]));
+    }
+
+    tokenizer.consume(code);
+    let next = State::Fn(Box::new(|t, c| continuation_close(t, info, c)));
+    (next, None)
+}
+
+/// Before a line ending, expecting a blank line.
+///
+/// ```markdown
+/// <div>|
+///
+/// ```
+///
+/// Passed to `tokenizer.check` by `continuation`: wraps the current code in a
+/// `LineEnding` token and then hands over to `blank_line` for the next line.
+/// The code is consumed without being inspected.
+fn blank_line_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    tokenizer.enter(TokenType::LineEnding);
+    tokenizer.consume(code);
+    tokenizer.exit(TokenType::LineEnding);
+    (State::Fn(Box::new(blank_line)), None)
+}
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
new file mode 100644
index 0000000..d671db6
--- /dev/null
+++ b/src/construct/mod.rs
@@ -0,0 +1,11 @@
+//! Constructs found in markdown.
+
+pub mod blank_line;
+pub mod character_escape;
+pub mod character_reference;
+pub mod code_fenced;
+pub mod code_indented;
+pub mod heading_atx;
+pub mod html_flow;
+pub mod partial_whitespace;
+pub mod thematic_break;
diff --git a/src/construct/partial_whitespace.rs b/src/construct/partial_whitespace.rs
new file mode 100644
index 0000000..dd0d2b5
--- /dev/null
+++ b/src/construct/partial_whitespace.rs
@@ -0,0 +1,66 @@
+//! A little helper to parse `space_or_tab`
+//!
+//! They’re formed with the following BNF:
+//!
+//! ```bnf
+//! space_or_tab ::= 1*(' ' | '\t')
+//! ```
+//!
+//! Depending on where whitespace can occur, it can be optional (or not),
+//! and present in the rendered result (or not).
+//!
+//! ## References
+//!
+//! *   [`micromark-factory-space/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-space/dev/index.js)
+//!
+//! <!-- To do: link stuff -->
+
+use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+
+// To do: should `token_type` be a `Some`, with `None` defaulting to something?
+// To do: should `max: Some(usize)` be added?
+
+/// Before whitespace.
+///
+/// ```markdown
+/// alpha| bravo
+/// ```
+///
+/// Fails unless `code` is a space, a tab, or a virtual space.
+/// Otherwise opens a token of type `token_type`, consumes the code, and
+/// continues in `inside`.
+pub fn start(tokenizer: &mut Tokenizer, code: Code, token_type: TokenType) -> StateFnResult {
+    if !matches!(code, Code::VirtualSpace | Code::Char('\t' | ' ')) {
+        return (State::Nok, None);
+    }
+
+    // To do: lifetimes.
+    // The type is needed twice: now to enter, later (in `inside`) to exit.
+    tokenizer.enter(token_type.clone());
+    tokenizer.consume(code);
+    let next = State::Fn(Box::new(|t, c| inside(t, c, token_type)));
+    (next, None)
+}
+
+/// Inside whitespace.
+///
+/// ```markdown
+/// alpha |bravo
+/// alpha | bravo
+/// ```
+///
+/// Keeps consuming spaces, tabs, and virtual spaces; the first other code
+/// closes the `token_type` token and is given back unconsumed.
+fn inside(tokenizer: &mut Tokenizer, code: Code, token_type: TokenType) -> StateFnResult {
+    if matches!(code, Code::VirtualSpace | Code::Char('\t' | ' ')) {
+        tokenizer.consume(code);
+        let next = State::Fn(Box::new(|t, c| inside(t, c, token_type)));
+        (next, None)
+    } else {
+        tokenizer.exit(token_type);
+        (State::Ok, Some(vec![code]))
+    }
+}
diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs
new file mode 100644
index 0000000..15ebac7
--- /dev/null
+++ b/src/construct/thematic_break.rs
@@ -0,0 +1,137 @@
+//! Thematic breaks, sometimes called horizontal rules, are a construct that
+//! occurs in the flow content type.
+//!
+//! They’re formed with the following BNF:
+//!
+//! ```bnf
+//! ; Restriction: all markers must be identical.
+//! ; Restriction: at least 3 markers must be used.
+//! thematic_break ::= *space_or_tab 1*(1*marker *space_or_tab)
+//!
+//! space_or_tab ::= ' ' | '\t'
+//! marker ::= '*' | '-' | '_'
+//! ```
+//!
+//! Thematic breaks in markdown typically relate to the HTML element `<hr>`.
+//! See [*§ 4.4.2 The `hr` element* in the HTML spec][html] for more info.
+//!
+//! It is recommended to use exactly three asterisks without whitespace when
+//! writing markdown.
+//! As using more than three markers has no effect other than wasting space,
+//! it is recommended to use exactly three markers.
+//! Thematic breaks formed with asterisks or dashes can interfere with lists
+//! if there is whitespace between them: `* * *` and `- - -`.
+//! For these reasons, it is recommended to not use spaces or tabs between the
+//! markers.
+//! Thematic breaks formed with dashes (without whitespace) can also form
+//! setext headings.
+//! As dashes and underscores frequently occur in natural language and URLs, it
+//! is recommended to use asterisks for thematic breaks to distinguish from
+//! such use.
+//! Because asterisks can be used to form the most markdown constructs, using
+//! them has the added benefit of making it easier to gloss over markdown: you
+//! can look for asterisks to find syntax while not worrying about other
+//! characters.
+//!
+//! ## References
+//!
+//! *   [`thematic-break.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/thematic-break.js)
+//! *   [*§ 4.1 Thematic breaks* in `CommonMark`](https://spec.commonmark.org/0.30/#thematic-breaks)
+//!
+//! [html]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-hr-element
+//!
+//! <!-- To do: link `flow` -->
+
+use crate::constant::THEMATIC_BREAK_MARKER_COUNT_MIN;
+use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+
+/// Start of a thematic break.
+///
+/// ```markdown
+/// |***
+/// ```
+///
+/// Only `*`, `-`, and `_` can start a thematic break; that character becomes
+/// the marker every later sequence must use.
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    if let Code::Char(marker @ ('*' | '-' | '_')) = code {
+        tokenizer.enter(TokenType::ThematicBreak);
+        at_break(tokenizer, code, marker, 0)
+    } else {
+        (State::Nok, None)
+    }
+}
+
+/// Between the parts of a thematic break: before a marker sequence, before
+/// whitespace, or at the end of the line.
+///
+/// ```markdown
+/// |***
+/// *| * *
+/// * |* *
+/// ```
+///
+/// `size` is the number of markers seen so far; the break is only accepted
+/// at an eol or eof once that reaches `THEMATIC_BREAK_MARKER_COUNT_MIN`.
+fn at_break(tokenizer: &mut Tokenizer, code: Code, marker: char, size: usize) -> StateFnResult {
+    let at_marker = matches!(code, Code::Char(ch) if ch == marker);
+    let at_whitespace = matches!(code, Code::VirtualSpace | Code::Char('\t' | ' '));
+    let at_end = matches!(
+        code,
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')
+    );
+
+    if at_marker {
+        tokenizer.enter(TokenType::ThematicBreakSequence);
+        sequence(tokenizer, code, marker, size)
+    } else if at_whitespace {
+        tokenizer.enter(TokenType::ThematicBreakWhitespace);
+        whitespace(tokenizer, code, marker, size)
+    } else if at_end && size >= THEMATIC_BREAK_MARKER_COUNT_MIN {
+        tokenizer.exit(TokenType::ThematicBreak);
+        (State::Ok, Some(vec![code]))
+    } else {
+        // A different character, or too few markers before the line ended.
+        (State::Nok, None)
+    }
+}
+
+/// Inside a run of markers.
+///
+/// ```markdown
+/// |***
+/// *|**
+/// **|*
+/// ```
+///
+/// Each consumed marker increases `size` by one; the first code that is not
+/// the marker closes the `ThematicBreakSequence` token and goes back to
+/// `at_break`.
+fn sequence(tokenizer: &mut Tokenizer, code: Code, marker: char, size: usize) -> StateFnResult {
+    if !matches!(code, Code::Char(ch) if ch == marker) {
+        tokenizer.exit(TokenType::ThematicBreakSequence);
+        return at_break(tokenizer, code, marker, size);
+    }
+
+    tokenizer.consume(code);
+    let next = State::Fn(Box::new(move |t, c| sequence(t, c, marker, size + 1)));
+    (next, None)
+}
+
+/// Inside whitespace between (or after) marker sequences.
+///
+/// ```markdown
+/// * |* *
+/// * | * *
+/// ```
+///
+/// Whitespace does not change `size`; the first code that is not a space,
+/// tab, or virtual space closes the `ThematicBreakWhitespace` token and goes
+/// back to `at_break`.
+fn whitespace(tokenizer: &mut Tokenizer, code: Code, marker: char, size: usize) -> StateFnResult {
+    if !matches!(code, Code::VirtualSpace | Code::Char('\t' | ' ')) {
+        tokenizer.exit(TokenType::ThematicBreakWhitespace);
+        return at_break(tokenizer, code, marker, size);
+    }
+
+    tokenizer.consume(code);
+    let next = State::Fn(Box::new(move |t, c| whitespace(t, c, marker, size)));
+    (next, None)
+}
author	Titus Wormer <tituswormer@gmail.com>	2022-06-08 15:52:16 +0200
committer	Titus Wormer <tituswormer@gmail.com>	2022-06-08 15:52:16 +0200
commit	4c06c8554c35887f8f5147783953b2b7e7c2327f (patch)
tree	1b2463848a3ae4c645f7f1a325877ee829ab65c5 /src/construct
download	markdown-rs-4c06c8554c35887f8f5147783953b2b7e7c2327f.tar.gz markdown-rs-4c06c8554c35887f8f5147783953b2b7e7c2327f.tar.bz2 markdown-rs-4c06c8554c35887f8f5147783953b2b7e7c2327f.zip