Refactor to move `content` to `construct`

author: Titus Wormer <tituswormer@gmail.com> 2022-08-15 11:40:40 +0200
committer: Titus Wormer <tituswormer@gmail.com> 2022-08-15 11:40:40 +0200
commit: ee967aa634b5f8e9d30329d587538f1371a5da95 (patch)
tree: cdc1461c822e440b24428eb8d431881e216ab8bd /src/construct
parent: 13135666fac476f3cd6f059147f496533b304097 (diff)
download: markdown-rs-ee967aa634b5f8e9d30329d587538f1371a5da95.tar.gz
markdown-rs-ee967aa634b5f8e9d30329d587538f1371a5da95.tar.bz2
markdown-rs-ee967aa634b5f8e9d30329d587538f1371a5da95.zip
32 files changed, 1037 insertions, 38 deletions
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
index ae8da81..1dc8868 100644
--- a/src/construct/attention.rs
+++ b/src/construct/attention.rs
@@ -47,7 +47,7 @@
 //! *   [`attention.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/attention.js)
 //! *   [*§ 6.2 Emphasis and strong emphasis* in `CommonMark`](https://spec.commonmark.org/0.30/#emphasis-and-strong-emphasis)
 //!
-//! [text]: crate::content::text
+//! [text]: crate::construct::text
 //! [html-em]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-em-element
 //! [html-strong]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-strong-element
 
diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs
index 5c826a3..37e21d9 100644
--- a/src/construct/autolink.rs
+++ b/src/construct/autolink.rs
@@ -94,7 +94,7 @@
 //! *   [`autolink.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/autolink.js)
 //! *   [*§ 6.4 Autolinks* in `CommonMark`](https://spec.commonmark.org/0.30/#autolinks)
 //!
-//! [text]: crate::content::text
+//! [text]: crate::construct::text
 //! [label_end]: crate::construct::label_end
 //! [autolink_scheme_size_max]: crate::constant::AUTOLINK_SCHEME_SIZE_MAX
 //! [autolink_domain_size_max]: crate::constant::AUTOLINK_DOMAIN_SIZE_MAX
diff --git a/src/construct/blank_line.rs b/src/construct/blank_line.rs
index 87d257d..928b8cc 100644
--- a/src/construct/blank_line.rs
+++ b/src/construct/blank_line.rs
@@ -30,7 +30,7 @@
 //! [heading-atx]: crate::construct::heading_atx
 //! [list-item]: crate::construct::list_item
 //! [paragraph]: crate::construct::paragraph
-//! [flow]: crate::content::flow
+//! [flow]: crate::construct::flow
 
 use crate::construct::partial_space_or_tab::space_or_tab;
 use crate::state::{Name as StateName, State};
diff --git a/src/construct/block_quote.rs b/src/construct/block_quote.rs
index 4f0870f..37726c5 100644
--- a/src/construct/block_quote.rs
+++ b/src/construct/block_quote.rs
@@ -29,7 +29,7 @@
 //! *   [`block-quote.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/block-quote.js)
 //! *   [*§ 5.1 Block quotes* in `CommonMark`](https://spec.commonmark.org/0.30/#block-quotes)
 //!
-//! [document]: crate::content::document
+//! [document]: crate::construct::document
 //! [html-blockquote]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-blockquote-element
 //! [commonmark-block]: https://spec.commonmark.org/0.30/#phase-1-block-structure
 
diff --git a/src/construct/character_escape.rs b/src/construct/character_escape.rs
index ac91c29..6dac458 100644
--- a/src/construct/character_escape.rs
+++ b/src/construct/character_escape.rs
@@ -28,8 +28,8 @@
 //! *   [`character-escape.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/character-escape.js)
 //! *   [*§ 2.4 Backslash escapes* in `CommonMark`](https://spec.commonmark.org/0.30/#backslash-escapes)
 //!
-//! [string]: crate::content::string
-//! [text]: crate::content::text
+//! [string]: crate::construct::string
+//! [text]: crate::construct::text
 //! [character_reference]: crate::construct::character_reference
 //! [hard_break_escape]: crate::construct::hard_break_escape
 
diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs
index 7d7b6f9..7935109 100644
--- a/src/construct/character_reference.rs
+++ b/src/construct/character_reference.rs
@@ -54,8 +54,8 @@
 //! *   [`character-reference.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/character-reference.js)
 //! *   [*§ 2.5 Entity and numeric character references* in `CommonMark`](https://spec.commonmark.org/0.30/#entity-and-numeric-character-references)
 //!
-//! [string]: crate::content::string
-//! [text]: crate::content::text
+//! [string]: crate::construct::string
+//! [text]: crate::construct::text
 //! [character_escape]: crate::construct::character_reference
 //! [decode_numeric]: crate::util::decode_character_reference::decode_numeric
 //! [character_references]: crate::constant::CHARACTER_REFERENCES
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index 74d6fe1..3812d44 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -91,9 +91,9 @@
 //! *   [`code-fenced.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/code-fenced.js)
 //! *   [*§ 4.5 Fenced code blocks* in `CommonMark`](https://spec.commonmark.org/0.30/#fenced-code-blocks)
 //!
-//! [flow]: crate::content::flow
-//! [string]: crate::content::string
-//! [text]: crate::content::text
+//! [flow]: crate::construct::flow
+//! [string]: crate::construct::string
+//! [text]: crate::construct::text
 //! [code_indented]: crate::construct::code_indented
 //! [code_text]: crate::construct::code_text
 //! [character_escape]: crate::construct::character_escape
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs
index cf111f4..e3a5333 100644
--- a/src/construct/code_indented.rs
+++ b/src/construct/code_indented.rs
@@ -38,8 +38,8 @@
 //! *   [`code-indented.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/code-indented.js)
 //! *   [*§ 4.4 Indented code blocks* in `CommonMark`](https://spec.commonmark.org/0.30/#indented-code-blocks)
 //!
-//! [flow]: crate::content::flow
-//! [text]: crate::content::text
+//! [flow]: crate::construct::flow
+//! [text]: crate::construct::text
 //! [code_text]: crate::construct::code_text
 //! [code_fenced]: crate::construct::code_fenced
 //! [html-pre]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-pre-element
diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs
index d601583..7ebee96 100644
--- a/src/construct/code_text.rs
+++ b/src/construct/code_text.rs
@@ -77,8 +77,8 @@
 //! *   [`code-text.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/code-text.js)
 //! *   [*§ 6.1 Code spans* in `CommonMark`](https://spec.commonmark.org/0.30/#code-spans)
 //!
-//! [flow]: crate::content::flow
-//! [text]: crate::content::text
+//! [flow]: crate::construct::flow
+//! [text]: crate::construct::text
 //! [code_indented]: crate::construct::code_indented
 //! [code_fenced]: crate::construct::code_fenced
 //! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
diff --git a/src/construct/definition.rs b/src/construct/definition.rs
index e242e23..8f274ee 100644
--- a/src/construct/definition.rs
+++ b/src/construct/definition.rs
@@ -80,8 +80,8 @@
 //! *   [`definition.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/definition.js)
 //! *   [*§ 4.7 Link reference definitions* in `CommonMark`](https://spec.commonmark.org/0.30/#link-reference-definitions)
 //!
-//! [flow]: crate::content::flow
-//! [string]: crate::content::string
+//! [flow]: crate::construct::flow
+//! [string]: crate::construct::string
 //! [character_escape]: crate::construct::character_escape
 //! [character_reference]: crate::construct::character_reference
 //! [label_end]: crate::construct::label_end
diff --git a/src/construct/document.rs b/src/construct/document.rs
new file mode 100644
index 0000000..9def6c5
--- /dev/null
+++ b/src/construct/document.rs
@@ -0,0 +1,492 @@
+//! The document content type.
+//!
+//! **Document** represents the containers, such as block quotes and lists,
+//! which structure the document and contain other sections.
+//!
+//! The constructs found in document are:
+//!
+//! *   [Block quote][crate::construct::block_quote]
+//! *   [List][crate::construct::list_item]
+
+use crate::event::{Content, Event, Kind, Link, Name};
+use crate::state::{Name as StateName, State};
+use crate::subtokenize::divide_events;
+use crate::tokenizer::{Container, ContainerState, Tokenizer};
+use crate::util::skip;
+
+/// Phases where we can exit containers.
+#[derive(Debug, PartialEq)]
+enum Phase {
+    /// After parsing a line of lazy flow which resulted in something that
+    /// exits containers before the line.
+    ///
+    /// ```markdown
+    ///   | * a
+    /// > | ```js
+    ///          ^
+    ///   | b
+    ///   | ```
+    /// ```
+    After,
+    /// When a new container replaces an existing container.
+    ///
+    /// ```markdown
+    ///   | * a
+    /// > | > b
+    ///     ^
+    /// ```
+    Prefix,
+    /// After everything.
+    ///
+    /// ```markdown
+    /// > | * a
+    ///        ^
+    /// ```
+    Eof,
+}
+
+/// Start of document, at an optional BOM.
+///
+/// ```markdown
+/// > | a
+///     ^
+/// ```
+pub fn start(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.tokenize_state.document_child = Some(Box::new(Tokenizer::new(
+        tokenizer.point.clone(),
+        tokenizer.parse_state,
+    )));
+
+    tokenizer.attempt(
+        State::Next(StateName::DocumentContainerExistingBefore),
+        State::Next(StateName::DocumentContainerExistingBefore),
+    );
+
+    State::Retry(StateName::BomStart)
+}
+
+/// At optional existing containers.
+///
+/// ```markdown
+///   | * a
+/// > | > b
+///     ^
+/// ```
+pub fn container_existing_before(tokenizer: &mut Tokenizer) -> State {
+    // If there are more existing containers, check whether the next one continues.
+    if tokenizer.tokenize_state.document_continued
+        < tokenizer.tokenize_state.document_container_stack.len()
+    {
+        let container = &tokenizer.tokenize_state.document_container_stack
+            [tokenizer.tokenize_state.document_continued];
+
+        let name = match container.kind {
+            Container::BlockQuote => StateName::BlockQuoteContStart,
+            Container::ListItem => StateName::ListItemContStart,
+        };
+
+        tokenizer.attempt(
+            State::Next(StateName::DocumentContainerExistingAfter),
+            State::Next(StateName::DocumentContainerNewBefore),
+        );
+
+        State::Retry(name)
+    }
+    // Otherwise, check new containers.
+    else {
+        State::Retry(StateName::DocumentContainerNewBefore)
+    }
+}
+
+/// After continued existing container.
+///
+/// ```markdown
+///   | * a
+/// > |   b
+///       ^
+/// ```
+pub fn container_existing_after(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.tokenize_state.document_continued += 1;
+    State::Retry(StateName::DocumentContainerExistingBefore)
+}
+
+/// At new containers.
+///
+/// ```markdown
+/// > | * a
+///     ^
+/// > | > b
+///     ^
+/// ```
+pub fn container_new_before(tokenizer: &mut Tokenizer) -> State {
+    // If we have completely continued, restore the flow’s past `interrupt`
+    // status.
+    if tokenizer.tokenize_state.document_continued
+        == tokenizer.tokenize_state.document_container_stack.len()
+    {
+        let child = tokenizer.tokenize_state.document_child.as_ref().unwrap();
+
+        tokenizer.interrupt = child.interrupt;
+
+        // …and if we’re in a concrete construct, new containers can’t “pierce”
+        // into them.
+        if child.concrete {
+            return State::Retry(StateName::DocumentContainersAfter);
+        }
+    }
+
+    // Check for a new container.
+    // Block quote?
+    // Add a new container at the end of the stack.
+    let tail = tokenizer.tokenize_state.document_container_stack.len();
+    tokenizer
+        .tokenize_state
+        .document_container_stack
+        .push(ContainerState {
+            kind: Container::BlockQuote,
+            blank_initial: false,
+            size: 0,
+        });
+    // Swap the existing container with the new one.
+    tokenizer
+        .tokenize_state
+        .document_container_stack
+        .swap(tokenizer.tokenize_state.document_continued, tail);
+
+    tokenizer.attempt(
+        State::Next(StateName::DocumentContainerNewAfter),
+        State::Next(StateName::DocumentContainerNewBeforeNotBlockQuote),
+    );
+    State::Retry(StateName::BlockQuoteStart)
+}
+
+/// At new container, but not a block quote.
+///
+/// ```markdown
+/// > | * a
+///     ^
+/// ```
+pub fn container_new_before_not_block_quote(tokenizer: &mut Tokenizer) -> State {
+    // List item?
+    // We replace the empty block quote container for this new list one.
+    tokenizer.tokenize_state.document_container_stack
+        [tokenizer.tokenize_state.document_continued] = ContainerState {
+        kind: Container::ListItem,
+        blank_initial: false,
+        size: 0,
+    };
+
+    tokenizer.attempt(
+        State::Next(StateName::DocumentContainerNewAfter),
+        State::Next(StateName::DocumentContainerNewBeforeNotList),
+    );
+    State::Retry(StateName::ListItemStart)
+}
+
+/// At new container, but not a list (or block quote).
+///
+/// ```markdown
+/// > | a
+///     ^
+/// ```
+pub fn container_new_before_not_list(tokenizer: &mut Tokenizer) -> State {
+    // It wasn’t a new block quote or a list.
+    // Swap the new container (in the middle) with the existing one (at the end).
+    // Drop what was in the middle.
+    tokenizer
+        .tokenize_state
+        .document_container_stack
+        .swap_remove(tokenizer.tokenize_state.document_continued);
+
+    State::Retry(StateName::DocumentContainersAfter)
+}
+
+/// After new container.
+///
+/// ```markdown
+/// > | * a
+///       ^
+/// > | > b
+///       ^
+/// ```
+pub fn container_new_after(tokenizer: &mut Tokenizer) -> State {
+    // It was a new block quote or a list.
+    // Swap the new container (in the middle) with the existing one (at the end).
+    // Take the new container.
+    let container = tokenizer
+        .tokenize_state
+        .document_container_stack
+        .swap_remove(tokenizer.tokenize_state.document_continued);
+
+    // If we did not continue all existing containers, and there is a new one,
+    // close the flow and those containers.
+    if tokenizer.tokenize_state.document_continued
+        != tokenizer.tokenize_state.document_container_stack.len()
+    {
+        exit_containers(tokenizer, &Phase::Prefix);
+    }
+
+    tokenizer
+        .tokenize_state
+        .document_container_stack
+        .push(container);
+    tokenizer.tokenize_state.document_continued += 1;
+    tokenizer.interrupt = false;
+    State::Retry(StateName::DocumentContainerNewBefore)
+}
+
+/// After containers, at flow.
+///
+/// ```markdown
+/// > | * a
+///       ^
+/// > | > b
+///       ^
+/// ```
+pub fn containers_after(tokenizer: &mut Tokenizer) -> State {
+    let child = tokenizer.tokenize_state.document_child.as_mut().unwrap();
+
+    child.lazy = tokenizer.tokenize_state.document_continued
+        != tokenizer.tokenize_state.document_container_stack.len();
+    child.define_skip(tokenizer.point.clone());
+
+    match tokenizer.current {
+        // Note: EOL is part of data.
+        None => State::Retry(StateName::DocumentFlowEnd),
+        Some(_) => {
+            let current = tokenizer.events.len();
+            let previous = tokenizer.tokenize_state.document_data_index;
+            if let Some(previous) = previous {
+                tokenizer.events[previous].link.as_mut().unwrap().next = Some(current);
+            }
+            tokenizer.tokenize_state.document_data_index = Some(current);
+            tokenizer.enter_link(
+                Name::Data,
+                Link {
+                    previous,
+                    next: None,
+                    content: Content::Flow,
+                },
+            );
+            State::Retry(StateName::DocumentFlowInside)
+        }
+    }
+}
+
+/// In flow.
+///
+/// ```markdown
+/// > | * ab
+///       ^
+/// ```
+pub fn flow_inside(tokenizer: &mut Tokenizer) -> State {
+    match tokenizer.current {
+        None => {
+            tokenizer.exit(Name::Data);
+            State::Retry(StateName::DocumentFlowEnd)
+        }
+        // Note: EOL is part of data.
+        Some(b'\n') => {
+            tokenizer.consume();
+            tokenizer.exit(Name::Data);
+            State::Next(StateName::DocumentFlowEnd)
+        }
+        Some(_) => {
+            tokenizer.consume();
+            State::Next(StateName::DocumentFlowInside)
+        }
+    }
+}
+
+/// After flow (after eol or at eof).
+///
+/// ```markdown
+///   | * a
+/// > | > b
+///     ^  ^
+/// ```
+pub fn flow_end(tokenizer: &mut Tokenizer) -> State {
+    let child = tokenizer.tokenize_state.document_child.as_mut().unwrap();
+    let state = tokenizer
+        .tokenize_state
+        .document_child_state
+        .unwrap_or(State::Next(StateName::FlowStart));
+
+    tokenizer.tokenize_state.document_exits.push(None);
+
+    let state = child.push(
+        (child.point.index, child.point.vs),
+        (tokenizer.point.index, tokenizer.point.vs),
+        state,
+    );
+
+    let paragraph = matches!(state, State::Next(StateName::ParagraphInside))
+        || (!child.events.is_empty()
+            && child.events
+                [skip::opt_back(&child.events, child.events.len() - 1, &[Name::LineEnding])]
+            .name
+                == Name::Paragraph);
+
+    tokenizer.tokenize_state.document_child_state = Some(state);
+
+    if child.lazy && paragraph && tokenizer.tokenize_state.document_paragraph_before {
+        tokenizer.tokenize_state.document_continued =
+            tokenizer.tokenize_state.document_container_stack.len();
+    }
+
+    if tokenizer.tokenize_state.document_continued
+        != tokenizer.tokenize_state.document_container_stack.len()
+    {
+        exit_containers(tokenizer, &Phase::After);
+    }
+
+    match tokenizer.current {
+        None => {
+            tokenizer.tokenize_state.document_continued = 0;
+            exit_containers(tokenizer, &Phase::Eof);
+            resolve(tokenizer);
+            State::Ok
+        }
+        Some(_) => {
+            tokenizer.tokenize_state.document_continued = 0;
+            tokenizer.tokenize_state.document_paragraph_before = paragraph;
+            // Containers would only be interrupting if we’ve continued.
+            tokenizer.interrupt = false;
+            State::Retry(StateName::DocumentContainerExistingBefore)
+        }
+    }
+}
+
+/// Close containers (and flow if needed).
+fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) {
+    let mut stack_close = tokenizer
+        .tokenize_state
+        .document_container_stack
+        .split_off(tokenizer.tokenize_state.document_continued);
+
+    let child = tokenizer.tokenize_state.document_child.as_mut().unwrap();
+
+    // Flush if needed.
+    if *phase != Phase::After {
+        let state = tokenizer
+            .tokenize_state
+            .document_child_state
+            .take()
+            .unwrap_or(State::Next(StateName::FlowStart));
+
+        child.flush(state, false);
+    }
+
+    if !stack_close.is_empty() {
+        let index = tokenizer.tokenize_state.document_exits.len()
+            - (if *phase == Phase::After { 2 } else { 1 });
+        let mut exits = Vec::with_capacity(stack_close.len());
+
+        while !stack_close.is_empty() {
+            let container = stack_close.pop().unwrap();
+            let name = match container.kind {
+                Container::BlockQuote => Name::BlockQuote,
+                Container::ListItem => Name::ListItem,
+            };
+
+            exits.push(Event {
+                kind: Kind::Exit,
+                name: name.clone(),
+                point: tokenizer.point.clone(),
+                link: None,
+            });
+
+            let mut stack_index = tokenizer.stack.len();
+            let mut found = false;
+
+            while stack_index > 0 {
+                stack_index -= 1;
+
+                if tokenizer.stack[stack_index] == name {
+                    tokenizer.stack.remove(stack_index);
+                    found = true;
+                    break;
+                }
+            }
+
+            debug_assert!(found, "expected to find container token to exit");
+        }
+
+        if let Some(ref mut list) = tokenizer.tokenize_state.document_exits[index] {
+            list.append(&mut exits);
+        } else {
+            tokenizer.tokenize_state.document_exits[index] = Some(exits);
+        }
+    }
+
+    child.interrupt = false;
+}
+
+/// Inject everything together.
+fn resolve(tokenizer: &mut Tokenizer) {
+    let child = tokenizer.tokenize_state.document_child.as_mut().unwrap();
+
+    // First, add the container exits into `child`.
+    let mut child_index = 0;
+    let mut line = 0;
+
+    while child_index < child.events.len() {
+        let event = &child.events[child_index];
+
+        if event.kind == Kind::Enter
+            && (event.name == Name::LineEnding || event.name == Name::BlankLineEnding)
+        {
+            if let Some(mut exits) = tokenizer.tokenize_state.document_exits[line].take() {
+                let mut exit_index = 0;
+                while exit_index < exits.len() {
+                    exits[exit_index].point = event.point.clone();
+                    exit_index += 1;
+                }
+
+                child.map.add(child_index, 0, exits);
+            }
+
+            line += 1;
+        }
+
+        child_index += 1;
+    }
+
+    child.map.consume(&mut child.events);
+
+    // Now, add all child events into our parent document tokenizer.
+    divide_events(
+        &mut tokenizer.map,
+        &tokenizer.events,
+        skip::to(&tokenizer.events, 0, &[Name::Data]),
+        &mut child.events,
+    );
+
+    // Replace the flow data with actual events.
+    tokenizer.map.consume(&mut tokenizer.events);
+
+    // Now, add some final container exits due to the EOF.
+    // We can’t inject them into the child earlier, as they are “outside” its
+    // linked data.
+    if line < tokenizer.tokenize_state.document_exits.len() {
+        if let Some(mut exits) = tokenizer.tokenize_state.document_exits[line].take() {
+            let mut exit_index = 0;
+            while exit_index < exits.len() {
+                exits[exit_index].point = tokenizer.point.clone();
+                exit_index += 1;
+            }
+
+            tokenizer.events.append(&mut exits);
+        }
+    }
+
+    // Add the resolvers from child.
+    tokenizer
+        .resolvers
+        .append(&mut child.resolvers.split_off(0));
+
+    tokenizer
+        .tokenize_state
+        .definitions
+        .append(&mut child.tokenize_state.definitions.split_off(0));
+}
diff --git a/src/construct/flow.rs b/src/construct/flow.rs
new file mode 100644
index 0000000..08c7891
--- /dev/null
+++ b/src/construct/flow.rs
@@ -0,0 +1,254 @@
+//! The flow content type.
+//!
+//! **Flow** represents the sections, such as headings and code, which are
+//! parsed per line.
+//! An example is HTML, which has a certain starting condition (such as
+//! `<script>` on its own line), then continues for a while, until an end
+//! condition is found (such as `</style>`).
+//! If that line with an end condition is never found, that flow goes until
+//! the end.
+//!
+//! The constructs found in flow are:
+//!
+//! *   [Blank line][crate::construct::blank_line]
+//! *   [Code (fenced)][crate::construct::code_fenced]
+//! *   [Code (indented)][crate::construct::code_indented]
+//! *   [Definition][crate::construct::definition]
+//! *   [Heading (atx)][crate::construct::heading_atx]
+//! *   [Heading (setext)][crate::construct::heading_setext]
+//! *   [HTML (flow)][crate::construct::html_flow]
+//! *   [Thematic break][crate::construct::thematic_break]
+
+use crate::event::Name;
+use crate::state::{Name as StateName, State};
+use crate::tokenizer::Tokenizer;
+
+/// Start of flow.
+///
+/// ```markdown
+/// > | ## alpha
+///     ^
+/// > |     bravo
+///     ^
+/// > | ***
+///     ^
+/// ```
+pub fn start(tokenizer: &mut Tokenizer) -> State {
+    match tokenizer.current {
+        Some(b'`' | b'~') => {
+            tokenizer.attempt(
+                State::Next(StateName::FlowAfter),
+                State::Next(StateName::FlowBeforeParagraph),
+            );
+            State::Retry(StateName::CodeFencedStart)
+        }
+        Some(b'<') => {
+            tokenizer.attempt(
+                State::Next(StateName::FlowAfter),
+                State::Next(StateName::FlowBeforeParagraph),
+            );
+            State::Retry(StateName::HtmlFlowStart)
+        }
+        Some(b'#') => {
+            tokenizer.attempt(
+                State::Next(StateName::FlowAfter),
+                State::Next(StateName::FlowBeforeParagraph),
+            );
+            State::Retry(StateName::HeadingAtxStart)
+        }
+        // Note: `-` is also used in thematic breaks, so it’s not included here.
+        Some(b'=') => {
+            tokenizer.attempt(
+                State::Next(StateName::FlowAfter),
+                State::Next(StateName::FlowBeforeParagraph),
+            );
+            State::Retry(StateName::HeadingSetextStart)
+        }
+        Some(b'*' | b'_') => {
+            tokenizer.attempt(
+                State::Next(StateName::FlowAfter),
+                State::Next(StateName::FlowBeforeParagraph),
+            );
+            State::Retry(StateName::ThematicBreakStart)
+        }
+        Some(b'[') => {
+            tokenizer.attempt(
+                State::Next(StateName::FlowAfter),
+                State::Next(StateName::FlowBeforeParagraph),
+            );
+            State::Retry(StateName::DefinitionStart)
+        }
+        // Actual parsing: blank line? Indented code? Indented anything?
+        // Also includes `-` which can be a setext heading underline or a thematic break.
+        None | Some(b'\t' | b'\n' | b' ' | b'-') => State::Retry(StateName::FlowBlankLineBefore),
+        // Must be a paragraph.
+        Some(_) => {
+            tokenizer.attempt(State::Next(StateName::FlowAfter), State::Nok);
+            State::Retry(StateName::ParagraphStart)
+        }
+    }
+}
+
+/// At blank line.
+///
+/// ```markdown
+/// > | ␠␠␊
+///     ^
+/// ```
+pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.attempt(
+        State::Next(StateName::FlowBlankLineAfter),
+        State::Next(StateName::FlowBeforeCodeIndented),
+    );
+    State::Retry(StateName::BlankLineStart)
+}
+
+/// At code (indented).
+///
+/// ```markdown
+/// > | ␠␠␠␠a
+///     ^
+/// ```
+pub fn before_code_indented(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.attempt(
+        State::Next(StateName::FlowAfter),
+        State::Next(StateName::FlowBeforeCodeFenced),
+    );
+    State::Retry(StateName::CodeIndentedStart)
+}
+
+/// At code (fenced).
+///
+/// ````markdown
+/// > | ```
+///     ^
+/// ````
+pub fn before_code_fenced(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.attempt(
+        State::Next(StateName::FlowAfter),
+        State::Next(StateName::FlowBeforeHtml),
+    );
+    State::Retry(StateName::CodeFencedStart)
+}
+
+/// At html (flow).
+///
+/// ```markdown
+/// > | <a>
+///     ^
+/// ```
+pub fn before_html(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.attempt(
+        State::Next(StateName::FlowAfter),
+        State::Next(StateName::FlowBeforeHeadingAtx),
+    );
+    State::Retry(StateName::HtmlFlowStart)
+}
+
+/// At heading (atx).
+///
+/// ```markdown
+/// > | # a
+///     ^
+/// ```
+pub fn before_heading_atx(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.attempt(
+        State::Next(StateName::FlowAfter),
+        State::Next(StateName::FlowBeforeHeadingSetext),
+    );
+    State::Retry(StateName::HeadingAtxStart)
+}
+
+/// At heading (setext).
+///
+/// ```markdown
+///   | a
+/// > | =
+///     ^
+/// ```
+pub fn before_heading_setext(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.attempt(
+        State::Next(StateName::FlowAfter),
+        State::Next(StateName::FlowBeforeThematicBreak),
+    );
+    State::Retry(StateName::HeadingSetextStart)
+}
+
+/// At thematic break.
+///
+/// ```markdown
+/// > | ***
+///     ^
+/// ```
+pub fn before_thematic_break(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.attempt(
+        State::Next(StateName::FlowAfter),
+        State::Next(StateName::FlowBeforeDefinition),
+    );
+    State::Retry(StateName::ThematicBreakStart)
+}
+
+/// At definition.
+///
+/// ```markdown
+/// > | [a]: b
+///     ^
+/// ```
+pub fn before_definition(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.attempt(
+        State::Next(StateName::FlowAfter),
+        State::Next(StateName::FlowBeforeParagraph),
+    );
+    State::Retry(StateName::DefinitionStart)
+}
+
+/// At paragraph.
+///
+/// ```markdown
+/// > | a
+///     ^
+/// ```
+pub fn before_paragraph(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.attempt(State::Next(StateName::FlowAfter), State::Nok);
+    State::Retry(StateName::ParagraphStart)
+}
+
+/// After blank line.
+///
+/// ```markdown
+/// > | ␠␠␊
+///       ^
+/// ```
+pub fn blank_line_after(tokenizer: &mut Tokenizer) -> State {
+    match tokenizer.current {
+        None => State::Ok,
+        Some(b'\n') => {
+            tokenizer.enter(Name::BlankLineEnding);
+            tokenizer.consume();
+            tokenizer.exit(Name::BlankLineEnding);
+            // Feel free to interrupt.
+            tokenizer.interrupt = false;
+            State::Next(StateName::FlowStart)
+        }
+        _ => unreachable!("expected eol/eof"),
+    }
+}
+
+/// After flow.
+///
+/// ```markdown
+/// > | # a␊
+///        ^
+/// ```
+pub fn after(tokenizer: &mut Tokenizer) -> State {
+    match tokenizer.current {
+        None => State::Ok,
+        Some(b'\n') => {
+            tokenizer.enter(Name::LineEnding);
+            tokenizer.consume();
+            tokenizer.exit(Name::LineEnding);
+            State::Next(StateName::FlowStart)
+        }
+        _ => unreachable!("expected eol/eof"),
+    }
+}
diff --git a/src/construct/hard_break_escape.rs b/src/construct/hard_break_escape.rs
index 79d8150..1fafa0b 100644
--- a/src/construct/hard_break_escape.rs
+++ b/src/construct/hard_break_escape.rs
@@ -33,7 +33,7 @@
 //! *   [`hard-break-escape.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/hard-break-escape.js)
 //! *   [*§ 6.7 Hard line breaks* in `CommonMark`](https://spec.commonmark.org/0.30/#hard-line-breaks)
 //!
-//! [text]: crate::content::text
+//! [text]: crate::construct::text
 //! [character_escape]: crate::construct::character_escape
 //! [character_reference]: crate::construct::character_reference
 //! [hard_break_trailing]: crate::construct::partial_whitespace
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index 30c22f3..3bcff54 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -47,7 +47,7 @@
 //! *   [`heading-atx.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/heading-atx.js)
 //! *   [*§ 4.2 ATX headings* in `CommonMark`](https://spec.commonmark.org/0.30/#atx-headings)
 //!
-//! [flow]: crate::content::flow
+//! [flow]: crate::construct::flow
 //! [heading_setext]: crate::construct::heading_setext
 //! [hard_break_escape]: crate::construct::hard_break_escape
 //! [html]: https://html.spec.whatwg.org/multipage/sections.html#the-h1,-h2,-h3,-h4,-h5,-and-h6-elements
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index 1f6270a..043104a 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -49,7 +49,7 @@
 //! *   [`setext-underline.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/setext-underline.js)
 //! *   [*§ 4.3 Setext headings* in `CommonMark`](https://spec.commonmark.org/0.30/#setext-headings)
 //!
-//! [flow]: crate::content::flow
+//! [flow]: crate::construct::flow
 //! [heading_atx]: crate::construct::heading_atx
 //! [thematic_break]: crate::construct::thematic_break
 //! [hard_break_escape]: crate::construct::hard_break_escape
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index e90abc4..38e33f8 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -91,7 +91,7 @@
 //! *   [`html-flow.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/html-flow.js)
 //! *   [*§ 4.6 HTML blocks* in `CommonMark`](https://spec.commonmark.org/0.30/#html-blocks)
 //!
-//! [flow]: crate::content::flow
+//! [flow]: crate::construct::flow
 //! [html_text]: crate::construct::html_text
 //! [paragraph]: crate::construct::paragraph
 //! [html_raw_names]: crate::constant::HTML_RAW_NAMES
diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs
index c3b0a65..fde0847 100644
--- a/src/construct/html_text.rs
+++ b/src/construct/html_text.rs
@@ -50,7 +50,7 @@
 //! *   [`html-text.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/html-text.js)
 //! *   [*§ 6.6 Raw HTML* in `CommonMark`](https://spec.commonmark.org/0.30/#raw-html)
 //!
-//! [text]: crate::content::text
+//! [text]: crate::construct::text
 //! [html_flow]: crate::construct::html_flow
 //! [html-parsing]: https://html.spec.whatwg.org/multipage/parsing.html#parsing
 
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index 5d8f17a..09716b7 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -132,8 +132,8 @@
 //! *   [*§ 6.3 Links* in `CommonMark`](https://spec.commonmark.org/0.30/#links)
 //! *   [*§ 6.4 Images* in `CommonMark`](https://spec.commonmark.org/0.30/#images)
 //!
-//! [string]: crate::content::string
-//! [text]: crate::content::text
+//! [string]: crate::construct::string
+//! [text]: crate::construct::text
 //! [destination]: crate::construct::partial_destination
 //! [title]: crate::construct::partial_title
 //! [label]: crate::construct::partial_label
diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs
index e6a75d3..ffc1aee 100644
--- a/src/construct/label_start_image.rs
+++ b/src/construct/label_start_image.rs
@@ -24,7 +24,7 @@
 //! *   [`label-start-image.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/label-start-image.js)
 //! *   [*§ 6.4 Images* in `CommonMark`](https://spec.commonmark.org/0.30/#images)
 //!
-//! [text]: crate::content::text
+//! [text]: crate::construct::text
 //! [label_end]: crate::construct::label_end
 //! [html-img]: https://html.spec.whatwg.org/multipage/embedded-content.html#the-img-element
 
diff --git a/src/construct/label_start_link.rs b/src/construct/label_start_link.rs
index 7b6ac6e..dad6884 100644
--- a/src/construct/label_start_link.rs
+++ b/src/construct/label_start_link.rs
@@ -23,7 +23,7 @@
 //! *   [`label-start-link.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/label-start-link.js)
 //! *   [*§ 6.3 Links* in `CommonMark`](https://spec.commonmark.org/0.30/#links)
 //!
-//! [text]: crate::content::text
+//! [text]: crate::construct::text
 //! [label_end]: crate::construct::label_end
 //! [html-a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element
 
diff --git a/src/construct/list_item.rs b/src/construct/list_item.rs
index 5161254..a70906a 100644
--- a/src/construct/list_item.rs
+++ b/src/construct/list_item.rs
@@ -38,7 +38,7 @@
 //! *   [*§ 5.2 List items* in `CommonMark`](https://spec.commonmark.org/0.30/#list-items)
 //! *   [*§ 5.3 Lists* in `CommonMark`](https://spec.commonmark.org/0.30/#lists)
 //!
-//! [document]: crate::content::document
+//! [document]: crate::construct::document
 //! [html-li]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-li-element
 //! [html-ol]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-ol-element
 //! [html-ul]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-ul-element
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index 566bb30..49868e9 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -76,6 +76,8 @@ pub mod code_fenced;
 pub mod code_indented;
 pub mod code_text;
 pub mod definition;
+pub mod document;
+pub mod flow;
 pub mod hard_break_escape;
 pub mod heading_atx;
 pub mod heading_setext;
@@ -95,4 +97,6 @@ pub mod partial_space_or_tab;
 pub mod partial_space_or_tab_eol;
 pub mod partial_title;
 pub mod partial_whitespace;
+pub mod string;
+pub mod text;
 pub mod thematic_break;
diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs
index b33346a..5f89ef9 100644
--- a/src/construct/paragraph.rs
+++ b/src/construct/paragraph.rs
@@ -26,8 +26,8 @@
 //! *   [`content.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/content.js)
 //! *   [*§ 4.8 Paragraphs* in `CommonMark`](https://spec.commonmark.org/0.30/#paragraphs)
 //!
-//! [flow]: crate::content::flow
-//! [text]: crate::content::text
+//! [flow]: crate::construct::flow
+//! [text]: crate::construct::text
 //! [autolink]: crate::construct::autolink
 //! [code_text]: crate::construct::code_text
 //! [html]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-p-element
diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs
index 86e48c7..3ffa646 100644
--- a/src/construct/partial_data.rs
+++ b/src/construct/partial_data.rs
@@ -3,8 +3,8 @@
 //! It can include anything (including line endings), and stops at certain
 //! characters.
 //!
-//! [string]: crate::content::string
-//! [text]: crate::content::text
+//! [string]: crate::construct::string
+//! [text]: crate::construct::text
 
 use crate::event::{Kind, Name};
 use crate::resolve::Name as ResolveName;
diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs
index d2477ab..b5254d3 100644
--- a/src/construct/partial_destination.rs
+++ b/src/construct/partial_destination.rs
@@ -65,7 +65,7 @@
 //! *   [`micromark-factory-destination/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-destination/dev/index.js)
 //!
 //! [definition]: crate::construct::definition
-//! [string]: crate::content::string
+//! [string]: crate::construct::string
 //! [character_escape]: crate::construct::character_escape
 //! [character_reference]: crate::construct::character_reference
 //! [label_end]: crate::construct::label_end
diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs
index 20a7b15..00d613a 100644
--- a/src/construct/partial_label.rs
+++ b/src/construct/partial_label.rs
@@ -48,7 +48,7 @@
 //! *   [`micromark-factory-label/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-label/dev/index.js)
 //!
 //! [definition]: crate::construct::definition
-//! [string]: crate::content::string
+//! [string]: crate::construct::string
 //! [attention]: crate::construct::attention
 //! [character_escape]: crate::construct::character_escape
 //! [character_reference]: crate::construct::character_reference
diff --git a/src/construct/partial_non_lazy_continuation.rs b/src/construct/partial_non_lazy_continuation.rs
index 3bbf7cc..5eec71f 100644
--- a/src/construct/partial_non_lazy_continuation.rs
+++ b/src/construct/partial_non_lazy_continuation.rs
@@ -5,7 +5,7 @@
 //! For example, [html (flow)][html_flow] and code ([fenced][code_fenced],
 //! [indented][code_indented]), stop when next line is lazy.
 //!
-//! [flow]: crate::content::flow
+//! [flow]: crate::construct::flow
 //! [code_fenced]: crate::construct::code_fenced
 //! [code_indented]: crate::construct::code_indented
 //! [html_flow]: crate::construct::html_flow
diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs
index f0c4931..50d26be 100644
--- a/src/construct/partial_title.rs
+++ b/src/construct/partial_title.rs
@@ -25,7 +25,7 @@
 //! *   [`micromark-factory-title/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-title/dev/index.js)
 //!
 //! [definition]: crate::construct::definition
-//! [string]: crate::content::string
+//! [string]: crate::construct::string
 //! [character_escape]: crate::construct::character_escape
 //! [character_reference]: crate::construct::character_reference
 //! [label_end]: crate::construct::label_end
diff --git a/src/construct/partial_whitespace.rs b/src/construct/partial_whitespace.rs
index 7152881..bf06df9 100644
--- a/src/construct/partial_whitespace.rs
+++ b/src/construct/partial_whitespace.rs
@@ -38,8 +38,8 @@
 //! *   [`initialize/text.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark/dev/lib/initialize/text.js)
 //! *   [*§ 6.7 Hard line breaks* in `CommonMark`](https://spec.commonmark.org/0.30/#hard-line-breaks)
 //!
-//! [string]: crate::content::string
-//! [text]: crate::content::text
+//! [string]: crate::construct::string
+//! [text]: crate::construct::text
 //! [hard_break_escape]: crate::construct::hard_break_escape
 //! [character_escape]: crate::construct::character_escape
 //! [hard_break_prefix_size_min]: crate::constant::HARD_BREAK_PREFIX_SIZE_MIN
diff --git a/src/construct/string.rs b/src/construct/string.rs
new file mode 100644
index 0000000..698a51d
--- /dev/null
+++ b/src/construct/string.rs
@@ -0,0 +1,76 @@
+//! The string content type.
+//!
+//! **String** is a limited, [text][]-like content type that only allows
+//! character escapes and character references.
+//! It exists in things such as identifiers (media references, definitions),
+//! titles, URLs, code (fenced) info and meta parts.
+//!
+//! The constructs found in string are:
+//!
+//! *   [Character escape][crate::construct::character_escape]
+//! *   [Character reference][crate::construct::character_reference]
+//!
+//! [text]: crate::construct::text
+
+use crate::construct::partial_whitespace::resolve_whitespace;
+use crate::resolve::Name as ResolveName;
+use crate::state::{Name as StateName, State};
+use crate::tokenizer::Tokenizer;
+
+/// Bytes that can start a construct in string: character reference (`&`), character escape (`\`).
+const MARKERS: [u8; 2] = [b'&', b'\\'];
+
+/// Start of string: registers the `String` resolver, sets the markers, retries at `StringBefore`.
+///
+/// ````markdown
+/// > | ```js
+///        ^
+/// ````
+pub fn start(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.register_resolver(ResolveName::String);
+    tokenizer.tokenize_state.markers = &MARKERS;
+    State::Retry(StateName::StringBefore)
+}
+
+/// Before string: done at eof; at `&` or `\`, attempts the matching construct; else data.
+///
+/// ````markdown
+/// > | ```js
+///        ^
+/// ````
+pub fn before(tokenizer: &mut Tokenizer) -> State {
+    match tokenizer.current {
+        None => State::Ok,
+        Some(b'&') => {
+            tokenizer.attempt(
+                State::Next(StateName::StringBefore),
+                State::Next(StateName::StringBeforeData),
+            );
+            State::Retry(StateName::CharacterReferenceStart)
+        }
+        Some(b'\\') => {
+            tokenizer.attempt(
+                State::Next(StateName::StringBefore),
+                State::Next(StateName::StringBeforeData),
+            );
+            State::Retry(StateName::CharacterEscapeStart)
+        }
+        _ => State::Retry(StateName::StringBeforeData),
+    }
+}
+
+/// At data: hands over to `DataStart`, with `StringBefore` / `State::Nok` as follow-up states.
+///
+/// ````markdown
+/// > | ```js
+///        ^
+/// ````
+pub fn before_data(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.attempt(State::Next(StateName::StringBefore), State::Nok);
+    State::Retry(StateName::DataStart)
+}
+
+/// Resolve whitespace in string: calls `resolve_whitespace` with both flags `false`.
+pub fn resolve(tokenizer: &mut Tokenizer) {
+    resolve_whitespace(tokenizer, false, false);
+}
diff --git a/src/construct/text.rs b/src/construct/text.rs
new file mode 100644
index 0000000..5c13dba
--- /dev/null
+++ b/src/construct/text.rs
@@ -0,0 +1,173 @@
+//! The text content type.
+//!
+//! **Text** contains phrasing content such as
+//! [attention][crate::construct::attention] (emphasis, strong),
+//! [code (text)][crate::construct::code_text], and actual text.
+//!
+//! The constructs found in text are:
+//!
+//! *   [Attention][crate::construct::attention]
+//! *   [Autolink][crate::construct::autolink]
+//! *   [Character escape][crate::construct::character_escape]
+//! *   [Character reference][crate::construct::character_reference]
+//! *   [Code (text)][crate::construct::code_text]
+//! *   [Hard break (escape)][crate::construct::hard_break_escape]
+//! *   [HTML (text)][crate::construct::html_text]
+//! *   [Label start (image)][crate::construct::label_start_image]
+//! *   [Label start (link)][crate::construct::label_start_link]
+//! *   [Label end][crate::construct::label_end]
+//!
+//! > 👉 **Note**: for performance reasons, hard break (trailing) is formed by
+//! > [whitespace][crate::construct::partial_whitespace].
+
+use crate::construct::partial_whitespace::resolve_whitespace;
+use crate::resolve::Name as ResolveName;
+use crate::state::{Name as StateName, State};
+use crate::tokenizer::Tokenizer;
+
+/// Bytes that can start a construct in text; each one has an arm in `before`.
+const MARKERS: [u8; 9] = [
+    b'!',  // `label_start_image`
+    b'&',  // `character_reference`
+    b'*',  // `attention`
+    b'<',  // `autolink`, `html_text`
+    b'[',  // `label_start_link`
+    b'\\', // `character_escape`, `hard_break_escape`
+    b']',  // `label_end`
+    b'_',  // `attention`
+    b'`',  // `code_text`
+];
+
+/// Start of text: registers the `Text` resolver, sets the markers, retries at `TextBefore`.
+///
+/// ```markdown
+/// > | abc
+///     ^
+/// ```
+pub fn start(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.register_resolver(ResolveName::Text);
+    tokenizer.tokenize_state.markers = &MARKERS;
+    State::Retry(StateName::TextBefore)
+}
+
+/// Before text: done at eof; at a marker, attempts the matching construct; else data.
+///
+/// ```markdown
+/// > | abc
+///     ^
+/// ```
+pub fn before(tokenizer: &mut Tokenizer) -> State {
+    match tokenizer.current {
+        None => State::Ok,
+        Some(b'!') => {
+            tokenizer.attempt(
+                State::Next(StateName::TextBefore),
+                State::Next(StateName::TextBeforeData),
+            );
+            State::Retry(StateName::LabelStartImageStart)
+        }
+        Some(b'&') => {
+            tokenizer.attempt(
+                State::Next(StateName::TextBefore),
+                State::Next(StateName::TextBeforeData),
+            );
+            State::Retry(StateName::CharacterReferenceStart)
+        }
+        Some(b'*' | b'_') => {
+            tokenizer.attempt(
+                State::Next(StateName::TextBefore),
+                State::Next(StateName::TextBeforeData),
+            );
+            State::Retry(StateName::AttentionStart)
+        }
+        // `autolink` first, `html_text` if that fails (order does not matter)
+        Some(b'<') => {
+            tokenizer.attempt(
+                State::Next(StateName::TextBefore),
+                State::Next(StateName::TextBeforeHtml),
+            );
+            State::Retry(StateName::AutolinkStart)
+        }
+        Some(b'[') => {
+            tokenizer.attempt(
+                State::Next(StateName::TextBefore),
+                State::Next(StateName::TextBeforeData),
+            );
+            State::Retry(StateName::LabelStartLinkStart)
+        }
+        Some(b'\\') => {
+            tokenizer.attempt(
+                State::Next(StateName::TextBefore),
+                State::Next(StateName::TextBeforeHardBreakEscape),
+            );
+            State::Retry(StateName::CharacterEscapeStart)
+        }
+        Some(b']') => {
+            tokenizer.attempt(
+                State::Next(StateName::TextBefore),
+                State::Next(StateName::TextBeforeData),
+            );
+            State::Retry(StateName::LabelEndStart)
+        }
+        Some(b'`') => {
+            tokenizer.attempt(
+                State::Next(StateName::TextBefore),
+                State::Next(StateName::TextBeforeData),
+            );
+            State::Retry(StateName::CodeTextStart)
+        }
+        _ => State::Retry(StateName::TextBeforeData),
+    }
+}
+
+/// Before html (text).
+///
+/// At `<`, which wasn’t an autolink: tries html (text) next, with data as the fallback state.
+///
+/// ```markdown
+/// > | a <b>
+///       ^
+/// ```
+pub fn before_html(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.attempt(
+        State::Next(StateName::TextBefore),
+        State::Next(StateName::TextBeforeData),
+    );
+    State::Retry(StateName::HtmlTextStart)
+}
+
+/// Before hard break escape.
+///
+/// At `\`, which wasn’t a character escape: tries hard break (escape) next, with data as fallback.
+///
+/// ```markdown
+/// > | a \␊
+///       ^
+/// ```
+pub fn before_hard_break_escape(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.attempt(
+        State::Next(StateName::TextBefore),
+        State::Next(StateName::TextBeforeData),
+    );
+    State::Retry(StateName::HardBreakEscapeStart)
+}
+
+/// Before data: hands over to `DataStart`, with `TextBefore` / `State::Nok` as follow-up states.
+///
+/// ```markdown
+/// > | a
+///     ^
+/// ```
+pub fn before_data(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.attempt(State::Next(StateName::TextBefore), State::Nok);
+    State::Retry(StateName::DataStart)
+}
+
+/// Resolve whitespace in text, passing the `hard_break_trailing` construct setting along.
+pub fn resolve(tokenizer: &mut Tokenizer) {
+    resolve_whitespace(
+        tokenizer,
+        tokenizer.parse_state.constructs.hard_break_trailing,
+        true,
+    );
+}
diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs
index 1b581ea..9496d98 100644
--- a/src/construct/thematic_break.rs
+++ b/src/construct/thematic_break.rs
@@ -43,7 +43,7 @@
 //! *   [`thematic-break.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/thematic-break.js)
 //! *   [*§ 4.1 Thematic breaks* in `CommonMark`](https://spec.commonmark.org/0.30/#thematic-breaks)
 //!
-//! [flow]: crate::content::flow
+//! [flow]: crate::construct::flow
 //! [heading_setext]: crate::construct::heading_setext
 //! [list-item]: crate::construct::list_item
 //! [html]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-hr-element
author	Titus Wormer <tituswormer@gmail.com>	2022-08-15 11:40:40 +0200
committer	Titus Wormer <tituswormer@gmail.com>	2022-08-15 11:40:40 +0200
commit	ee967aa634b5f8e9d30329d587538f1371a5da95 (patch)
tree	cdc1461c822e440b24428eb8d431881e216ab8bd /src/construct
parent	13135666fac476f3cd6f059147f496533b304097 (diff)
download	markdown-rs-ee967aa634b5f8e9d30329d587538f1371a5da95.tar.gz markdown-rs-ee967aa634b5f8e9d30329d587538f1371a5da95.tar.bz2 markdown-rs-ee967aa634b5f8e9d30329d587538f1371a5da95.zip