aboutsummaryrefslogtreecommitdiffstats
path: root/src/construct
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-08-15 11:40:40 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-08-15 11:40:40 +0200
commitee967aa634b5f8e9d30329d587538f1371a5da95 (patch)
treecdc1461c822e440b24428eb8d431881e216ab8bd /src/construct
parent13135666fac476f3cd6f059147f496533b304097 (diff)
downloadmarkdown-rs-ee967aa634b5f8e9d30329d587538f1371a5da95.tar.gz
markdown-rs-ee967aa634b5f8e9d30329d587538f1371a5da95.tar.bz2
markdown-rs-ee967aa634b5f8e9d30329d587538f1371a5da95.zip
Refactor to move `content` to `construct`
Diffstat (limited to 'src/construct')
-rw-r--r--src/construct/attention.rs2
-rw-r--r--src/construct/autolink.rs2
-rw-r--r--src/construct/blank_line.rs2
-rw-r--r--src/construct/block_quote.rs2
-rw-r--r--src/construct/character_escape.rs4
-rw-r--r--src/construct/character_reference.rs4
-rw-r--r--src/construct/code_fenced.rs6
-rw-r--r--src/construct/code_indented.rs4
-rw-r--r--src/construct/code_text.rs4
-rw-r--r--src/construct/definition.rs4
-rw-r--r--src/construct/document.rs492
-rw-r--r--src/construct/flow.rs254
-rw-r--r--src/construct/hard_break_escape.rs2
-rw-r--r--src/construct/heading_atx.rs2
-rw-r--r--src/construct/heading_setext.rs2
-rw-r--r--src/construct/html_flow.rs2
-rw-r--r--src/construct/html_text.rs2
-rw-r--r--src/construct/label_end.rs4
-rw-r--r--src/construct/label_start_image.rs2
-rw-r--r--src/construct/label_start_link.rs2
-rw-r--r--src/construct/list_item.rs2
-rw-r--r--src/construct/mod.rs4
-rw-r--r--src/construct/paragraph.rs4
-rw-r--r--src/construct/partial_data.rs4
-rw-r--r--src/construct/partial_destination.rs2
-rw-r--r--src/construct/partial_label.rs2
-rw-r--r--src/construct/partial_non_lazy_continuation.rs2
-rw-r--r--src/construct/partial_title.rs2
-rw-r--r--src/construct/partial_whitespace.rs4
-rw-r--r--src/construct/string.rs76
-rw-r--r--src/construct/text.rs173
-rw-r--r--src/construct/thematic_break.rs2
32 files changed, 1037 insertions, 38 deletions
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
index ae8da81..1dc8868 100644
--- a/src/construct/attention.rs
+++ b/src/construct/attention.rs
@@ -47,7 +47,7 @@
//! * [`attention.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/attention.js)
//! * [*§ 6.2 Emphasis and strong emphasis* in `CommonMark`](https://spec.commonmark.org/0.30/#emphasis-and-strong-emphasis)
//!
-//! [text]: crate::content::text
+//! [text]: crate::construct::text
//! [html-em]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-em-element
//! [html-strong]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-strong-element
diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs
index 5c826a3..37e21d9 100644
--- a/src/construct/autolink.rs
+++ b/src/construct/autolink.rs
@@ -94,7 +94,7 @@
//! * [`autolink.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/autolink.js)
//! * [*§ 6.4 Autolinks* in `CommonMark`](https://spec.commonmark.org/0.30/#autolinks)
//!
-//! [text]: crate::content::text
+//! [text]: crate::construct::text
//! [label_end]: crate::construct::label_end
//! [autolink_scheme_size_max]: crate::constant::AUTOLINK_SCHEME_SIZE_MAX
//! [autolink_domain_size_max]: crate::constant::AUTOLINK_DOMAIN_SIZE_MAX
diff --git a/src/construct/blank_line.rs b/src/construct/blank_line.rs
index 87d257d..928b8cc 100644
--- a/src/construct/blank_line.rs
+++ b/src/construct/blank_line.rs
@@ -30,7 +30,7 @@
//! [heading-atx]: crate::construct::heading_atx
//! [list-item]: crate::construct::list_item
//! [paragraph]: crate::construct::paragraph
-//! [flow]: crate::content::flow
+//! [flow]: crate::construct::flow
use crate::construct::partial_space_or_tab::space_or_tab;
use crate::state::{Name as StateName, State};
diff --git a/src/construct/block_quote.rs b/src/construct/block_quote.rs
index 4f0870f..37726c5 100644
--- a/src/construct/block_quote.rs
+++ b/src/construct/block_quote.rs
@@ -29,7 +29,7 @@
//! * [`block-quote.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/block-quote.js)
//! * [*§ 5.1 Block quotes* in `CommonMark`](https://spec.commonmark.org/0.30/#block-quotes)
//!
-//! [document]: crate::content::document
+//! [document]: crate::construct::document
//! [html-blockquote]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-blockquote-element
//! [commonmark-block]: https://spec.commonmark.org/0.30/#phase-1-block-structure
diff --git a/src/construct/character_escape.rs b/src/construct/character_escape.rs
index ac91c29..6dac458 100644
--- a/src/construct/character_escape.rs
+++ b/src/construct/character_escape.rs
@@ -28,8 +28,8 @@
//! * [`character-escape.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/character-escape.js)
//! * [*§ 2.4 Backslash escapes* in `CommonMark`](https://spec.commonmark.org/0.30/#backslash-escapes)
//!
-//! [string]: crate::content::string
-//! [text]: crate::content::text
+//! [string]: crate::construct::string
+//! [text]: crate::construct::text
//! [character_reference]: crate::construct::character_reference
//! [hard_break_escape]: crate::construct::hard_break_escape
diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs
index 7d7b6f9..7935109 100644
--- a/src/construct/character_reference.rs
+++ b/src/construct/character_reference.rs
@@ -54,8 +54,8 @@
//! * [`character-reference.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/character-reference.js)
//! * [*§ 2.5 Entity and numeric character references* in `CommonMark`](https://spec.commonmark.org/0.30/#entity-and-numeric-character-references)
//!
-//! [string]: crate::content::string
-//! [text]: crate::content::text
+//! [string]: crate::construct::string
+//! [text]: crate::construct::text
//! [character_escape]: crate::construct::character_reference
//! [decode_numeric]: crate::util::decode_character_reference::decode_numeric
//! [character_references]: crate::constant::CHARACTER_REFERENCES
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index 74d6fe1..3812d44 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -91,9 +91,9 @@
//! * [`code-fenced.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/code-fenced.js)
//! * [*§ 4.5 Fenced code blocks* in `CommonMark`](https://spec.commonmark.org/0.30/#fenced-code-blocks)
//!
-//! [flow]: crate::content::flow
-//! [string]: crate::content::string
-//! [text]: crate::content::text
+//! [flow]: crate::construct::flow
+//! [string]: crate::construct::string
+//! [text]: crate::construct::text
//! [code_indented]: crate::construct::code_indented
//! [code_text]: crate::construct::code_text
//! [character_escape]: crate::construct::character_escape
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs
index cf111f4..e3a5333 100644
--- a/src/construct/code_indented.rs
+++ b/src/construct/code_indented.rs
@@ -38,8 +38,8 @@
//! * [`code-indented.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/code-indented.js)
//! * [*§ 4.4 Indented code blocks* in `CommonMark`](https://spec.commonmark.org/0.30/#indented-code-blocks)
//!
-//! [flow]: crate::content::flow
-//! [text]: crate::content::text
+//! [flow]: crate::construct::flow
+//! [text]: crate::construct::text
//! [code_text]: crate::construct::code_text
//! [code_fenced]: crate::construct::code_fenced
//! [html-pre]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-pre-element
diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs
index d601583..7ebee96 100644
--- a/src/construct/code_text.rs
+++ b/src/construct/code_text.rs
@@ -77,8 +77,8 @@
//! * [`code-text.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/code-text.js)
//! * [*§ 6.1 Code spans* in `CommonMark`](https://spec.commonmark.org/0.30/#code-spans)
//!
-//! [flow]: crate::content::flow
-//! [text]: crate::content::text
+//! [flow]: crate::construct::flow
+//! [text]: crate::construct::text
//! [code_indented]: crate::construct::code_indented
//! [code_fenced]: crate::construct::code_fenced
//! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
diff --git a/src/construct/definition.rs b/src/construct/definition.rs
index e242e23..8f274ee 100644
--- a/src/construct/definition.rs
+++ b/src/construct/definition.rs
@@ -80,8 +80,8 @@
//! * [`definition.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/definition.js)
//! * [*§ 4.7 Link reference definitions* in `CommonMark`](https://spec.commonmark.org/0.30/#link-reference-definitions)
//!
-//! [flow]: crate::content::flow
-//! [string]: crate::content::string
+//! [flow]: crate::construct::flow
+//! [string]: crate::construct::string
//! [character_escape]: crate::construct::character_escape
//! [character_reference]: crate::construct::character_reference
//! [label_end]: crate::construct::label_end
diff --git a/src/construct/document.rs b/src/construct/document.rs
new file mode 100644
index 0000000..9def6c5
--- /dev/null
+++ b/src/construct/document.rs
@@ -0,0 +1,492 @@
+//! The document content type.
+//!
+//! **Document** represents the containers, such as block quotes and lists,
+//! which structure the document and contain other sections.
+//!
+//! The constructs found in flow are:
+//!
+//! * [Block quote][crate::construct::block_quote]
+//! * [List][crate::construct::list_item]
+
+use crate::event::{Content, Event, Kind, Link, Name};
+use crate::state::{Name as StateName, State};
+use crate::subtokenize::divide_events;
+use crate::tokenizer::{Container, ContainerState, Tokenizer};
+use crate::util::skip;
+
+/// Phases where we can exit containers.
+#[derive(Debug, PartialEq)]
+enum Phase {
+ /// After parsing a line of lazy flow which resulted in something that
+ /// exits containers before the line.
+ ///
+ /// ```markdown
+ /// | * a
+ /// > | ```js
+ /// ^
+ /// | b
+ /// | ```
+ /// ```
+ After,
+ /// When a new container replaces an existing container.
+ ///
+ /// ```markdown
+ /// | * a
+ /// > | > b
+ /// ^
+ /// ```
+ Prefix,
+ /// After everything.
+ ///
+ /// ```markdown
+ /// > | * a
+ /// ^
+ /// ```
+ Eof,
+}
+
+/// Start of document, at an optional BOM.
+///
+/// ```markdown
+/// > | a
+/// ^
+/// ```
+pub fn start(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.tokenize_state.document_child = Some(Box::new(Tokenizer::new(
+ tokenizer.point.clone(),
+ tokenizer.parse_state,
+ )));
+
+ tokenizer.attempt(
+ State::Next(StateName::DocumentContainerExistingBefore),
+ State::Next(StateName::DocumentContainerExistingBefore),
+ );
+
+ State::Retry(StateName::BomStart)
+}
+
+/// At optional existing containers.
+//
+/// ```markdown
+/// | * a
+/// > | > b
+/// ^
+/// ```
+pub fn container_existing_before(tokenizer: &mut Tokenizer) -> State {
+ // If there are more existing containers, check whether the next one continues.
+ if tokenizer.tokenize_state.document_continued
+ < tokenizer.tokenize_state.document_container_stack.len()
+ {
+ let container = &tokenizer.tokenize_state.document_container_stack
+ [tokenizer.tokenize_state.document_continued];
+
+ let name = match container.kind {
+ Container::BlockQuote => StateName::BlockQuoteContStart,
+ Container::ListItem => StateName::ListItemContStart,
+ };
+
+ tokenizer.attempt(
+ State::Next(StateName::DocumentContainerExistingAfter),
+ State::Next(StateName::DocumentContainerNewBefore),
+ );
+
+ State::Retry(name)
+ }
+ // Otherwise, check new containers.
+ else {
+ State::Retry(StateName::DocumentContainerNewBefore)
+ }
+}
+
+/// After continued existing container.
+//
+/// ```markdown
+/// | * a
+/// > | b
+/// ^
+/// ```
+pub fn container_existing_after(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.tokenize_state.document_continued += 1;
+ State::Retry(StateName::DocumentContainerExistingBefore)
+}
+
+/// At new containers.
+//
+/// ```markdown
+/// > | * a
+/// ^
+/// > | > b
+/// ^
+/// ```
+pub fn container_new_before(tokenizer: &mut Tokenizer) -> State {
+ // If we have completely continued, restore the flow’s past `interrupt`
+ // status.
+ if tokenizer.tokenize_state.document_continued
+ == tokenizer.tokenize_state.document_container_stack.len()
+ {
+ let child = tokenizer.tokenize_state.document_child.as_ref().unwrap();
+
+ tokenizer.interrupt = child.interrupt;
+
+ // …and if we’re in a concrete construct, new containers can’t “pierce”
+ // into them.
+ if child.concrete {
+ return State::Retry(StateName::DocumentContainersAfter);
+ }
+ }
+
+ // Check for a new container.
+ // Block quote?
+ // Add a new container at the end of the stack.
+ let tail = tokenizer.tokenize_state.document_container_stack.len();
+ tokenizer
+ .tokenize_state
+ .document_container_stack
+ .push(ContainerState {
+ kind: Container::BlockQuote,
+ blank_initial: false,
+ size: 0,
+ });
+ // Swap the existing container with the new one.
+ tokenizer
+ .tokenize_state
+ .document_container_stack
+ .swap(tokenizer.tokenize_state.document_continued, tail);
+
+ tokenizer.attempt(
+ State::Next(StateName::DocumentContainerNewAfter),
+ State::Next(StateName::DocumentContainerNewBeforeNotBlockQuote),
+ );
+ State::Retry(StateName::BlockQuoteStart)
+}
+
+/// At new container, but not a block quote.
+//
+/// ```markdown
+/// > | * a
+/// ^
+/// ```
+pub fn container_new_before_not_block_quote(tokenizer: &mut Tokenizer) -> State {
+ // List item?
+ // We replace the empty block quote container for this new list one.
+ tokenizer.tokenize_state.document_container_stack
+ [tokenizer.tokenize_state.document_continued] = ContainerState {
+ kind: Container::ListItem,
+ blank_initial: false,
+ size: 0,
+ };
+
+ tokenizer.attempt(
+ State::Next(StateName::DocumentContainerNewAfter),
+ State::Next(StateName::DocumentContainerNewBeforeNotList),
+ );
+ State::Retry(StateName::ListItemStart)
+}
+
+/// At new container, but not a list (or block quote).
+//
+/// ```markdown
+/// > | a
+/// ^
+/// ```
+pub fn container_new_before_not_list(tokenizer: &mut Tokenizer) -> State {
+ // It wasn’t a new block quote or a list.
+ // Swap the new container (in the middle) with the existing one (at the end).
+ // Drop what was in the middle.
+ tokenizer
+ .tokenize_state
+ .document_container_stack
+ .swap_remove(tokenizer.tokenize_state.document_continued);
+
+ State::Retry(StateName::DocumentContainersAfter)
+}
+
+/// After new container.
+///
+/// ```markdown
+/// > | * a
+/// ^
+/// > | > b
+/// ^
+/// ```
+pub fn container_new_after(tokenizer: &mut Tokenizer) -> State {
+ // It was a new block quote or a list.
+ // Swap the new container (in the middle) with the existing one (at the end).
+ // Take the new container.
+ let container = tokenizer
+ .tokenize_state
+ .document_container_stack
+ .swap_remove(tokenizer.tokenize_state.document_continued);
+
+ // If we did not continue all existing containers, and there is a new one,
+ // close the flow and those containers.
+ if tokenizer.tokenize_state.document_continued
+ != tokenizer.tokenize_state.document_container_stack.len()
+ {
+ exit_containers(tokenizer, &Phase::Prefix);
+ }
+
+ tokenizer
+ .tokenize_state
+ .document_container_stack
+ .push(container);
+ tokenizer.tokenize_state.document_continued += 1;
+ tokenizer.interrupt = false;
+ State::Retry(StateName::DocumentContainerNewBefore)
+}
+
+/// After containers, at flow.
+//
+/// ```markdown
+/// > | * a
+/// ^
+/// > | > b
+/// ^
+/// ```
+pub fn containers_after(tokenizer: &mut Tokenizer) -> State {
+ let child = tokenizer.tokenize_state.document_child.as_mut().unwrap();
+
+ child.lazy = tokenizer.tokenize_state.document_continued
+ != tokenizer.tokenize_state.document_container_stack.len();
+ child.define_skip(tokenizer.point.clone());
+
+ match tokenizer.current {
+ // Note: EOL is part of data.
+ None => State::Retry(StateName::DocumentFlowEnd),
+ Some(_) => {
+ let current = tokenizer.events.len();
+ let previous = tokenizer.tokenize_state.document_data_index;
+ if let Some(previous) = previous {
+ tokenizer.events[previous].link.as_mut().unwrap().next = Some(current);
+ }
+ tokenizer.tokenize_state.document_data_index = Some(current);
+ tokenizer.enter_link(
+ Name::Data,
+ Link {
+ previous,
+ next: None,
+ content: Content::Flow,
+ },
+ );
+ State::Retry(StateName::DocumentFlowInside)
+ }
+ }
+}
+
+/// In flow.
+//
+/// ```markdown
+/// > | * ab
+/// ^
+/// ```
+pub fn flow_inside(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ None => {
+ tokenizer.exit(Name::Data);
+ State::Retry(StateName::DocumentFlowEnd)
+ }
+ // Note: EOL is part of data.
+ Some(b'\n') => {
+ tokenizer.consume();
+ tokenizer.exit(Name::Data);
+ State::Next(StateName::DocumentFlowEnd)
+ }
+ Some(_) => {
+ tokenizer.consume();
+ State::Next(StateName::DocumentFlowInside)
+ }
+ }
+}
+
+/// After flow (after eol or at eof).
+//
+/// ```markdown
+/// | * a
+/// > | > b
+/// ^ ^
+/// ```
+pub fn flow_end(tokenizer: &mut Tokenizer) -> State {
+ let child = tokenizer.tokenize_state.document_child.as_mut().unwrap();
+ let state = tokenizer
+ .tokenize_state
+ .document_child_state
+ .unwrap_or(State::Next(StateName::FlowStart));
+
+ tokenizer.tokenize_state.document_exits.push(None);
+
+ let state = child.push(
+ (child.point.index, child.point.vs),
+ (tokenizer.point.index, tokenizer.point.vs),
+ state,
+ );
+
+ let paragraph = matches!(state, State::Next(StateName::ParagraphInside))
+ || (!child.events.is_empty()
+ && child.events
+ [skip::opt_back(&child.events, child.events.len() - 1, &[Name::LineEnding])]
+ .name
+ == Name::Paragraph);
+
+ tokenizer.tokenize_state.document_child_state = Some(state);
+
+ if child.lazy && paragraph && tokenizer.tokenize_state.document_paragraph_before {
+ tokenizer.tokenize_state.document_continued =
+ tokenizer.tokenize_state.document_container_stack.len();
+ }
+
+ if tokenizer.tokenize_state.document_continued
+ != tokenizer.tokenize_state.document_container_stack.len()
+ {
+ exit_containers(tokenizer, &Phase::After);
+ }
+
+ match tokenizer.current {
+ None => {
+ tokenizer.tokenize_state.document_continued = 0;
+ exit_containers(tokenizer, &Phase::Eof);
+ resolve(tokenizer);
+ State::Ok
+ }
+ Some(_) => {
+ tokenizer.tokenize_state.document_continued = 0;
+ tokenizer.tokenize_state.document_paragraph_before = paragraph;
+ // Containers would only be interrupting if we’ve continued.
+ tokenizer.interrupt = false;
+ State::Retry(StateName::DocumentContainerExistingBefore)
+ }
+ }
+}
+
+/// Close containers (and flow if needed).
+fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) {
+ let mut stack_close = tokenizer
+ .tokenize_state
+ .document_container_stack
+ .split_off(tokenizer.tokenize_state.document_continued);
+
+ let child = tokenizer.tokenize_state.document_child.as_mut().unwrap();
+
+ // Flush if needed.
+ if *phase != Phase::After {
+ let state = tokenizer
+ .tokenize_state
+ .document_child_state
+ .take()
+ .unwrap_or(State::Next(StateName::FlowStart));
+
+ child.flush(state, false);
+ }
+
+ if !stack_close.is_empty() {
+ let index = tokenizer.tokenize_state.document_exits.len()
+ - (if *phase == Phase::After { 2 } else { 1 });
+ let mut exits = Vec::with_capacity(stack_close.len());
+
+ while !stack_close.is_empty() {
+ let container = stack_close.pop().unwrap();
+ let name = match container.kind {
+ Container::BlockQuote => Name::BlockQuote,
+ Container::ListItem => Name::ListItem,
+ };
+
+ exits.push(Event {
+ kind: Kind::Exit,
+ name: name.clone(),
+ point: tokenizer.point.clone(),
+ link: None,
+ });
+
+ let mut stack_index = tokenizer.stack.len();
+ let mut found = false;
+
+ while stack_index > 0 {
+ stack_index -= 1;
+
+ if tokenizer.stack[stack_index] == name {
+ tokenizer.stack.remove(stack_index);
+ found = true;
+ break;
+ }
+ }
+
+ debug_assert!(found, "expected to find container token to exit");
+ }
+
+ if let Some(ref mut list) = tokenizer.tokenize_state.document_exits[index] {
+ list.append(&mut exits);
+ } else {
+ tokenizer.tokenize_state.document_exits[index] = Some(exits);
+ }
+ }
+
+ child.interrupt = false;
+}
+
+// Inject everything together.
+fn resolve(tokenizer: &mut Tokenizer) {
+ let child = tokenizer.tokenize_state.document_child.as_mut().unwrap();
+
+ // First, add the container exits into `child`.
+ let mut child_index = 0;
+ let mut line = 0;
+
+ while child_index < child.events.len() {
+ let event = &child.events[child_index];
+
+ if event.kind == Kind::Enter
+ && (event.name == Name::LineEnding || event.name == Name::BlankLineEnding)
+ {
+ if let Some(mut exits) = tokenizer.tokenize_state.document_exits[line].take() {
+ let mut exit_index = 0;
+ while exit_index < exits.len() {
+ exits[exit_index].point = event.point.clone();
+ exit_index += 1;
+ }
+
+ child.map.add(child_index, 0, exits);
+ }
+
+ line += 1;
+ }
+
+ child_index += 1;
+ }
+
+ child.map.consume(&mut child.events);
+
+ // Now, add all child events into our parent document tokenizer.
+ divide_events(
+ &mut tokenizer.map,
+ &tokenizer.events,
+ skip::to(&tokenizer.events, 0, &[Name::Data]),
+ &mut child.events,
+ );
+
+ // Replace the flow data with actual events.
+ tokenizer.map.consume(&mut tokenizer.events);
+
+ // Now, add some final container exits due to the EOF.
+ // We can’t inject them into the child earlier, as they are “outside” its
+ // linked data.
+ if line < tokenizer.tokenize_state.document_exits.len() {
+ if let Some(mut exits) = tokenizer.tokenize_state.document_exits[line].take() {
+ let mut exit_index = 0;
+ while exit_index < exits.len() {
+ exits[exit_index].point = tokenizer.point.clone();
+ exit_index += 1;
+ }
+
+ tokenizer.events.append(&mut exits);
+ }
+ }
+
+ // Add the resolvers from child.
+ tokenizer
+ .resolvers
+ .append(&mut child.resolvers.split_off(0));
+
+ tokenizer
+ .tokenize_state
+ .definitions
+ .append(&mut child.tokenize_state.definitions.split_off(0));
+}
diff --git a/src/construct/flow.rs b/src/construct/flow.rs
new file mode 100644
index 0000000..08c7891
--- /dev/null
+++ b/src/construct/flow.rs
@@ -0,0 +1,254 @@
+//! The flow content type.
+//!
+//! **Flow** represents the sections, such as headings and code, which are
+//! parsed per line.
+//! An example is HTML, which has a certain starting condition (such as
+//! `<script>` on its own line), then continues for a while, until an end
+//! condition is found (such as `</style>`).
+//! If that line with an end condition is never found, that flow goes until
+//! the end.
+//!
+//! The constructs found in flow are:
+//!
+//! * [Blank line][crate::construct::blank_line]
+//! * [Code (fenced)][crate::construct::code_fenced]
+//! * [Code (indented)][crate::construct::code_indented]
+//! * [Definition][crate::construct::definition]
+//! * [Heading (atx)][crate::construct::heading_atx]
+//! * [Heading (setext)][crate::construct::heading_setext]
+//! * [HTML (flow)][crate::construct::html_flow]
+//! * [Thematic break][crate::construct::thematic_break]
+
+use crate::event::Name;
+use crate::state::{Name as StateName, State};
+use crate::tokenizer::Tokenizer;
+
+/// Start of flow.
+//
+/// ```markdown
+/// > | ## alpha
+/// ^
+/// > | bravo
+/// ^
+/// > | ***
+/// ^
+/// ```
+pub fn start(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ Some(b'`' | b'~') => {
+ tokenizer.attempt(
+ State::Next(StateName::FlowAfter),
+ State::Next(StateName::FlowBeforeParagraph),
+ );
+ State::Retry(StateName::CodeFencedStart)
+ }
+ Some(b'<') => {
+ tokenizer.attempt(
+ State::Next(StateName::FlowAfter),
+ State::Next(StateName::FlowBeforeParagraph),
+ );
+ State::Retry(StateName::HtmlFlowStart)
+ }
+ Some(b'#') => {
+ tokenizer.attempt(
+ State::Next(StateName::FlowAfter),
+ State::Next(StateName::FlowBeforeParagraph),
+ );
+ State::Retry(StateName::HeadingAtxStart)
+ }
+ // Note: `-` is also used in thematic breaks, so it’s not included here.
+ Some(b'=') => {
+ tokenizer.attempt(
+ State::Next(StateName::FlowAfter),
+ State::Next(StateName::FlowBeforeParagraph),
+ );
+ State::Retry(StateName::HeadingSetextStart)
+ }
+ Some(b'*' | b'_') => {
+ tokenizer.attempt(
+ State::Next(StateName::FlowAfter),
+ State::Next(StateName::FlowBeforeParagraph),
+ );
+ State::Retry(StateName::ThematicBreakStart)
+ }
+ Some(b'[') => {
+ tokenizer.attempt(
+ State::Next(StateName::FlowAfter),
+ State::Next(StateName::FlowBeforeParagraph),
+ );
+ State::Retry(StateName::DefinitionStart)
+ }
+ // Actual parsing: blank line? Indented code? Indented anything?
+ // Also includes `-` which can be a setext heading underline or a thematic break.
+ None | Some(b'\t' | b'\n' | b' ' | b'-') => State::Retry(StateName::FlowBlankLineBefore),
+ // Must be a paragraph.
+ Some(_) => {
+ tokenizer.attempt(State::Next(StateName::FlowAfter), State::Nok);
+ State::Retry(StateName::ParagraphStart)
+ }
+ }
+}
+
+/// At blank line.
+///
+/// ```markdown
+/// > | ␠␠␊
+/// ^
+/// ```
+pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.attempt(
+ State::Next(StateName::FlowBlankLineAfter),
+ State::Next(StateName::FlowBeforeCodeIndented),
+ );
+ State::Retry(StateName::BlankLineStart)
+}
+
+/// At code (indented).
+///
+/// ```markdown
+/// > | ␠␠␠␠a
+/// ^
+/// ```
+pub fn before_code_indented(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.attempt(
+ State::Next(StateName::FlowAfter),
+ State::Next(StateName::FlowBeforeCodeFenced),
+ );
+ State::Retry(StateName::CodeIndentedStart)
+}
+
+/// At code (fenced).
+///
+/// ````markdown
+/// > | ```
+/// ^
+/// ````
+pub fn before_code_fenced(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.attempt(
+ State::Next(StateName::FlowAfter),
+ State::Next(StateName::FlowBeforeHtml),
+ );
+ State::Retry(StateName::CodeFencedStart)
+}
+
+/// At html (flow).
+///
+/// ```markdown
+/// > | <a>
+/// ^
+/// ```
+pub fn before_html(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.attempt(
+ State::Next(StateName::FlowAfter),
+ State::Next(StateName::FlowBeforeHeadingAtx),
+ );
+ State::Retry(StateName::HtmlFlowStart)
+}
+
+/// At heading (atx).
+///
+/// ```markdown
+/// > | # a
+/// ^
+/// ```
+pub fn before_heading_atx(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.attempt(
+ State::Next(StateName::FlowAfter),
+ State::Next(StateName::FlowBeforeHeadingSetext),
+ );
+ State::Retry(StateName::HeadingAtxStart)
+}
+
+/// At heading (setext).
+///
+/// ```markdown
+/// | a
+/// > | =
+/// ^
+/// ```
+pub fn before_heading_setext(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.attempt(
+ State::Next(StateName::FlowAfter),
+ State::Next(StateName::FlowBeforeThematicBreak),
+ );
+ State::Retry(StateName::HeadingSetextStart)
+}
+
+/// At thematic break.
+///
+/// ```markdown
+/// > | ***
+/// ^
+/// ```
+pub fn before_thematic_break(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.attempt(
+ State::Next(StateName::FlowAfter),
+ State::Next(StateName::FlowBeforeDefinition),
+ );
+ State::Retry(StateName::ThematicBreakStart)
+}
+
+/// At definition.
+///
+/// ```markdown
+/// > | [a]: b
+/// ^
+/// ```
+pub fn before_definition(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.attempt(
+ State::Next(StateName::FlowAfter),
+ State::Next(StateName::FlowBeforeParagraph),
+ );
+ State::Retry(StateName::DefinitionStart)
+}
+
+/// At paragraph.
+///
+/// ```markdown
+/// > | a
+/// ^
+/// ```
+pub fn before_paragraph(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.attempt(State::Next(StateName::FlowAfter), State::Nok);
+ State::Retry(StateName::ParagraphStart)
+}
+
+/// After blank line.
+///
+/// ```markdown
+/// > | ␠␠␊
+/// ^
+/// ```
+pub fn blank_line_after(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ None => State::Ok,
+ Some(b'\n') => {
+ tokenizer.enter(Name::BlankLineEnding);
+ tokenizer.consume();
+ tokenizer.exit(Name::BlankLineEnding);
+ // Feel free to interrupt.
+ tokenizer.interrupt = false;
+ State::Next(StateName::FlowStart)
+ }
+ _ => unreachable!("expected eol/eof"),
+ }
+}
+
+/// After flow.
+///
+/// ```markdown
+/// > | # a␊
+/// ^
+/// ```
+pub fn after(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ None => State::Ok,
+ Some(b'\n') => {
+ tokenizer.enter(Name::LineEnding);
+ tokenizer.consume();
+ tokenizer.exit(Name::LineEnding);
+ State::Next(StateName::FlowStart)
+ }
+ _ => unreachable!("expected eol/eof"),
+ }
+}
diff --git a/src/construct/hard_break_escape.rs b/src/construct/hard_break_escape.rs
index 79d8150..1fafa0b 100644
--- a/src/construct/hard_break_escape.rs
+++ b/src/construct/hard_break_escape.rs
@@ -33,7 +33,7 @@
//! * [`hard-break-escape.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/hard-break-escape.js)
//! * [*§ 6.7 Hard line breaks* in `CommonMark`](https://spec.commonmark.org/0.30/#hard-line-breaks)
//!
-//! [text]: crate::content::text
+//! [text]: crate::construct::text
//! [character_escape]: crate::construct::character_escape
//! [character_reference]: crate::construct::character_reference
//! [hard_break_trailing]: crate::construct::partial_whitespace
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index 30c22f3..3bcff54 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -47,7 +47,7 @@
//! * [`heading-atx.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/heading-atx.js)
//! * [*§ 4.2 ATX headings* in `CommonMark`](https://spec.commonmark.org/0.30/#atx-headings)
//!
-//! [flow]: crate::content::flow
+//! [flow]: crate::construct::flow
//! [heading_setext]: crate::construct::heading_setext
//! [hard_break_escape]: crate::construct::hard_break_escape
//! [html]: https://html.spec.whatwg.org/multipage/sections.html#the-h1,-h2,-h3,-h4,-h5,-and-h6-elements
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index 1f6270a..043104a 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -49,7 +49,7 @@
//! * [`setext-underline.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/setext-underline.js)
//! * [*§ 4.3 Setext headings* in `CommonMark`](https://spec.commonmark.org/0.30/#setext-headings)
//!
-//! [flow]: crate::content::flow
+//! [flow]: crate::construct::flow
//! [heading_atx]: crate::construct::heading_atx
//! [thematic_break]: crate::construct::thematic_break
//! [hard_break_escape]: crate::construct::hard_break_escape
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index e90abc4..38e33f8 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -91,7 +91,7 @@
//! * [`html-flow.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/html-flow.js)
//! * [*§ 4.6 HTML blocks* in `CommonMark`](https://spec.commonmark.org/0.30/#html-blocks)
//!
-//! [flow]: crate::content::flow
+//! [flow]: crate::construct::flow
//! [html_text]: crate::construct::html_text
//! [paragraph]: crate::construct::paragraph
//! [html_raw_names]: crate::constant::HTML_RAW_NAMES
diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs
index c3b0a65..fde0847 100644
--- a/src/construct/html_text.rs
+++ b/src/construct/html_text.rs
@@ -50,7 +50,7 @@
//! * [`html-text.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/html-text.js)
//! * [*§ 6.6 Raw HTML* in `CommonMark`](https://spec.commonmark.org/0.30/#raw-html)
//!
-//! [text]: crate::content::text
+//! [text]: crate::construct::text
//! [html_flow]: crate::construct::html_flow
//! [html-parsing]: https://html.spec.whatwg.org/multipage/parsing.html#parsing
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index 5d8f17a..09716b7 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -132,8 +132,8 @@
//! * [*§ 6.3 Links* in `CommonMark`](https://spec.commonmark.org/0.30/#links)
//! * [*§ 6.4 Images* in `CommonMark`](https://spec.commonmark.org/0.30/#images)
//!
-//! [string]: crate::content::string
-//! [text]: crate::content::text
+//! [string]: crate::construct::string
+//! [text]: crate::construct::text
//! [destination]: crate::construct::partial_destination
//! [title]: crate::construct::partial_title
//! [label]: crate::construct::partial_label
diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs
index e6a75d3..ffc1aee 100644
--- a/src/construct/label_start_image.rs
+++ b/src/construct/label_start_image.rs
@@ -24,7 +24,7 @@
//! * [`label-start-image.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/label-start-image.js)
//! * [*§ 6.4 Images* in `CommonMark`](https://spec.commonmark.org/0.30/#images)
//!
-//! [text]: crate::content::text
+//! [text]: crate::construct::text
//! [label_end]: crate::construct::label_end
//! [html-img]: https://html.spec.whatwg.org/multipage/embedded-content.html#the-img-element
diff --git a/src/construct/label_start_link.rs b/src/construct/label_start_link.rs
index 7b6ac6e..dad6884 100644
--- a/src/construct/label_start_link.rs
+++ b/src/construct/label_start_link.rs
@@ -23,7 +23,7 @@
//! * [`label-start-link.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/label-start-link.js)
//! * [*§ 6.3 Links* in `CommonMark`](https://spec.commonmark.org/0.30/#links)
//!
-//! [text]: crate::content::text
+//! [text]: crate::construct::text
//! [label_end]: crate::construct::label_end
//! [html-a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element
diff --git a/src/construct/list_item.rs b/src/construct/list_item.rs
index 5161254..a70906a 100644
--- a/src/construct/list_item.rs
+++ b/src/construct/list_item.rs
@@ -38,7 +38,7 @@
//! * [*§ 5.2 List items* in `CommonMark`](https://spec.commonmark.org/0.30/#list-items)
//! * [*§ 5.3 Lists* in `CommonMark`](https://spec.commonmark.org/0.30/#lists)
//!
-//! [document]: crate::content::document
+//! [document]: crate::construct::document
//! [html-li]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-li-element
//! [html-ol]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-ol-element
//! [html-ul]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-ul-element
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index 566bb30..49868e9 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -76,6 +76,8 @@ pub mod code_fenced;
pub mod code_indented;
pub mod code_text;
pub mod definition;
+pub mod document;
+pub mod flow;
pub mod hard_break_escape;
pub mod heading_atx;
pub mod heading_setext;
@@ -95,4 +97,6 @@ pub mod partial_space_or_tab;
pub mod partial_space_or_tab_eol;
pub mod partial_title;
pub mod partial_whitespace;
+pub mod string;
+pub mod text;
pub mod thematic_break;
diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs
index b33346a..5f89ef9 100644
--- a/src/construct/paragraph.rs
+++ b/src/construct/paragraph.rs
@@ -26,8 +26,8 @@
//! * [`content.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/content.js)
//! * [*§ 4.8 Paragraphs* in `CommonMark`](https://spec.commonmark.org/0.30/#paragraphs)
//!
-//! [flow]: crate::content::flow
-//! [text]: crate::content::text
+//! [flow]: crate::construct::flow
+//! [text]: crate::construct::text
//! [autolink]: crate::construct::autolink
//! [code_text]: crate::construct::code_text
//! [html]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-p-element
diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs
index 86e48c7..3ffa646 100644
--- a/src/construct/partial_data.rs
+++ b/src/construct/partial_data.rs
@@ -3,8 +3,8 @@
//! It can include anything (including line endings), and stops at certain
//! characters.
//!
-//! [string]: crate::content::string
-//! [text]: crate::content::text
+//! [string]: crate::construct::string
+//! [text]: crate::construct::text
use crate::event::{Kind, Name};
use crate::resolve::Name as ResolveName;
diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs
index d2477ab..b5254d3 100644
--- a/src/construct/partial_destination.rs
+++ b/src/construct/partial_destination.rs
@@ -65,7 +65,7 @@
//! * [`micromark-factory-destination/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-destination/dev/index.js)
//!
//! [definition]: crate::construct::definition
-//! [string]: crate::content::string
+//! [string]: crate::construct::string
//! [character_escape]: crate::construct::character_escape
//! [character_reference]: crate::construct::character_reference
//! [label_end]: crate::construct::label_end
diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs
index 20a7b15..00d613a 100644
--- a/src/construct/partial_label.rs
+++ b/src/construct/partial_label.rs
@@ -48,7 +48,7 @@
//! * [`micromark-factory-label/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-label/dev/index.js)
//!
//! [definition]: crate::construct::definition
-//! [string]: crate::content::string
+//! [string]: crate::construct::string
//! [attention]: crate::construct::attention
//! [character_escape]: crate::construct::character_escape
//! [character_reference]: crate::construct::character_reference
diff --git a/src/construct/partial_non_lazy_continuation.rs b/src/construct/partial_non_lazy_continuation.rs
index 3bbf7cc..5eec71f 100644
--- a/src/construct/partial_non_lazy_continuation.rs
+++ b/src/construct/partial_non_lazy_continuation.rs
@@ -5,7 +5,7 @@
//! For example, [html (flow)][html_flow] and code ([fenced][code_fenced],
//! [indented][code_indented]), stop when next line is lazy.
//!
-//! [flow]: crate::content::flow
+//! [flow]: crate::construct::flow
//! [code_fenced]: crate::construct::code_fenced
//! [code_indented]: crate::construct::code_indented
//! [html_flow]: crate::construct::html_flow
diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs
index f0c4931..50d26be 100644
--- a/src/construct/partial_title.rs
+++ b/src/construct/partial_title.rs
@@ -25,7 +25,7 @@
//! * [`micromark-factory-title/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-title/dev/index.js)
//!
//! [definition]: crate::construct::definition
-//! [string]: crate::content::string
+//! [string]: crate::construct::string
//! [character_escape]: crate::construct::character_escape
//! [character_reference]: crate::construct::character_reference
//! [label_end]: crate::construct::label_end
diff --git a/src/construct/partial_whitespace.rs b/src/construct/partial_whitespace.rs
index 7152881..bf06df9 100644
--- a/src/construct/partial_whitespace.rs
+++ b/src/construct/partial_whitespace.rs
@@ -38,8 +38,8 @@
//! * [`initialize/text.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark/dev/lib/initialize/text.js)
//! * [*§ 6.7 Hard line breaks* in `CommonMark`](https://spec.commonmark.org/0.30/#hard-line-breaks)
//!
-//! [string]: crate::content::string
-//! [text]: crate::content::text
+//! [string]: crate::construct::string
+//! [text]: crate::construct::text
//! [hard_break_escape]: crate::construct::hard_break_escape
//! [character_escape]: crate::construct::character_escape
//! [hard_break_prefix_size_min]: crate::constant::HARD_BREAK_PREFIX_SIZE_MIN
diff --git a/src/construct/string.rs b/src/construct/string.rs
new file mode 100644
index 0000000..698a51d
--- /dev/null
+++ b/src/construct/string.rs
@@ -0,0 +1,76 @@
+//! The string content type.
+//!
+//! **String** is a limited [text][] like content type which only allows
+//! character escapes and character references.
+//! It exists in things such as identifiers (media references, definitions),
+//! titles, URLs, code (fenced) info and meta parts.
+//!
+//! The constructs found in string are:
+//!
+//! * [Character escape][crate::construct::character_escape]
+//! * [Character reference][crate::construct::character_reference]
+//!
+//! [text]: crate::construct::text
+
+use crate::construct::partial_whitespace::resolve_whitespace;
+use crate::resolve::Name as ResolveName;
+use crate::state::{Name as StateName, State};
+use crate::tokenizer::Tokenizer;
+
+/// Characters that can start something in string.
+const MARKERS: [u8; 2] = [b'&', b'\\'];
+
+/// Start of string.
+///
+/// ````markdown
+/// > | ```js
+/// ^
+/// ````
+pub fn start(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.register_resolver(ResolveName::String);
+ tokenizer.tokenize_state.markers = &MARKERS;
+ State::Retry(StateName::StringBefore)
+}
+
+/// Before string.
+///
+/// ````markdown
+/// > | ```js
+/// ^
+/// ````
+pub fn before(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ None => State::Ok,
+ Some(b'&') => {
+ tokenizer.attempt(
+ State::Next(StateName::StringBefore),
+ State::Next(StateName::StringBeforeData),
+ );
+ State::Retry(StateName::CharacterReferenceStart)
+ }
+ Some(b'\\') => {
+ tokenizer.attempt(
+ State::Next(StateName::StringBefore),
+ State::Next(StateName::StringBeforeData),
+ );
+ State::Retry(StateName::CharacterEscapeStart)
+ }
+ _ => State::Retry(StateName::StringBeforeData),
+ }
+}
+
+/// At data.
+///
+/// ````markdown
+/// > | ```js
+/// ^
+/// ````
+pub fn before_data(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.attempt(State::Next(StateName::StringBefore), State::Nok);
+ State::Retry(StateName::DataStart)
+}
+
+/// Resolve whitespace in string.
+pub fn resolve(tokenizer: &mut Tokenizer) {
+ resolve_whitespace(tokenizer, false, false);
+}
diff --git a/src/construct/text.rs b/src/construct/text.rs
new file mode 100644
index 0000000..5c13dba
--- /dev/null
+++ b/src/construct/text.rs
@@ -0,0 +1,173 @@
+//! The text content type.
+//!
+//! **Text** contains phrasing content such as
+//! [attention][crate::construct::attention] (emphasis, strong),
+//! [code (text)][crate::construct::code_text], and actual text.
+//!
+//! The constructs found in text are:
+//!
+//! * [Attention][crate::construct::attention]
+//! * [Autolink][crate::construct::autolink]
+//! * [Character escape][crate::construct::character_escape]
+//! * [Character reference][crate::construct::character_reference]
+//! * [Code (text)][crate::construct::code_text]
+//! * [Hard break (escape)][crate::construct::hard_break_escape]
+//! * [HTML (text)][crate::construct::html_text]
+//! * [Label start (image)][crate::construct::label_start_image]
+//! * [Label start (link)][crate::construct::label_start_link]
+//! * [Label end][crate::construct::label_end]
+//!
+//! > 👉 **Note**: for performance reasons, hard break (trailing) is formed by
+//! > [whitespace][crate::construct::partial_whitespace].
+
+use crate::construct::partial_whitespace::resolve_whitespace;
+use crate::resolve::Name as ResolveName;
+use crate::state::{Name as StateName, State};
+use crate::tokenizer::Tokenizer;
+
+/// Characters that can start something in text.
+const MARKERS: [u8; 9] = [
+ b'!', // `label_start_image`
+ b'&', // `character_reference`
+ b'*', // `attention`
+ b'<', // `autolink`, `html_text`
+ b'[', // `label_start_link`
+ b'\\', // `character_escape`, `hard_break_escape`
+ b']', // `label_end`
+ b'_', // `attention`
+ b'`', // `code_text`
+];
+
+/// Start of text.
+///
+/// ```markdown
+/// > | abc
+/// ^
+/// ```
+pub fn start(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.register_resolver(ResolveName::Text);
+ tokenizer.tokenize_state.markers = &MARKERS;
+ State::Retry(StateName::TextBefore)
+}
+
+/// Before text.
+///
+/// ```markdown
+/// > | abc
+/// ^
+/// ```
+pub fn before(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ None => State::Ok,
+ Some(b'!') => {
+ tokenizer.attempt(
+ State::Next(StateName::TextBefore),
+ State::Next(StateName::TextBeforeData),
+ );
+ State::Retry(StateName::LabelStartImageStart)
+ }
+ Some(b'&') => {
+ tokenizer.attempt(
+ State::Next(StateName::TextBefore),
+ State::Next(StateName::TextBeforeData),
+ );
+ State::Retry(StateName::CharacterReferenceStart)
+ }
+ Some(b'*' | b'_') => {
+ tokenizer.attempt(
+ State::Next(StateName::TextBefore),
+ State::Next(StateName::TextBeforeData),
+ );
+ State::Retry(StateName::AttentionStart)
+ }
+ // `autolink`, `html_text` (order does not matter)
+ Some(b'<') => {
+ tokenizer.attempt(
+ State::Next(StateName::TextBefore),
+ State::Next(StateName::TextBeforeHtml),
+ );
+ State::Retry(StateName::AutolinkStart)
+ }
+ Some(b'[') => {
+ tokenizer.attempt(
+ State::Next(StateName::TextBefore),
+ State::Next(StateName::TextBeforeData),
+ );
+ State::Retry(StateName::LabelStartLinkStart)
+ }
+ Some(b'\\') => {
+ tokenizer.attempt(
+ State::Next(StateName::TextBefore),
+ State::Next(StateName::TextBeforeHardBreakEscape),
+ );
+ State::Retry(StateName::CharacterEscapeStart)
+ }
+ Some(b']') => {
+ tokenizer.attempt(
+ State::Next(StateName::TextBefore),
+ State::Next(StateName::TextBeforeData),
+ );
+ State::Retry(StateName::LabelEndStart)
+ }
+ Some(b'`') => {
+ tokenizer.attempt(
+ State::Next(StateName::TextBefore),
+ State::Next(StateName::TextBeforeData),
+ );
+ State::Retry(StateName::CodeTextStart)
+ }
+ _ => State::Retry(StateName::TextBeforeData),
+ }
+}
+
+/// Before html (text).
+///
+/// At `<`, which wasn’t an autolink.
+///
+/// ```markdown
+/// > | a <b>
+/// ^
+/// ```
+pub fn before_html(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.attempt(
+ State::Next(StateName::TextBefore),
+ State::Next(StateName::TextBeforeData),
+ );
+ State::Retry(StateName::HtmlTextStart)
+}
+
+/// Before hard break escape.
+///
+/// At `\`, which wasn’t a character escape.
+///
+/// ```markdown
+/// > | a \␊
+/// ^
+/// ```
+pub fn before_hard_break_escape(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.attempt(
+ State::Next(StateName::TextBefore),
+ State::Next(StateName::TextBeforeData),
+ );
+ State::Retry(StateName::HardBreakEscapeStart)
+}
+
+/// Before data.
+///
+/// ```markdown
+/// > | a
+/// ^
+/// ```
+pub fn before_data(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.attempt(State::Next(StateName::TextBefore), State::Nok);
+ State::Retry(StateName::DataStart)
+}
+
+/// Resolve whitespace.
+pub fn resolve(tokenizer: &mut Tokenizer) {
+ resolve_whitespace(
+ tokenizer,
+ tokenizer.parse_state.constructs.hard_break_trailing,
+ true,
+ );
+}
diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs
index 1b581ea..9496d98 100644
--- a/src/construct/thematic_break.rs
+++ b/src/construct/thematic_break.rs
@@ -43,7 +43,7 @@
//! * [`thematic-break.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/thematic-break.js)
//! * [*§ 4.1 Thematic breaks* in `CommonMark`](https://spec.commonmark.org/0.30/#thematic-breaks)
//!
-//! [flow]: crate::content::flow
+//! [flow]: crate::construct::flow
//! [heading_setext]: crate::construct::heading_setext
//! [list-item]: crate::construct::list_item
//! [html]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-hr-element