aboutsummaryrefslogtreecommitdiffstats
path: root/src/content
diff options
context:
space:
mode:
Diffstat (limited to 'src/content')
-rw-r--r--src/content/document.rs492
-rw-r--r--src/content/flow.rs254
-rw-r--r--src/content/mod.rs11
-rw-r--r--src/content/string.rs76
-rw-r--r--src/content/text.rs173
5 files changed, 0 insertions, 1006 deletions
diff --git a/src/content/document.rs b/src/content/document.rs
deleted file mode 100644
index 9def6c5..0000000
--- a/src/content/document.rs
+++ /dev/null
@@ -1,492 +0,0 @@
-//! The document content type.
-//!
-//! **Document** represents the containers, such as block quotes and lists,
-//! which structure the document and contain other sections.
-//!
-//! The constructs found in document are:
-//!
-//! * [Block quote][crate::construct::block_quote]
-//! * [List][crate::construct::list_item]
-
-use crate::event::{Content, Event, Kind, Link, Name};
-use crate::state::{Name as StateName, State};
-use crate::subtokenize::divide_events;
-use crate::tokenizer::{Container, ContainerState, Tokenizer};
-use crate::util::skip;
-
-/// Phases in which `exit_containers` closes containers.
-#[derive(Debug, PartialEq)]
-enum Phase {
- /// After parsing a line of lazy flow which resulted in something that
- /// exits containers before the line.
- ///
- /// ```markdown
- /// | * a
- /// > | ```js
- /// ^
- /// | b
- /// | ```
- /// ```
- After,
- /// When a new container replaces an existing container.
- ///
- /// ```markdown
- /// | * a
- /// > | > b
- /// ^
- /// ```
- Prefix,
- /// At the end of the document, when every container still open is closed.
- ///
- /// ```markdown
- /// > | * a
- /// ^
- /// ```
- Eof,
-}
-
-/// Start of document: create the child tokenizer, then try an optional BOM.
-///
-/// ```markdown
-/// > | a
-/// ^
-/// ```
-pub fn start(tokenizer: &mut Tokenizer) -> State {
- tokenizer.tokenize_state.document_child = Some(Box::new(Tokenizer::new(
- tokenizer.point.clone(),
- tokenizer.parse_state,
- )));
- // Whether or not there is a BOM, continue at the existing containers.
- tokenizer.attempt(
- State::Next(StateName::DocumentContainerExistingBefore),
- State::Next(StateName::DocumentContainerExistingBefore),
- );
-
- State::Retry(StateName::BomStart)
-}
-
-/// At optional existing containers.
-///
-/// ```markdown
-/// | * a
-/// > | > b
-/// ^
-/// ```
-pub fn container_existing_before(tokenizer: &mut Tokenizer) -> State {
- // If there are more existing containers, check whether the next one continues.
- if tokenizer.tokenize_state.document_continued
- < tokenizer.tokenize_state.document_container_stack.len()
- {
- let container = &tokenizer.tokenize_state.document_container_stack
- [tokenizer.tokenize_state.document_continued];
- // Pick the continuation state that matches the kind of container.
- let name = match container.kind {
- Container::BlockQuote => StateName::BlockQuoteContStart,
- Container::ListItem => StateName::ListItemContStart,
- };
- // If it continues, count it; if not, go look for new containers.
- tokenizer.attempt(
- State::Next(StateName::DocumentContainerExistingAfter),
- State::Next(StateName::DocumentContainerNewBefore),
- );
-
- State::Retry(name)
- }
- // Otherwise, check new containers.
- else {
- State::Retry(StateName::DocumentContainerNewBefore)
- }
-}
-
-/// After continued existing container: count it, then check the next one.
-///
-/// ```markdown
-/// | * a
-/// > | b
-/// ^
-/// ```
-pub fn container_existing_after(tokenizer: &mut Tokenizer) -> State {
- tokenizer.tokenize_state.document_continued += 1;
- State::Retry(StateName::DocumentContainerExistingBefore)
-}
-
-/// At new containers: try a block quote first.
-///
-/// ```markdown
-/// > | * a
-/// ^
-/// > | > b
-/// ^
-/// ```
-pub fn container_new_before(tokenizer: &mut Tokenizer) -> State {
- // If we have completely continued, restore the flow’s past `interrupt`
- // status.
- if tokenizer.tokenize_state.document_continued
- == tokenizer.tokenize_state.document_container_stack.len()
- {
- let child = tokenizer.tokenize_state.document_child.as_ref().unwrap();
-
- tokenizer.interrupt = child.interrupt;
-
- // …and if we’re in a concrete construct, new containers can’t “pierce”
- // into them.
- if child.concrete {
- return State::Retry(StateName::DocumentContainersAfter);
- }
- }
-
- // Check for a new container.
- // Block quote?
- // Add a new container at the end of the stack.
- let tail = tokenizer.tokenize_state.document_container_stack.len();
- tokenizer
- .tokenize_state
- .document_container_stack
- .push(ContainerState {
- kind: Container::BlockQuote,
- blank_initial: false,
- size: 0,
- });
- // Swap the existing container with the new one.
- tokenizer
- .tokenize_state
- .document_container_stack
- .swap(tokenizer.tokenize_state.document_continued, tail);
- // If it is not a block quote, a list item is tried next.
- tokenizer.attempt(
- State::Next(StateName::DocumentContainerNewAfter),
- State::Next(StateName::DocumentContainerNewBeforeNotBlockQuote),
- );
- State::Retry(StateName::BlockQuoteStart)
-}
-
-/// At new container, but not a block quote: try a list item.
-///
-/// ```markdown
-/// > | * a
-/// ^
-/// ```
-pub fn container_new_before_not_block_quote(tokenizer: &mut Tokenizer) -> State {
- // List item?
- // We replace the empty block quote container for this new list one.
- tokenizer.tokenize_state.document_container_stack
- [tokenizer.tokenize_state.document_continued] = ContainerState {
- kind: Container::ListItem,
- blank_initial: false,
- size: 0,
- };
- // If it is not a list item either, there is no new container here.
- tokenizer.attempt(
- State::Next(StateName::DocumentContainerNewAfter),
- State::Next(StateName::DocumentContainerNewBeforeNotList),
- );
- State::Retry(StateName::ListItemStart)
-}
-
-/// At new container, but not a list (or block quote).
-///
-/// ```markdown
-/// > | a
-/// ^
-/// ```
-pub fn container_new_before_not_list(tokenizer: &mut Tokenizer) -> State {
- // It wasn’t a new block quote or a list.
- // Swap the new container (in the middle) with the existing one (at the end).
- // Drop what was in the middle.
- tokenizer
- .tokenize_state
- .document_container_stack
- .swap_remove(tokenizer.tokenize_state.document_continued);
- // No new container was found: continue after the containers, at flow.
- State::Retry(StateName::DocumentContainersAfter)
-}
-
-/// After new container: keep it, then look for another new container.
-///
-/// ```markdown
-/// > | * a
-/// ^
-/// > | > b
-/// ^
-/// ```
-pub fn container_new_after(tokenizer: &mut Tokenizer) -> State {
- // It was a new block quote or a list.
- // Swap the new container (in the middle) with the existing one (at the end).
- // Take the new container.
- let container = tokenizer
- .tokenize_state
- .document_container_stack
- .swap_remove(tokenizer.tokenize_state.document_continued);
-
- // If we did not continue all existing containers, and there is a new one,
- // close the flow and those containers.
- if tokenizer.tokenize_state.document_continued
- != tokenizer.tokenize_state.document_container_stack.len()
- {
- exit_containers(tokenizer, &Phase::Prefix);
- }
- // Put the new container at the end of the stack and count it as continued.
- tokenizer
- .tokenize_state
- .document_container_stack
- .push(container);
- tokenizer.tokenize_state.document_continued += 1;
- tokenizer.interrupt = false;
- State::Retry(StateName::DocumentContainerNewBefore)
-}
-
-/// After containers, at flow.
-///
-/// ```markdown
-/// > | * a
-/// ^
-/// > | > b
-/// ^
-/// ```
-pub fn containers_after(tokenizer: &mut Tokenizer) -> State {
- let child = tokenizer.tokenize_state.document_child.as_mut().unwrap();
- // The child is lazy when not all existing containers were continued.
- child.lazy = tokenizer.tokenize_state.document_continued
- != tokenizer.tokenize_state.document_container_stack.len();
- child.define_skip(tokenizer.point.clone());
-
- match tokenizer.current {
- // Note: EOL is part of data.
- None => State::Retry(StateName::DocumentFlowEnd),
- Some(_) => {
- let current = tokenizer.events.len();
- let previous = tokenizer.tokenize_state.document_data_index;
- if let Some(previous) = previous {
- tokenizer.events[previous].link.as_mut().unwrap().next = Some(current);
- }
- tokenizer.tokenize_state.document_data_index = Some(current);
- tokenizer.enter_link(
- Name::Data,
- Link {
- previous,
- next: None,
- content: Content::Flow,
- },
- );
- State::Retry(StateName::DocumentFlowInside)
- }
- }
-}
-
-/// In flow: consume data up to and including the eol, or up to the eof.
-///
-/// ```markdown
-/// > | * ab
-/// ^
-/// ```
-pub fn flow_inside(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- None => {
- tokenizer.exit(Name::Data);
- State::Retry(StateName::DocumentFlowEnd)
- }
- // Note: EOL is part of data.
- Some(b'\n') => {
- tokenizer.consume();
- tokenizer.exit(Name::Data);
- State::Next(StateName::DocumentFlowEnd)
- }
- Some(_) => {
- tokenizer.consume();
- State::Next(StateName::DocumentFlowInside)
- }
- }
-}
-
-/// After flow (after eol or at eof).
-///
-/// ```markdown
-/// | * a
-/// > | > b
-/// ^ ^
-/// ```
-pub fn flow_end(tokenizer: &mut Tokenizer) -> State {
- let child = tokenizer.tokenize_state.document_child.as_mut().unwrap();
- let state = tokenizer
- .tokenize_state
- .document_child_state
- .unwrap_or(State::Next(StateName::FlowStart));
- // Open a slot for container exits that `exit_containers` may file.
- tokenizer.tokenize_state.document_exits.push(None);
- // Push the range from the child’s point up to our own point into the child.
- let state = child.push(
- (child.point.index, child.point.vs),
- (tokenizer.point.index, tokenizer.point.vs),
- state,
- );
- // In a paragraph, or the last event (ignoring line endings) is a paragraph?
- let paragraph = matches!(state, State::Next(StateName::ParagraphInside))
- || (!child.events.is_empty()
- && child.events
- [skip::opt_back(&child.events, child.events.len() - 1, &[Name::LineEnding])]
- .name
- == Name::Paragraph);
-
- tokenizer.tokenize_state.document_child_state = Some(state);
- // A lazy line in an ongoing paragraph counts as continuing all containers.
- if child.lazy && paragraph && tokenizer.tokenize_state.document_paragraph_before {
- tokenizer.tokenize_state.document_continued =
- tokenizer.tokenize_state.document_container_stack.len();
- }
- // Close the containers that were not continued.
- if tokenizer.tokenize_state.document_continued
- != tokenizer.tokenize_state.document_container_stack.len()
- {
- exit_containers(tokenizer, &Phase::After);
- }
-
- match tokenizer.current {
- None => {
- tokenizer.tokenize_state.document_continued = 0;
- exit_containers(tokenizer, &Phase::Eof);
- resolve(tokenizer);
- State::Ok
- }
- Some(_) => {
- tokenizer.tokenize_state.document_continued = 0;
- tokenizer.tokenize_state.document_paragraph_before = paragraph;
- // Containers would only be interrupting if we’ve continued.
- tokenizer.interrupt = false;
- State::Retry(StateName::DocumentContainerExistingBefore)
- }
- }
-}
-
-/// Close containers past `document_continued` (and flush flow unless in `Phase::After`).
-fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) {
- let mut stack_close = tokenizer
- .tokenize_state
- .document_container_stack
- .split_off(tokenizer.tokenize_state.document_continued);
-
- let child = tokenizer.tokenize_state.document_child.as_mut().unwrap();
-
- // Flush if needed.
- if *phase != Phase::After {
- let state = tokenizer
- .tokenize_state
- .document_child_state
- .take()
- .unwrap_or(State::Next(StateName::FlowStart));
-
- child.flush(state, false);
- }
- // Exits go in the latest `document_exits` slot, or the one before it in `Phase::After`.
- if !stack_close.is_empty() {
- let index = tokenizer.tokenize_state.document_exits.len()
- - (if *phase == Phase::After { 2 } else { 1 });
- let mut exits = Vec::with_capacity(stack_close.len());
-
- while !stack_close.is_empty() {
- let container = stack_close.pop().unwrap();
- let name = match container.kind {
- Container::BlockQuote => Name::BlockQuote,
- Container::ListItem => Name::ListItem,
- };
-
- exits.push(Event {
- kind: Kind::Exit,
- name: name.clone(),
- point: tokenizer.point.clone(),
- link: None,
- });
- // Also drop the matching name from the tokenizer’s stack, searching from the end.
- let mut stack_index = tokenizer.stack.len();
- let mut found = false;
-
- while stack_index > 0 {
- stack_index -= 1;
-
- if tokenizer.stack[stack_index] == name {
- tokenizer.stack.remove(stack_index);
- found = true;
- break;
- }
- }
-
- debug_assert!(found, "expected to find container token to exit");
- }
- // Add to the exits already filed in that slot, if any.
- if let Some(ref mut list) = tokenizer.tokenize_state.document_exits[index] {
- list.append(&mut exits);
- } else {
- tokenizer.tokenize_state.document_exits[index] = Some(exits);
- }
- }
-
- child.interrupt = false;
-}
-
-/// Inject everything together: container exits, child events, resolvers, and definitions.
-fn resolve(tokenizer: &mut Tokenizer) {
- let child = tokenizer.tokenize_state.document_child.as_mut().unwrap();
-
- // First, add the container exits into `child`.
- let mut child_index = 0;
- let mut line = 0;
-
- while child_index < child.events.len() {
- let event = &child.events[child_index];
-
- if event.kind == Kind::Enter
- && (event.name == Name::LineEnding || event.name == Name::BlankLineEnding)
- {
- if let Some(mut exits) = tokenizer.tokenize_state.document_exits[line].take() {
- let mut exit_index = 0;
- while exit_index < exits.len() {
- exits[exit_index].point = event.point.clone();
- exit_index += 1;
- }
-
- child.map.add(child_index, 0, exits);
- }
- // Each line ending moves on to the next slot in `document_exits`.
- line += 1;
- }
-
- child_index += 1;
- }
-
- child.map.consume(&mut child.events);
-
- // Now, add all child events into our parent document tokenizer.
- divide_events(
- &mut tokenizer.map,
- &tokenizer.events,
- skip::to(&tokenizer.events, 0, &[Name::Data]),
- &mut child.events,
- );
-
- // Replace the flow data with actual events.
- tokenizer.map.consume(&mut tokenizer.events);
-
- // Now, add some final container exits due to the EOF.
- // We can’t inject them into the child earlier, as they are “outside” its
- // linked data.
- if line < tokenizer.tokenize_state.document_exits.len() {
- if let Some(mut exits) = tokenizer.tokenize_state.document_exits[line].take() {
- let mut exit_index = 0;
- while exit_index < exits.len() {
- exits[exit_index].point = tokenizer.point.clone();
- exit_index += 1;
- }
-
- tokenizer.events.append(&mut exits);
- }
- }
-
- // Add the resolvers from child.
- tokenizer
- .resolvers
- .append(&mut child.resolvers.split_off(0));
- // Add the definitions from child.
- tokenizer
- .tokenize_state
- .definitions
- .append(&mut child.tokenize_state.definitions.split_off(0));
-}
diff --git a/src/content/flow.rs b/src/content/flow.rs
deleted file mode 100644
index 08c7891..0000000
--- a/src/content/flow.rs
+++ /dev/null
@@ -1,254 +0,0 @@
-//! The flow content type.
-//!
-//! **Flow** represents the sections, such as headings and code, which are
-//! parsed per line.
-//! An example is HTML, which has a certain starting condition (such as
-//! `<script>` on its own line), then continues for a while, until an end
-//! condition is found (such as `</style>`).
-//! If that line with an end condition is never found, that flow goes until
-//! the end.
-//!
-//! The constructs found in flow are:
-//!
-//! * [Blank line][crate::construct::blank_line]
-//! * [Code (fenced)][crate::construct::code_fenced]
-//! * [Code (indented)][crate::construct::code_indented]
-//! * [Definition][crate::construct::definition]
-//! * [Heading (atx)][crate::construct::heading_atx]
-//! * [Heading (setext)][crate::construct::heading_setext]
-//! * [HTML (flow)][crate::construct::html_flow]
-//! * [Thematic break][crate::construct::thematic_break]
-
-use crate::event::Name;
-use crate::state::{Name as StateName, State};
-use crate::tokenizer::Tokenizer;
-
-/// Start of flow: dispatch on the current byte to the construct to try first.
-///
-/// ```markdown
-/// > | ## alpha
-/// ^
-/// > | bravo
-/// ^
-/// > | ***
-/// ^
-/// ```
-pub fn start(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- Some(b'`' | b'~') => {
- tokenizer.attempt(
- State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeParagraph),
- );
- State::Retry(StateName::CodeFencedStart)
- }
- Some(b'<') => {
- tokenizer.attempt(
- State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeParagraph),
- );
- State::Retry(StateName::HtmlFlowStart)
- }
- Some(b'#') => {
- tokenizer.attempt(
- State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeParagraph),
- );
- State::Retry(StateName::HeadingAtxStart)
- }
- // Note: `-` is also used in thematic breaks, so it’s not included here.
- Some(b'=') => {
- tokenizer.attempt(
- State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeParagraph),
- );
- State::Retry(StateName::HeadingSetextStart)
- }
- Some(b'*' | b'_') => {
- tokenizer.attempt(
- State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeParagraph),
- );
- State::Retry(StateName::ThematicBreakStart)
- }
- Some(b'[') => {
- tokenizer.attempt(
- State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeParagraph),
- );
- State::Retry(StateName::DefinitionStart)
- }
- // Actual parsing: blank line? Indented code? Indented anything?
- // Also includes `-` which can be a setext heading underline or a thematic break.
- None | Some(b'\t' | b'\n' | b' ' | b'-') => State::Retry(StateName::FlowBlankLineBefore),
- // Must be a paragraph.
- Some(_) => {
- tokenizer.attempt(State::Next(StateName::FlowAfter), State::Nok);
- State::Retry(StateName::ParagraphStart)
- }
- }
-}
-
-/// At blank line: attempt one, otherwise go on to try code (indented).
-///
-/// ```markdown
-/// > | ␠␠␊
-/// ^
-/// ```
-pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt(
- State::Next(StateName::FlowBlankLineAfter),
- State::Next(StateName::FlowBeforeCodeIndented),
- );
- State::Retry(StateName::BlankLineStart)
-}
-
-/// At code (indented): attempt it, otherwise go on to try code (fenced).
-///
-/// ```markdown
-/// > | ␠␠␠␠a
-/// ^
-/// ```
-pub fn before_code_indented(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt(
- State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeCodeFenced),
- );
- State::Retry(StateName::CodeIndentedStart)
-}
-
-/// At code (fenced): attempt it, otherwise go on to try html (flow).
-///
-/// ````markdown
-/// > | ```
-/// ^
-/// ````
-pub fn before_code_fenced(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt(
- State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeHtml),
- );
- State::Retry(StateName::CodeFencedStart)
-}
-
-/// At html (flow): attempt it, otherwise go on to try heading (atx).
-///
-/// ```markdown
-/// > | <a>
-/// ^
-/// ```
-pub fn before_html(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt(
- State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeHeadingAtx),
- );
- State::Retry(StateName::HtmlFlowStart)
-}
-
-/// At heading (atx): attempt it, otherwise go on to try heading (setext).
-///
-/// ```markdown
-/// > | # a
-/// ^
-/// ```
-pub fn before_heading_atx(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt(
- State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeHeadingSetext),
- );
- State::Retry(StateName::HeadingAtxStart)
-}
-
-/// At heading (setext): attempt it, otherwise go on to try thematic break.
-///
-/// ```markdown
-/// | a
-/// > | =
-/// ^
-/// ```
-pub fn before_heading_setext(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt(
- State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeThematicBreak),
- );
- State::Retry(StateName::HeadingSetextStart)
-}
-}
-
-/// At thematic break: attempt it, otherwise go on to try definition.
-///
-/// ```markdown
-/// > | ***
-/// ^
-/// ```
-pub fn before_thematic_break(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt(
- State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeDefinition),
- );
- State::Retry(StateName::ThematicBreakStart)
-}
-
-/// At definition: attempt it, otherwise go on to try paragraph.
-///
-/// ```markdown
-/// > | [a]: b
-/// ^
-/// ```
-pub fn before_definition(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt(
- State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeParagraph),
- );
- State::Retry(StateName::DefinitionStart)
-}
-
-/// At paragraph: attempt it as the last option; the result is nok if it fails.
-///
-/// ```markdown
-/// > | a
-/// ^
-/// ```
-pub fn before_paragraph(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt(State::Next(StateName::FlowAfter), State::Nok);
- State::Retry(StateName::ParagraphStart)
-}
-
-/// After blank line: done at eof, otherwise consume the eol as a blank line ending.
-///
-/// ```markdown
-/// > | ␠␠␊
-/// ^
-/// ```
-pub fn blank_line_after(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- None => State::Ok,
- Some(b'\n') => {
- tokenizer.enter(Name::BlankLineEnding);
- tokenizer.consume();
- tokenizer.exit(Name::BlankLineEnding);
- // Feel free to interrupt.
- tokenizer.interrupt = false;
- State::Next(StateName::FlowStart)
- }
- _ => unreachable!("expected eol/eof"),
- }
-}
-
-/// After flow: done at eof, otherwise consume the eol and start flow again.
-///
-/// ```markdown
-/// > | # a␊
-/// ^
-/// ```
-pub fn after(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- None => State::Ok,
- Some(b'\n') => {
- tokenizer.enter(Name::LineEnding);
- tokenizer.consume();
- tokenizer.exit(Name::LineEnding);
- State::Next(StateName::FlowStart)
- }
- _ => unreachable!("expected eol/eof"),
- }
-}
diff --git a/src/content/mod.rs b/src/content/mod.rs
deleted file mode 100644
index af40cc0..0000000
--- a/src/content/mod.rs
+++ /dev/null
@@ -1,11 +0,0 @@
-//! Content types found in markdown.
-//!
-//! * [document][document]
-//! * [flow][flow]
-//! * [string][string]
-//! * [text][text]
-
-pub mod document;
-pub mod flow;
-pub mod string;
-pub mod text;
diff --git a/src/content/string.rs b/src/content/string.rs
deleted file mode 100644
index ec4fce2..0000000
--- a/src/content/string.rs
+++ /dev/null
@@ -1,76 +0,0 @@
-//! The string content type.
-//!
-//! **String** is a limited, [text][]-like content type which only allows
-//! character escapes and character references.
-//! It exists in things such as identifiers (media references, definitions),
-//! titles, URLs, code (fenced) info and meta parts.
-//!
-//! The constructs found in string are:
-//!
-//! * [Character escape][crate::construct::character_escape]
-//! * [Character reference][crate::construct::character_reference]
-//!
-//! [text]: crate::content::text
-
-use crate::construct::partial_whitespace::resolve_whitespace;
-use crate::resolve::Name as ResolveName;
-use crate::state::{Name as StateName, State};
-use crate::tokenizer::Tokenizer;
-
-/// Bytes that can start a construct in string (`&` and `\`).
-const MARKERS: [u8; 2] = [b'&', b'\\'];
-
-/// Start of string: register the string resolver and set the markers.
-///
-/// ````markdown
-/// > | ```js
-/// ^
-/// ````
-pub fn start(tokenizer: &mut Tokenizer) -> State {
- tokenizer.register_resolver(ResolveName::String);
- tokenizer.tokenize_state.markers = &MARKERS;
- State::Retry(StateName::StringBefore)
-}
-
-/// Before string: done at eof, try a construct at `&` or `\`, otherwise data.
-///
-/// ````markdown
-/// > | ```js
-/// ^
-/// ````
-pub fn before(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- None => State::Ok,
- Some(b'&') => {
- tokenizer.attempt(
- State::Next(StateName::StringBefore),
- State::Next(StateName::StringBeforeData),
- );
- State::Retry(StateName::CharacterReferenceStart)
- }
- Some(b'\\') => {
- tokenizer.attempt(
- State::Next(StateName::StringBefore),
- State::Next(StateName::StringBeforeData),
- );
- State::Retry(StateName::CharacterEscapeStart)
- }
- _ => State::Retry(StateName::StringBeforeData),
- }
-}
-
-/// At data: attempt data, then go back to before string; nok if data fails.
-///
-/// ````markdown
-/// > | ```js
-/// ^
-/// ````
-pub fn before_data(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt(State::Next(StateName::StringBefore), State::Nok);
- State::Retry(StateName::DataStart)
-}
-
-/// Resolve whitespace in string (both `resolve_whitespace` flags are off).
-pub fn resolve(tokenizer: &mut Tokenizer) {
- resolve_whitespace(tokenizer, false, false);
-}
diff --git a/src/content/text.rs b/src/content/text.rs
deleted file mode 100644
index 5c13dba..0000000
--- a/src/content/text.rs
+++ /dev/null
@@ -1,173 +0,0 @@
-//! The text content type.
-//!
-//! **Text** contains phrasing content such as
-//! [attention][crate::construct::attention] (emphasis, strong),
-//! [code (text)][crate::construct::code_text], and actual text.
-//!
-//! The constructs found in text are:
-//!
-//! * [Attention][crate::construct::attention]
-//! * [Autolink][crate::construct::autolink]
-//! * [Character escape][crate::construct::character_escape]
-//! * [Character reference][crate::construct::character_reference]
-//! * [Code (text)][crate::construct::code_text]
-//! * [Hard break (escape)][crate::construct::hard_break_escape]
-//! * [HTML (text)][crate::construct::html_text]
-//! * [Label start (image)][crate::construct::label_start_image]
-//! * [Label start (link)][crate::construct::label_start_link]
-//! * [Label end][crate::construct::label_end]
-//!
-//! > 👉 **Note**: for performance reasons, hard break (trailing) is formed by
-//! > [whitespace][crate::construct::partial_whitespace].
-
-use crate::construct::partial_whitespace::resolve_whitespace;
-use crate::resolve::Name as ResolveName;
-use crate::state::{Name as StateName, State};
-use crate::tokenizer::Tokenizer;
-
-/// Bytes that can start a construct in text, each noted with what it can start.
-const MARKERS: [u8; 9] = [
- b'!', // `label_start_image`
- b'&', // `character_reference`
- b'*', // `attention`
- b'<', // `autolink`, `html_text`
- b'[', // `label_start_link`
- b'\\', // `character_escape`, `hard_break_escape`
- b']', // `label_end`
- b'_', // `attention`
- b'`', // `code_text`
-];
-
-/// Start of text: register the text resolver and set the markers.
-///
-/// ```markdown
-/// > | abc
-/// ^
-/// ```
-pub fn start(tokenizer: &mut Tokenizer) -> State {
- tokenizer.register_resolver(ResolveName::Text);
- tokenizer.tokenize_state.markers = &MARKERS;
- State::Retry(StateName::TextBefore)
-}
-
-/// Before text: done at eof, try a construct at a marker, otherwise data.
-///
-/// ```markdown
-/// > | abc
-/// ^
-/// ```
-pub fn before(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- None => State::Ok,
- Some(b'!') => {
- tokenizer.attempt(
- State::Next(StateName::TextBefore),
- State::Next(StateName::TextBeforeData),
- );
- State::Retry(StateName::LabelStartImageStart)
- }
- Some(b'&') => {
- tokenizer.attempt(
- State::Next(StateName::TextBefore),
- State::Next(StateName::TextBeforeData),
- );
- State::Retry(StateName::CharacterReferenceStart)
- }
- Some(b'*' | b'_') => {
- tokenizer.attempt(
- State::Next(StateName::TextBefore),
- State::Next(StateName::TextBeforeData),
- );
- State::Retry(StateName::AttentionStart)
- }
- // `autolink`, `html_text` (order does not matter)
- Some(b'<') => {
- tokenizer.attempt(
- State::Next(StateName::TextBefore),
- State::Next(StateName::TextBeforeHtml),
- );
- State::Retry(StateName::AutolinkStart)
- }
- Some(b'[') => {
- tokenizer.attempt(
- State::Next(StateName::TextBefore),
- State::Next(StateName::TextBeforeData),
- );
- State::Retry(StateName::LabelStartLinkStart)
- }
- Some(b'\\') => {
- tokenizer.attempt(
- State::Next(StateName::TextBefore),
- State::Next(StateName::TextBeforeHardBreakEscape),
- );
- State::Retry(StateName::CharacterEscapeStart)
- }
- Some(b']') => {
- tokenizer.attempt(
- State::Next(StateName::TextBefore),
- State::Next(StateName::TextBeforeData),
- );
- State::Retry(StateName::LabelEndStart)
- }
- Some(b'`') => {
- tokenizer.attempt(
- State::Next(StateName::TextBefore),
- State::Next(StateName::TextBeforeData),
- );
- State::Retry(StateName::CodeTextStart)
- }
- _ => State::Retry(StateName::TextBeforeData),
- }
-}
-
-/// Before html (text): attempt it, otherwise fall back to data.
-///
-/// At `<`, which wasn’t an autolink.
-///
-/// ```markdown
-/// > | a <b>
-/// ^
-/// ```
-pub fn before_html(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt(
- State::Next(StateName::TextBefore),
- State::Next(StateName::TextBeforeData),
- );
- State::Retry(StateName::HtmlTextStart)
-}
-
-/// Before hard break escape: attempt it, otherwise fall back to data.
-///
-/// At `\`, which wasn’t a character escape.
-///
-/// ```markdown
-/// > | a \␊
-/// ^
-/// ```
-pub fn before_hard_break_escape(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt(
- State::Next(StateName::TextBefore),
- State::Next(StateName::TextBeforeData),
- );
- State::Retry(StateName::HardBreakEscapeStart)
-}
-
-/// Before data: attempt data, then go back to before text; nok if data fails.
-///
-/// ```markdown
-/// > | a
-/// ^
-/// ```
-pub fn before_data(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt(State::Next(StateName::TextBefore), State::Nok);
- State::Retry(StateName::DataStart)
-}
-
-/// Resolve whitespace in text, passing on whether `hard_break_trailing` is enabled.
-pub fn resolve(tokenizer: &mut Tokenizer) {
- resolve_whitespace(
- tokenizer,
- tokenizer.parse_state.constructs.hard_break_trailing,
- true,
- );
-}