diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-08-15 11:40:40 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-08-15 11:40:40 +0200 |
commit | ee967aa634b5f8e9d30329d587538f1371a5da95 (patch) | |
tree | cdc1461c822e440b24428eb8d431881e216ab8bd /src/content | |
parent | 13135666fac476f3cd6f059147f496533b304097 (diff) | |
download | markdown-rs-ee967aa634b5f8e9d30329d587538f1371a5da95.tar.gz markdown-rs-ee967aa634b5f8e9d30329d587538f1371a5da95.tar.bz2 markdown-rs-ee967aa634b5f8e9d30329d587538f1371a5da95.zip |
Refactor to move `content` to `construct`
Diffstat (limited to 'src/content')
-rw-r--r-- | src/content/document.rs | 492 | ||||
-rw-r--r-- | src/content/flow.rs | 254 | ||||
-rw-r--r-- | src/content/mod.rs | 11 | ||||
-rw-r--r-- | src/content/string.rs | 76 | ||||
-rw-r--r-- | src/content/text.rs | 173 |
5 files changed, 0 insertions, 1006 deletions
diff --git a/src/content/document.rs b/src/content/document.rs deleted file mode 100644 index 9def6c5..0000000 --- a/src/content/document.rs +++ /dev/null @@ -1,492 +0,0 @@ -//! The document content type. -//! -//! **Document** represents the containers, such as block quotes and lists, -//! which structure the document and contain other sections. -//! -//! The constructs found in flow are: -//! -//! * [Block quote][crate::construct::block_quote] -//! * [List][crate::construct::list_item] - -use crate::event::{Content, Event, Kind, Link, Name}; -use crate::state::{Name as StateName, State}; -use crate::subtokenize::divide_events; -use crate::tokenizer::{Container, ContainerState, Tokenizer}; -use crate::util::skip; - -/// Phases where we can exit containers. -#[derive(Debug, PartialEq)] -enum Phase { - /// After parsing a line of lazy flow which resulted in something that - /// exits containers before the line. - /// - /// ```markdown - /// | * a - /// > | ```js - /// ^ - /// | b - /// | ``` - /// ``` - After, - /// When a new container replaces an existing container. - /// - /// ```markdown - /// | * a - /// > | > b - /// ^ - /// ``` - Prefix, - /// After everything. - /// - /// ```markdown - /// > | * a - /// ^ - /// ``` - Eof, -} - -/// Start of document, at an optional BOM. -/// -/// ```markdown -/// > | a -/// ^ -/// ``` -pub fn start(tokenizer: &mut Tokenizer) -> State { - tokenizer.tokenize_state.document_child = Some(Box::new(Tokenizer::new( - tokenizer.point.clone(), - tokenizer.parse_state, - ))); - - tokenizer.attempt( - State::Next(StateName::DocumentContainerExistingBefore), - State::Next(StateName::DocumentContainerExistingBefore), - ); - - State::Retry(StateName::BomStart) -} - -/// At optional existing containers. -// -/// ```markdown -/// | * a -/// > | > b -/// ^ -/// ``` -pub fn container_existing_before(tokenizer: &mut Tokenizer) -> State { - // If there are more existing containers, check whether the next one continues. - if tokenizer.tokenize_state.document_continued - < tokenizer.tokenize_state.document_container_stack.len() - { - let container = &tokenizer.tokenize_state.document_container_stack - [tokenizer.tokenize_state.document_continued]; - - let name = match container.kind { - Container::BlockQuote => StateName::BlockQuoteContStart, - Container::ListItem => StateName::ListItemContStart, - }; - - tokenizer.attempt( - State::Next(StateName::DocumentContainerExistingAfter), - State::Next(StateName::DocumentContainerNewBefore), - ); - - State::Retry(name) - } - // Otherwise, check new containers. - else { - State::Retry(StateName::DocumentContainerNewBefore) - } -} - -/// After continued existing container. -// -/// ```markdown -/// | * a -/// > | b -/// ^ -/// ``` -pub fn container_existing_after(tokenizer: &mut Tokenizer) -> State { - tokenizer.tokenize_state.document_continued += 1; - State::Retry(StateName::DocumentContainerExistingBefore) -} - -/// At new containers. -// -/// ```markdown -/// > | * a -/// ^ -/// > | > b -/// ^ -/// ``` -pub fn container_new_before(tokenizer: &mut Tokenizer) -> State { - // If we have completely continued, restore the flow’s past `interrupt` - // status. - if tokenizer.tokenize_state.document_continued - == tokenizer.tokenize_state.document_container_stack.len() - { - let child = tokenizer.tokenize_state.document_child.as_ref().unwrap(); - - tokenizer.interrupt = child.interrupt; - - // …and if we’re in a concrete construct, new containers can’t “pierce” - // into them. - if child.concrete { - return State::Retry(StateName::DocumentContainersAfter); - } - } - - // Check for a new container. - // Block quote? - // Add a new container at the end of the stack. - let tail = tokenizer.tokenize_state.document_container_stack.len(); - tokenizer - .tokenize_state - .document_container_stack - .push(ContainerState { - kind: Container::BlockQuote, - blank_initial: false, - size: 0, - }); - // Swap the existing container with the new one. - tokenizer - .tokenize_state - .document_container_stack - .swap(tokenizer.tokenize_state.document_continued, tail); - - tokenizer.attempt( - State::Next(StateName::DocumentContainerNewAfter), - State::Next(StateName::DocumentContainerNewBeforeNotBlockQuote), - ); - State::Retry(StateName::BlockQuoteStart) -} - -/// At new container, but not a block quote. -// -/// ```markdown -/// > | * a -/// ^ -/// ``` -pub fn container_new_before_not_block_quote(tokenizer: &mut Tokenizer) -> State { - // List item? - // We replace the empty block quote container for this new list one. - tokenizer.tokenize_state.document_container_stack - [tokenizer.tokenize_state.document_continued] = ContainerState { - kind: Container::ListItem, - blank_initial: false, - size: 0, - }; - - tokenizer.attempt( - State::Next(StateName::DocumentContainerNewAfter), - State::Next(StateName::DocumentContainerNewBeforeNotList), - ); - State::Retry(StateName::ListItemStart) -} - -/// At new container, but not a list (or block quote). -// -/// ```markdown -/// > | a -/// ^ -/// ``` -pub fn container_new_before_not_list(tokenizer: &mut Tokenizer) -> State { - // It wasn’t a new block quote or a list. - // Swap the new container (in the middle) with the existing one (at the end). - // Drop what was in the middle. - tokenizer - .tokenize_state - .document_container_stack - .swap_remove(tokenizer.tokenize_state.document_continued); - - State::Retry(StateName::DocumentContainersAfter) -} - -/// After new container. -/// -/// ```markdown -/// > | * a -/// ^ -/// > | > b -/// ^ -/// ``` -pub fn container_new_after(tokenizer: &mut Tokenizer) -> State { - // It was a new block quote or a list. - // Swap the new container (in the middle) with the existing one (at the end). - // Take the new container. - let container = tokenizer - .tokenize_state - .document_container_stack - .swap_remove(tokenizer.tokenize_state.document_continued); - - // If we did not continue all existing containers, and there is a new one, - // close the flow and those containers. - if tokenizer.tokenize_state.document_continued - != tokenizer.tokenize_state.document_container_stack.len() - { - exit_containers(tokenizer, &Phase::Prefix); - } - - tokenizer - .tokenize_state - .document_container_stack - .push(container); - tokenizer.tokenize_state.document_continued += 1; - tokenizer.interrupt = false; - State::Retry(StateName::DocumentContainerNewBefore) -} - -/// After containers, at flow. -// -/// ```markdown -/// > | * a -/// ^ -/// > | > b -/// ^ -/// ``` -pub fn containers_after(tokenizer: &mut Tokenizer) -> State { - let child = tokenizer.tokenize_state.document_child.as_mut().unwrap(); - - child.lazy = tokenizer.tokenize_state.document_continued - != tokenizer.tokenize_state.document_container_stack.len(); - child.define_skip(tokenizer.point.clone()); - - match tokenizer.current { - // Note: EOL is part of data. - None => State::Retry(StateName::DocumentFlowEnd), - Some(_) => { - let current = tokenizer.events.len(); - let previous = tokenizer.tokenize_state.document_data_index; - if let Some(previous) = previous { - tokenizer.events[previous].link.as_mut().unwrap().next = Some(current); - } - tokenizer.tokenize_state.document_data_index = Some(current); - tokenizer.enter_link( - Name::Data, - Link { - previous, - next: None, - content: Content::Flow, - }, - ); - State::Retry(StateName::DocumentFlowInside) - } - } -} - -/// In flow. -// -/// ```markdown -/// > | * ab -/// ^ -/// ``` -pub fn flow_inside(tokenizer: &mut Tokenizer) -> State { - match tokenizer.current { - None => { - tokenizer.exit(Name::Data); - State::Retry(StateName::DocumentFlowEnd) - } - // Note: EOL is part of data. - Some(b'\n') => { - tokenizer.consume(); - tokenizer.exit(Name::Data); - State::Next(StateName::DocumentFlowEnd) - } - Some(_) => { - tokenizer.consume(); - State::Next(StateName::DocumentFlowInside) - } - } -} - -/// After flow (after eol or at eof). -// -/// ```markdown -/// | * a -/// > | > b -/// ^ ^ -/// ``` -pub fn flow_end(tokenizer: &mut Tokenizer) -> State { - let child = tokenizer.tokenize_state.document_child.as_mut().unwrap(); - let state = tokenizer - .tokenize_state - .document_child_state - .unwrap_or(State::Next(StateName::FlowStart)); - - tokenizer.tokenize_state.document_exits.push(None); - - let state = child.push( - (child.point.index, child.point.vs), - (tokenizer.point.index, tokenizer.point.vs), - state, - ); - - let paragraph = matches!(state, State::Next(StateName::ParagraphInside)) - || (!child.events.is_empty() - && child.events - [skip::opt_back(&child.events, child.events.len() - 1, &[Name::LineEnding])] - .name - == Name::Paragraph); - - tokenizer.tokenize_state.document_child_state = Some(state); - - if child.lazy && paragraph && tokenizer.tokenize_state.document_paragraph_before { - tokenizer.tokenize_state.document_continued = - tokenizer.tokenize_state.document_container_stack.len(); - } - - if tokenizer.tokenize_state.document_continued - != tokenizer.tokenize_state.document_container_stack.len() - { - exit_containers(tokenizer, &Phase::After); - } - - match tokenizer.current { - None => { - tokenizer.tokenize_state.document_continued = 0; - exit_containers(tokenizer, &Phase::Eof); - resolve(tokenizer); - State::Ok - } - Some(_) => { - tokenizer.tokenize_state.document_continued = 0; - tokenizer.tokenize_state.document_paragraph_before = paragraph; - // Containers would only be interrupting if we’ve continued. - tokenizer.interrupt = false; - State::Retry(StateName::DocumentContainerExistingBefore) - } - } -} - -/// Close containers (and flow if needed). -fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) { - let mut stack_close = tokenizer - .tokenize_state - .document_container_stack - .split_off(tokenizer.tokenize_state.document_continued); - - let child = tokenizer.tokenize_state.document_child.as_mut().unwrap(); - - // Flush if needed. - if *phase != Phase::After { - let state = tokenizer - .tokenize_state - .document_child_state - .take() - .unwrap_or(State::Next(StateName::FlowStart)); - - child.flush(state, false); - } - - if !stack_close.is_empty() { - let index = tokenizer.tokenize_state.document_exits.len() - - (if *phase == Phase::After { 2 } else { 1 }); - let mut exits = Vec::with_capacity(stack_close.len()); - - while !stack_close.is_empty() { - let container = stack_close.pop().unwrap(); - let name = match container.kind { - Container::BlockQuote => Name::BlockQuote, - Container::ListItem => Name::ListItem, - }; - - exits.push(Event { - kind: Kind::Exit, - name: name.clone(), - point: tokenizer.point.clone(), - link: None, - }); - - let mut stack_index = tokenizer.stack.len(); - let mut found = false; - - while stack_index > 0 { - stack_index -= 1; - - if tokenizer.stack[stack_index] == name { - tokenizer.stack.remove(stack_index); - found = true; - break; - } - } - - debug_assert!(found, "expected to find container token to exit"); - } - - if let Some(ref mut list) = tokenizer.tokenize_state.document_exits[index] { - list.append(&mut exits); - } else { - tokenizer.tokenize_state.document_exits[index] = Some(exits); - } - } - - child.interrupt = false; -} - -// Inject everything together. -fn resolve(tokenizer: &mut Tokenizer) { - let child = tokenizer.tokenize_state.document_child.as_mut().unwrap(); - - // First, add the container exits into `child`. - let mut child_index = 0; - let mut line = 0; - - while child_index < child.events.len() { - let event = &child.events[child_index]; - - if event.kind == Kind::Enter - && (event.name == Name::LineEnding || event.name == Name::BlankLineEnding) - { - if let Some(mut exits) = tokenizer.tokenize_state.document_exits[line].take() { - let mut exit_index = 0; - while exit_index < exits.len() { - exits[exit_index].point = event.point.clone(); - exit_index += 1; - } - - child.map.add(child_index, 0, exits); - } - - line += 1; - } - - child_index += 1; - } - - child.map.consume(&mut child.events); - - // Now, add all child events into our parent document tokenizer. - divide_events( - &mut tokenizer.map, - &tokenizer.events, - skip::to(&tokenizer.events, 0, &[Name::Data]), - &mut child.events, - ); - - // Replace the flow data with actual events. - tokenizer.map.consume(&mut tokenizer.events); - - // Now, add some final container exits due to the EOF. - // We can’t inject them into the child earlier, as they are “outside” its - // linked data. - if line < tokenizer.tokenize_state.document_exits.len() { - if let Some(mut exits) = tokenizer.tokenize_state.document_exits[line].take() { - let mut exit_index = 0; - while exit_index < exits.len() { - exits[exit_index].point = tokenizer.point.clone(); - exit_index += 1; - } - - tokenizer.events.append(&mut exits); - } - } - - // Add the resolvers from child. - tokenizer - .resolvers - .append(&mut child.resolvers.split_off(0)); - - tokenizer - .tokenize_state - .definitions - .append(&mut child.tokenize_state.definitions.split_off(0)); -} diff --git a/src/content/flow.rs b/src/content/flow.rs deleted file mode 100644 index 08c7891..0000000 --- a/src/content/flow.rs +++ /dev/null @@ -1,254 +0,0 @@ -//! The flow content type. -//! -//! **Flow** represents the sections, such as headings and code, which are -//! parsed per line. -//! An example is HTML, which has a certain starting condition (such as -//! `<script>` on its own line), then continues for a while, until an end -//! condition is found (such as `</style>`). -//! If that line with an end condition is never found, that flow goes until -//! the end. -//! -//! The constructs found in flow are: -//! -//! * [Blank line][crate::construct::blank_line] -//! * [Code (fenced)][crate::construct::code_fenced] -//! * [Code (indented)][crate::construct::code_indented] -//! * [Definition][crate::construct::definition] -//! * [Heading (atx)][crate::construct::heading_atx] -//! * [Heading (setext)][crate::construct::heading_setext] -//! * [HTML (flow)][crate::construct::html_flow] -//! * [Thematic break][crate::construct::thematic_break] - -use crate::event::Name; -use crate::state::{Name as StateName, State}; -use crate::tokenizer::Tokenizer; - -/// Start of flow. -// -/// ```markdown -/// > | ## alpha -/// ^ -/// > | bravo -/// ^ -/// > | *** -/// ^ -/// ``` -pub fn start(tokenizer: &mut Tokenizer) -> State { - match tokenizer.current { - Some(b'`' | b'~') => { - tokenizer.attempt( - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), - ); - State::Retry(StateName::CodeFencedStart) - } - Some(b'<') => { - tokenizer.attempt( - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), - ); - State::Retry(StateName::HtmlFlowStart) - } - Some(b'#') => { - tokenizer.attempt( - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), - ); - State::Retry(StateName::HeadingAtxStart) - } - // Note: `-` is also used in thematic breaks, so it’s not included here. - Some(b'=') => { - tokenizer.attempt( - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), - ); - State::Retry(StateName::HeadingSetextStart) - } - Some(b'*' | b'_') => { - tokenizer.attempt( - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), - ); - State::Retry(StateName::ThematicBreakStart) - } - Some(b'[') => { - tokenizer.attempt( - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), - ); - State::Retry(StateName::DefinitionStart) - } - // Actual parsing: blank line? Indented code? Indented anything? - // Also includes `-` which can be a setext heading underline or a thematic break. - None | Some(b'\t' | b'\n' | b' ' | b'-') => State::Retry(StateName::FlowBlankLineBefore), - // Must be a paragraph. - Some(_) => { - tokenizer.attempt(State::Next(StateName::FlowAfter), State::Nok); - State::Retry(StateName::ParagraphStart) - } - } -} - -/// At blank line. -/// -/// ```markdown -/// > | ␠␠␊ -/// ^ -/// ``` -pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt( - State::Next(StateName::FlowBlankLineAfter), - State::Next(StateName::FlowBeforeCodeIndented), - ); - State::Retry(StateName::BlankLineStart) -} - -/// At code (indented). -/// -/// ```markdown -/// > | ␠␠␠␠a -/// ^ -/// ``` -pub fn before_code_indented(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt( - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeCodeFenced), - ); - State::Retry(StateName::CodeIndentedStart) -} - -/// At code (fenced). -/// -/// ````markdown -/// > | ``` -/// ^ -/// ```` -pub fn before_code_fenced(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt( - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeHtml), - ); - State::Retry(StateName::CodeFencedStart) -} - -/// At html (flow). -/// -/// ```markdown -/// > | <a> -/// ^ -/// ``` -pub fn before_html(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt( - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeHeadingAtx), - ); - State::Retry(StateName::HtmlFlowStart) -} - -/// At heading (atx). -/// -/// ```markdown -/// > | # a -/// ^ -/// ``` -pub fn before_heading_atx(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt( - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeHeadingSetext), - ); - State::Retry(StateName::HeadingAtxStart) -} - -/// At heading (setext). -/// -/// ```markdown -/// | a -/// > | = -/// ^ -/// ``` -pub fn before_heading_setext(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt( - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeThematicBreak), - ); - State::Retry(StateName::HeadingSetextStart) -} - -/// At thematic break. -/// -/// ```markdown -/// > | *** -/// ^ -/// ``` -pub fn before_thematic_break(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt( - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeDefinition), - ); - State::Retry(StateName::ThematicBreakStart) -} - -/// At definition. -/// -/// ```markdown -/// > | [a]: b -/// ^ -/// ``` -pub fn before_definition(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt( - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), - ); - State::Retry(StateName::DefinitionStart) -} - -/// At paragraph. -/// -/// ```markdown -/// > | a -/// ^ -/// ``` -pub fn before_paragraph(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt(State::Next(StateName::FlowAfter), State::Nok); - State::Retry(StateName::ParagraphStart) -} - -/// After blank line. -/// -/// ```markdown -/// > | ␠␠␊ -/// ^ -/// ``` -pub fn blank_line_after(tokenizer: &mut Tokenizer) -> State { - match tokenizer.current { - None => State::Ok, - Some(b'\n') => { - tokenizer.enter(Name::BlankLineEnding); - tokenizer.consume(); - tokenizer.exit(Name::BlankLineEnding); - // Feel free to interrupt. - tokenizer.interrupt = false; - State::Next(StateName::FlowStart) - } - _ => unreachable!("expected eol/eof"), - } -} - -/// After flow. -/// -/// ```markdown -/// > | # a␊ -/// ^ -/// ``` -pub fn after(tokenizer: &mut Tokenizer) -> State { - match tokenizer.current { - None => State::Ok, - Some(b'\n') => { - tokenizer.enter(Name::LineEnding); - tokenizer.consume(); - tokenizer.exit(Name::LineEnding); - State::Next(StateName::FlowStart) - } - _ => unreachable!("expected eol/eof"), - } -} diff --git a/src/content/mod.rs b/src/content/mod.rs deleted file mode 100644 index af40cc0..0000000 --- a/src/content/mod.rs +++ /dev/null @@ -1,11 +0,0 @@ -//! Content types found in markdown. -//! -//! * [document][document] -//! * [flow][flow] -//! * [string][string] -//! * [text][text] - -pub mod document; -pub mod flow; -pub mod string; -pub mod text; diff --git a/src/content/string.rs b/src/content/string.rs deleted file mode 100644 index ec4fce2..0000000 --- a/src/content/string.rs +++ /dev/null @@ -1,76 +0,0 @@ -//! The string content type. -//! -//! **String** is a limited [text][] like content type which only allows -//! character escapes and character references. -//! It exists in things such as identifiers (media references, definitions), -//! titles, URLs, code (fenced) info and meta parts. -//! -//! The constructs found in string are: -//! -//! * [Character escape][crate::construct::character_escape] -//! * [Character reference][crate::construct::character_reference] -//! -//! [text]: crate::content::text - -use crate::construct::partial_whitespace::resolve_whitespace; -use crate::resolve::Name as ResolveName; -use crate::state::{Name as StateName, State}; -use crate::tokenizer::Tokenizer; - -/// Characters that can start something in string. -const MARKERS: [u8; 2] = [b'&', b'\\']; - -/// Start of string. -/// -/// ````markdown -/// > | ```js -/// ^ -/// ```` -pub fn start(tokenizer: &mut Tokenizer) -> State { - tokenizer.register_resolver(ResolveName::String); - tokenizer.tokenize_state.markers = &MARKERS; - State::Retry(StateName::StringBefore) -} - -/// Before string. -/// -/// ````markdown -/// > | ```js -/// ^ -/// ```` -pub fn before(tokenizer: &mut Tokenizer) -> State { - match tokenizer.current { - None => State::Ok, - Some(b'&') => { - tokenizer.attempt( - State::Next(StateName::StringBefore), - State::Next(StateName::StringBeforeData), - ); - State::Retry(StateName::CharacterReferenceStart) - } - Some(b'\\') => { - tokenizer.attempt( - State::Next(StateName::StringBefore), - State::Next(StateName::StringBeforeData), - ); - State::Retry(StateName::CharacterEscapeStart) - } - _ => State::Retry(StateName::StringBeforeData), - } -} - -/// At data. -/// -/// ````markdown -/// > | ```js -/// ^ -/// ```` -pub fn before_data(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt(State::Next(StateName::StringBefore), State::Nok); - State::Retry(StateName::DataStart) -} - -/// Resolve whitespace in string. -pub fn resolve(tokenizer: &mut Tokenizer) { - resolve_whitespace(tokenizer, false, false); -} diff --git a/src/content/text.rs b/src/content/text.rs deleted file mode 100644 index 5c13dba..0000000 --- a/src/content/text.rs +++ /dev/null @@ -1,173 +0,0 @@ -//! The text content type. -//! -//! **Text** contains phrasing content such as -//! [attention][crate::construct::attention] (emphasis, strong), -//! [code (text)][crate::construct::code_text], and actual text. -//! -//! The constructs found in text are: -//! -//! * [Attention][crate::construct::attention] -//! * [Autolink][crate::construct::autolink] -//! * [Character escape][crate::construct::character_escape] -//! * [Character reference][crate::construct::character_reference] -//! * [Code (text)][crate::construct::code_text] -//! * [Hard break (escape)][crate::construct::hard_break_escape] -//! * [HTML (text)][crate::construct::html_text] -//! * [Label start (image)][crate::construct::label_start_image] -//! * [Label start (link)][crate::construct::label_start_link] -//! * [Label end][crate::construct::label_end] -//! -//! > 👉 **Note**: for performance reasons, hard break (trailing) is formed by -//! > [whitespace][crate::construct::partial_whitespace]. - -use crate::construct::partial_whitespace::resolve_whitespace; -use crate::resolve::Name as ResolveName; -use crate::state::{Name as StateName, State}; -use crate::tokenizer::Tokenizer; - -/// Characters that can start something in text. -const MARKERS: [u8; 9] = [ - b'!', // `label_start_image` - b'&', // `character_reference` - b'*', // `attention` - b'<', // `autolink`, `html_text` - b'[', // `label_start_link` - b'\\', // `character_escape`, `hard_break_escape` - b']', // `label_end` - b'_', // `attention` - b'`', // `code_text` -]; - -/// Start of text. -/// -/// ```markdown -/// > | abc -/// ^ -/// ``` -pub fn start(tokenizer: &mut Tokenizer) -> State { - tokenizer.register_resolver(ResolveName::Text); - tokenizer.tokenize_state.markers = &MARKERS; - State::Retry(StateName::TextBefore) -} - -/// Before text. -/// -/// ```markdown -/// > | abc -/// ^ -/// ``` -pub fn before(tokenizer: &mut Tokenizer) -> State { - match tokenizer.current { - None => State::Ok, - Some(b'!') => { - tokenizer.attempt( - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeData), - ); - State::Retry(StateName::LabelStartImageStart) - } - Some(b'&') => { - tokenizer.attempt( - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeData), - ); - State::Retry(StateName::CharacterReferenceStart) - } - Some(b'*' | b'_') => { - tokenizer.attempt( - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeData), - ); - State::Retry(StateName::AttentionStart) - } - // `autolink`, `html_text` (order does not matter) - Some(b'<') => { - tokenizer.attempt( - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeHtml), - ); - State::Retry(StateName::AutolinkStart) - } - Some(b'[') => { - tokenizer.attempt( - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeData), - ); - State::Retry(StateName::LabelStartLinkStart) - } - Some(b'\\') => { - tokenizer.attempt( - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeHardBreakEscape), - ); - State::Retry(StateName::CharacterEscapeStart) - } - Some(b']') => { - tokenizer.attempt( - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeData), - ); - State::Retry(StateName::LabelEndStart) - } - Some(b'`') => { - tokenizer.attempt( - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeData), - ); - State::Retry(StateName::CodeTextStart) - } - _ => State::Retry(StateName::TextBeforeData), - } -} - -/// Before html (text). -/// -/// At `<`, which wasn’t an autolink. -/// -/// ```markdown -/// > | a <b> -/// ^ -/// ``` -pub fn before_html(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt( - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeData), - ); - State::Retry(StateName::HtmlTextStart) -} - -/// Before hard break escape. -/// -/// At `\`, which wasn’t a character escape. -/// -/// ```markdown -/// > | a \␊ -/// ^ -/// ``` -pub fn before_hard_break_escape(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt( - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeData), - ); - State::Retry(StateName::HardBreakEscapeStart) -} - -/// Before data. -/// -/// ```markdown -/// > | a -/// ^ -/// ``` -pub fn before_data(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt(State::Next(StateName::TextBefore), State::Nok); - State::Retry(StateName::DataStart) -} - -/// Resolve whitespace. -pub fn resolve(tokenizer: &mut Tokenizer) { - resolve_whitespace( - tokenizer, - tokenizer.parse_state.constructs.hard_break_trailing, - true, - ); -} |