diff options
Diffstat (limited to 'src/content')
| -rw-r--r-- | src/content/document.rs | 492 | ||||
| -rw-r--r-- | src/content/flow.rs | 254 | ||||
| -rw-r--r-- | src/content/mod.rs | 11 | ||||
| -rw-r--r-- | src/content/string.rs | 76 | ||||
| -rw-r--r-- | src/content/text.rs | 173 | 
5 files changed, 0 insertions, 1006 deletions
diff --git a/src/content/document.rs b/src/content/document.rs deleted file mode 100644 index 9def6c5..0000000 --- a/src/content/document.rs +++ /dev/null @@ -1,492 +0,0 @@ -//! The document content type. -//! -//! **Document** represents the containers, such as block quotes and lists, -//! which structure the document and contain other sections. -//! -//! The constructs found in flow are: -//! -//! *   [Block quote][crate::construct::block_quote] -//! *   [List][crate::construct::list_item] - -use crate::event::{Content, Event, Kind, Link, Name}; -use crate::state::{Name as StateName, State}; -use crate::subtokenize::divide_events; -use crate::tokenizer::{Container, ContainerState, Tokenizer}; -use crate::util::skip; - -/// Phases where we can exit containers. -#[derive(Debug, PartialEq)] -enum Phase { -    /// After parsing a line of lazy flow which resulted in something that -    /// exits containers before the line. -    /// -    /// ```markdown -    ///   | * a -    /// > | ```js -    ///          ^ -    ///   | b -    ///   | ``` -    /// ``` -    After, -    /// When a new container replaces an existing container. -    /// -    /// ```markdown -    ///   | * a -    /// > | > b -    ///     ^ -    /// ``` -    Prefix, -    /// After everything. -    /// -    /// ```markdown -    /// > | * a -    ///        ^ -    /// ``` -    Eof, -} - -/// Start of document, at an optional BOM. -/// -/// ```markdown -/// > | a -///     ^ -/// ``` -pub fn start(tokenizer: &mut Tokenizer) -> State { -    tokenizer.tokenize_state.document_child = Some(Box::new(Tokenizer::new( -        tokenizer.point.clone(), -        tokenizer.parse_state, -    ))); - -    tokenizer.attempt( -        State::Next(StateName::DocumentContainerExistingBefore), -        State::Next(StateName::DocumentContainerExistingBefore), -    ); - -    State::Retry(StateName::BomStart) -} - -/// At optional existing containers. -// -/// ```markdown -///   | * a -/// > | > b -///     ^ -/// ``` -pub fn container_existing_before(tokenizer: &mut Tokenizer) -> State { -    // If there are more existing containers, check whether the next one continues. -    if tokenizer.tokenize_state.document_continued -        < tokenizer.tokenize_state.document_container_stack.len() -    { -        let container = &tokenizer.tokenize_state.document_container_stack -            [tokenizer.tokenize_state.document_continued]; - -        let name = match container.kind { -            Container::BlockQuote => StateName::BlockQuoteContStart, -            Container::ListItem => StateName::ListItemContStart, -        }; - -        tokenizer.attempt( -            State::Next(StateName::DocumentContainerExistingAfter), -            State::Next(StateName::DocumentContainerNewBefore), -        ); - -        State::Retry(name) -    } -    // Otherwise, check new containers. -    else { -        State::Retry(StateName::DocumentContainerNewBefore) -    } -} - -/// After continued existing container. -// -/// ```markdown -///   | * a -/// > |   b -///       ^ -/// ``` -pub fn container_existing_after(tokenizer: &mut Tokenizer) -> State { -    tokenizer.tokenize_state.document_continued += 1; -    State::Retry(StateName::DocumentContainerExistingBefore) -} - -/// At new containers. -// -/// ```markdown -/// > | * a -///     ^ -/// > | > b -///     ^ -/// ``` -pub fn container_new_before(tokenizer: &mut Tokenizer) -> State { -    // If we have completely continued, restore the flow’s past `interrupt` -    // status. -    if tokenizer.tokenize_state.document_continued -        == tokenizer.tokenize_state.document_container_stack.len() -    { -        let child = tokenizer.tokenize_state.document_child.as_ref().unwrap(); - -        tokenizer.interrupt = child.interrupt; - -        // …and if we’re in a concrete construct, new containers can’t “pierce” -        // into them. -        if child.concrete { -            return State::Retry(StateName::DocumentContainersAfter); -        } -    } - -    // Check for a new container. -    // Block quote? -    // Add a new container at the end of the stack. -    let tail = tokenizer.tokenize_state.document_container_stack.len(); -    tokenizer -        .tokenize_state -        .document_container_stack -        .push(ContainerState { -            kind: Container::BlockQuote, -            blank_initial: false, -            size: 0, -        }); -    // Swap the existing container with the new one. -    tokenizer -        .tokenize_state -        .document_container_stack -        .swap(tokenizer.tokenize_state.document_continued, tail); - -    tokenizer.attempt( -        State::Next(StateName::DocumentContainerNewAfter), -        State::Next(StateName::DocumentContainerNewBeforeNotBlockQuote), -    ); -    State::Retry(StateName::BlockQuoteStart) -} - -/// At new container, but not a block quote. -// -/// ```markdown -/// > | * a -///     ^ -/// ``` -pub fn container_new_before_not_block_quote(tokenizer: &mut Tokenizer) -> State { -    // List item? -    // We replace the empty block quote container for this new list one. -    tokenizer.tokenize_state.document_container_stack -        [tokenizer.tokenize_state.document_continued] = ContainerState { -        kind: Container::ListItem, -        blank_initial: false, -        size: 0, -    }; - -    tokenizer.attempt( -        State::Next(StateName::DocumentContainerNewAfter), -        State::Next(StateName::DocumentContainerNewBeforeNotList), -    ); -    State::Retry(StateName::ListItemStart) -} - -/// At new container, but not a list (or block quote). -// -/// ```markdown -/// > | a -///     ^ -/// ``` -pub fn container_new_before_not_list(tokenizer: &mut Tokenizer) -> State { -    // It wasn’t a new block quote or a list. -    // Swap the new container (in the middle) with the existing one (at the end). -    // Drop what was in the middle. -    tokenizer -        .tokenize_state -        .document_container_stack -        .swap_remove(tokenizer.tokenize_state.document_continued); - -    State::Retry(StateName::DocumentContainersAfter) -} - -/// After new container. -/// -/// ```markdown -/// > | * a -///       ^ -/// > | > b -///       ^ -/// ``` -pub fn container_new_after(tokenizer: &mut Tokenizer) -> State { -    // It was a new block quote or a list. -    // Swap the new container (in the middle) with the existing one (at the end). -    // Take the new container. -    let container = tokenizer -        .tokenize_state -        .document_container_stack -        .swap_remove(tokenizer.tokenize_state.document_continued); - -    // If we did not continue all existing containers, and there is a new one, -    // close the flow and those containers. -    if tokenizer.tokenize_state.document_continued -        != tokenizer.tokenize_state.document_container_stack.len() -    { -        exit_containers(tokenizer, &Phase::Prefix); -    } - -    tokenizer -        .tokenize_state -        .document_container_stack -        .push(container); -    tokenizer.tokenize_state.document_continued += 1; -    tokenizer.interrupt = false; -    State::Retry(StateName::DocumentContainerNewBefore) -} - -/// After containers, at flow. -// -/// ```markdown -/// > | * a -///       ^ -/// > | > b -///       ^ -/// ``` -pub fn containers_after(tokenizer: &mut Tokenizer) -> State { -    let child = tokenizer.tokenize_state.document_child.as_mut().unwrap(); - -    child.lazy = tokenizer.tokenize_state.document_continued -        != tokenizer.tokenize_state.document_container_stack.len(); -    child.define_skip(tokenizer.point.clone()); - -    match tokenizer.current { -        // Note: EOL is part of data. -        None => State::Retry(StateName::DocumentFlowEnd), -        Some(_) => { -            let current = tokenizer.events.len(); -            let previous = tokenizer.tokenize_state.document_data_index; -            if let Some(previous) = previous { -                tokenizer.events[previous].link.as_mut().unwrap().next = Some(current); -            } -            tokenizer.tokenize_state.document_data_index = Some(current); -            tokenizer.enter_link( -                Name::Data, -                Link { -                    previous, -                    next: None, -                    content: Content::Flow, -                }, -            ); -            State::Retry(StateName::DocumentFlowInside) -        } -    } -} - -/// In flow. -// -/// ```markdown -/// > | * ab -///       ^ -/// ``` -pub fn flow_inside(tokenizer: &mut Tokenizer) -> State { -    match tokenizer.current { -        None => { -            tokenizer.exit(Name::Data); -            State::Retry(StateName::DocumentFlowEnd) -        } -        // Note: EOL is part of data. -        Some(b'\n') => { -            tokenizer.consume(); -            tokenizer.exit(Name::Data); -            State::Next(StateName::DocumentFlowEnd) -        } -        Some(_) => { -            tokenizer.consume(); -            State::Next(StateName::DocumentFlowInside) -        } -    } -} - -/// After flow (after eol or at eof). -// -/// ```markdown -///   | * a -/// > | > b -///     ^  ^ -/// ``` -pub fn flow_end(tokenizer: &mut Tokenizer) -> State { -    let child = tokenizer.tokenize_state.document_child.as_mut().unwrap(); -    let state = tokenizer -        .tokenize_state -        .document_child_state -        .unwrap_or(State::Next(StateName::FlowStart)); - -    tokenizer.tokenize_state.document_exits.push(None); - -    let state = child.push( -        (child.point.index, child.point.vs), -        (tokenizer.point.index, tokenizer.point.vs), -        state, -    ); - -    let paragraph = matches!(state, State::Next(StateName::ParagraphInside)) -        || (!child.events.is_empty() -            && child.events -                [skip::opt_back(&child.events, child.events.len() - 1, &[Name::LineEnding])] -            .name -                == Name::Paragraph); - -    tokenizer.tokenize_state.document_child_state = Some(state); - -    if child.lazy && paragraph && tokenizer.tokenize_state.document_paragraph_before { -        tokenizer.tokenize_state.document_continued = -            tokenizer.tokenize_state.document_container_stack.len(); -    } - -    if tokenizer.tokenize_state.document_continued -        != tokenizer.tokenize_state.document_container_stack.len() -    { -        exit_containers(tokenizer, &Phase::After); -    } - -    match tokenizer.current { -        None => { -            tokenizer.tokenize_state.document_continued = 0; -            exit_containers(tokenizer, &Phase::Eof); -            resolve(tokenizer); -            State::Ok -        } -        Some(_) => { -            tokenizer.tokenize_state.document_continued = 0; -            tokenizer.tokenize_state.document_paragraph_before = paragraph; -            // Containers would only be interrupting if we’ve continued. -            tokenizer.interrupt = false; -            State::Retry(StateName::DocumentContainerExistingBefore) -        } -    } -} - -/// Close containers (and flow if needed). -fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) { -    let mut stack_close = tokenizer -        .tokenize_state -        .document_container_stack -        .split_off(tokenizer.tokenize_state.document_continued); - -    let child = tokenizer.tokenize_state.document_child.as_mut().unwrap(); - -    // Flush if needed. -    if *phase != Phase::After { -        let state = tokenizer -            .tokenize_state -            .document_child_state -            .take() -            .unwrap_or(State::Next(StateName::FlowStart)); - -        child.flush(state, false); -    } - -    if !stack_close.is_empty() { -        let index = tokenizer.tokenize_state.document_exits.len() -            - (if *phase == Phase::After { 2 } else { 1 }); -        let mut exits = Vec::with_capacity(stack_close.len()); - -        while !stack_close.is_empty() { -            let container = stack_close.pop().unwrap(); -            let name = match container.kind { -                Container::BlockQuote => Name::BlockQuote, -                Container::ListItem => Name::ListItem, -            }; - -            exits.push(Event { -                kind: Kind::Exit, -                name: name.clone(), -                point: tokenizer.point.clone(), -                link: None, -            }); - -            let mut stack_index = tokenizer.stack.len(); -            let mut found = false; - -            while stack_index > 0 { -                stack_index -= 1; - -                if tokenizer.stack[stack_index] == name { -                    tokenizer.stack.remove(stack_index); -                    found = true; -                    break; -                } -            } - -            debug_assert!(found, "expected to find container token to exit"); -        } - -        if let Some(ref mut list) = tokenizer.tokenize_state.document_exits[index] { -            list.append(&mut exits); -        } else { -            tokenizer.tokenize_state.document_exits[index] = Some(exits); -        } -    } - -    child.interrupt = false; -} - -// Inject everything together. -fn resolve(tokenizer: &mut Tokenizer) { -    let child = tokenizer.tokenize_state.document_child.as_mut().unwrap(); - -    // First, add the container exits into `child`. -    let mut child_index = 0; -    let mut line = 0; - -    while child_index < child.events.len() { -        let event = &child.events[child_index]; - -        if event.kind == Kind::Enter -            && (event.name == Name::LineEnding || event.name == Name::BlankLineEnding) -        { -            if let Some(mut exits) = tokenizer.tokenize_state.document_exits[line].take() { -                let mut exit_index = 0; -                while exit_index < exits.len() { -                    exits[exit_index].point = event.point.clone(); -                    exit_index += 1; -                } - -                child.map.add(child_index, 0, exits); -            } - -            line += 1; -        } - -        child_index += 1; -    } - -    child.map.consume(&mut child.events); - -    // Now, add all child events into our parent document tokenizer. -    divide_events( -        &mut tokenizer.map, -        &tokenizer.events, -        skip::to(&tokenizer.events, 0, &[Name::Data]), -        &mut child.events, -    ); - -    // Replace the flow data with actual events. -    tokenizer.map.consume(&mut tokenizer.events); - -    // Now, add some final container exits due to the EOF. -    // We can’t inject them into the child earlier, as they are “outside” its -    // linked data. -    if line < tokenizer.tokenize_state.document_exits.len() { -        if let Some(mut exits) = tokenizer.tokenize_state.document_exits[line].take() { -            let mut exit_index = 0; -            while exit_index < exits.len() { -                exits[exit_index].point = tokenizer.point.clone(); -                exit_index += 1; -            } - -            tokenizer.events.append(&mut exits); -        } -    } - -    // Add the resolvers from child. -    tokenizer -        .resolvers -        .append(&mut child.resolvers.split_off(0)); - -    tokenizer -        .tokenize_state -        .definitions -        .append(&mut child.tokenize_state.definitions.split_off(0)); -} diff --git a/src/content/flow.rs b/src/content/flow.rs deleted file mode 100644 index 08c7891..0000000 --- a/src/content/flow.rs +++ /dev/null @@ -1,254 +0,0 @@ -//! The flow content type. -//! -//! **Flow** represents the sections, such as headings and code, which are -//! parsed per line. -//! An example is HTML, which has a certain starting condition (such as -//! `<script>` on its own line), then continues for a while, until an end -//! condition is found (such as `</style>`). -//! If that line with an end condition is never found, that flow goes until -//! the end. -//! -//! The constructs found in flow are: -//! -//! *   [Blank line][crate::construct::blank_line] -//! *   [Code (fenced)][crate::construct::code_fenced] -//! *   [Code (indented)][crate::construct::code_indented] -//! *   [Definition][crate::construct::definition] -//! *   [Heading (atx)][crate::construct::heading_atx] -//! *   [Heading (setext)][crate::construct::heading_setext] -//! *   [HTML (flow)][crate::construct::html_flow] -//! *   [Thematic break][crate::construct::thematic_break] - -use crate::event::Name; -use crate::state::{Name as StateName, State}; -use crate::tokenizer::Tokenizer; - -/// Start of flow. -// -/// ```markdown -/// > | ## alpha -///     ^ -/// > |     bravo -///     ^ -/// > | *** -///     ^ -/// ``` -pub fn start(tokenizer: &mut Tokenizer) -> State { -    match tokenizer.current { -        Some(b'`' | b'~') => { -            tokenizer.attempt( -                State::Next(StateName::FlowAfter), -                State::Next(StateName::FlowBeforeParagraph), -            ); -            State::Retry(StateName::CodeFencedStart) -        } -        Some(b'<') => { -            tokenizer.attempt( -                State::Next(StateName::FlowAfter), -                State::Next(StateName::FlowBeforeParagraph), -            ); -            State::Retry(StateName::HtmlFlowStart) -        } -        Some(b'#') => { -            tokenizer.attempt( -                State::Next(StateName::FlowAfter), -                State::Next(StateName::FlowBeforeParagraph), -            ); -            State::Retry(StateName::HeadingAtxStart) -        } -        // Note: `-` is also used in thematic breaks, so it’s not included here. -        Some(b'=') => { -            tokenizer.attempt( -                State::Next(StateName::FlowAfter), -                State::Next(StateName::FlowBeforeParagraph), -            ); -            State::Retry(StateName::HeadingSetextStart) -        } -        Some(b'*' | b'_') => { -            tokenizer.attempt( -                State::Next(StateName::FlowAfter), -                State::Next(StateName::FlowBeforeParagraph), -            ); -            State::Retry(StateName::ThematicBreakStart) -        } -        Some(b'[') => { -            tokenizer.attempt( -                State::Next(StateName::FlowAfter), -                State::Next(StateName::FlowBeforeParagraph), -            ); -            State::Retry(StateName::DefinitionStart) -        } -        // Actual parsing: blank line? Indented code? Indented anything? -        // Also includes `-` which can be a setext heading underline or a thematic break. -        None | Some(b'\t' | b'\n' | b' ' | b'-') => State::Retry(StateName::FlowBlankLineBefore), -        // Must be a paragraph. -        Some(_) => { -            tokenizer.attempt(State::Next(StateName::FlowAfter), State::Nok); -            State::Retry(StateName::ParagraphStart) -        } -    } -} - -/// At blank line. -/// -/// ```markdown -/// > | ␠␠␊ -///     ^ -/// ``` -pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State { -    tokenizer.attempt( -        State::Next(StateName::FlowBlankLineAfter), -        State::Next(StateName::FlowBeforeCodeIndented), -    ); -    State::Retry(StateName::BlankLineStart) -} - -/// At code (indented). -/// -/// ```markdown -/// > | ␠␠␠␠a -///     ^ -/// ``` -pub fn before_code_indented(tokenizer: &mut Tokenizer) -> State { -    tokenizer.attempt( -        State::Next(StateName::FlowAfter), -        State::Next(StateName::FlowBeforeCodeFenced), -    ); -    State::Retry(StateName::CodeIndentedStart) -} - -/// At code (fenced). -/// -/// ````markdown -/// > | ``` -///     ^ -/// ```` -pub fn before_code_fenced(tokenizer: &mut Tokenizer) -> State { -    tokenizer.attempt( -        State::Next(StateName::FlowAfter), -        State::Next(StateName::FlowBeforeHtml), -    ); -    State::Retry(StateName::CodeFencedStart) -} - -/// At html (flow). -/// -/// ```markdown -/// > | <a> -///     ^ -/// ``` -pub fn before_html(tokenizer: &mut Tokenizer) -> State { -    tokenizer.attempt( -        State::Next(StateName::FlowAfter), -        State::Next(StateName::FlowBeforeHeadingAtx), -    ); -    State::Retry(StateName::HtmlFlowStart) -} - -/// At heading (atx). -/// -/// ```markdown -/// > | # a -///     ^ -/// ``` -pub fn before_heading_atx(tokenizer: &mut Tokenizer) -> State { -    tokenizer.attempt( -        State::Next(StateName::FlowAfter), -        State::Next(StateName::FlowBeforeHeadingSetext), -    ); -    State::Retry(StateName::HeadingAtxStart) -} - -/// At heading (setext). -/// -/// ```markdown -///   | a -/// > | = -///     ^ -/// ``` -pub fn before_heading_setext(tokenizer: &mut Tokenizer) -> State { -    tokenizer.attempt( -        State::Next(StateName::FlowAfter), -        State::Next(StateName::FlowBeforeThematicBreak), -    ); -    State::Retry(StateName::HeadingSetextStart) -} - -/// At thematic break. -/// -/// ```markdown -/// > | *** -///     ^ -/// ``` -pub fn before_thematic_break(tokenizer: &mut Tokenizer) -> State { -    tokenizer.attempt( -        State::Next(StateName::FlowAfter), -        State::Next(StateName::FlowBeforeDefinition), -    ); -    State::Retry(StateName::ThematicBreakStart) -} - -/// At definition. -/// -/// ```markdown -/// > | [a]: b -///     ^ -/// ``` -pub fn before_definition(tokenizer: &mut Tokenizer) -> State { -    tokenizer.attempt( -        State::Next(StateName::FlowAfter), -        State::Next(StateName::FlowBeforeParagraph), -    ); -    State::Retry(StateName::DefinitionStart) -} - -/// At paragraph. -/// -/// ```markdown -/// > | a -///     ^ -/// ``` -pub fn before_paragraph(tokenizer: &mut Tokenizer) -> State { -    tokenizer.attempt(State::Next(StateName::FlowAfter), State::Nok); -    State::Retry(StateName::ParagraphStart) -} - -/// After blank line. -/// -/// ```markdown -/// > | ␠␠␊ -///       ^ -/// ``` -pub fn blank_line_after(tokenizer: &mut Tokenizer) -> State { -    match tokenizer.current { -        None => State::Ok, -        Some(b'\n') => { -            tokenizer.enter(Name::BlankLineEnding); -            tokenizer.consume(); -            tokenizer.exit(Name::BlankLineEnding); -            // Feel free to interrupt. -            tokenizer.interrupt = false; -            State::Next(StateName::FlowStart) -        } -        _ => unreachable!("expected eol/eof"), -    } -} - -/// After flow. -/// -/// ```markdown -/// > | # a␊ -///        ^ -/// ``` -pub fn after(tokenizer: &mut Tokenizer) -> State { -    match tokenizer.current { -        None => State::Ok, -        Some(b'\n') => { -            tokenizer.enter(Name::LineEnding); -            tokenizer.consume(); -            tokenizer.exit(Name::LineEnding); -            State::Next(StateName::FlowStart) -        } -        _ => unreachable!("expected eol/eof"), -    } -} diff --git a/src/content/mod.rs b/src/content/mod.rs deleted file mode 100644 index af40cc0..0000000 --- a/src/content/mod.rs +++ /dev/null @@ -1,11 +0,0 @@ -//! Content types found in markdown. -//! -//! *   [document][document] -//! *   [flow][flow] -//! *   [string][string] -//! *   [text][text] - -pub mod document; -pub mod flow; -pub mod string; -pub mod text; diff --git a/src/content/string.rs b/src/content/string.rs deleted file mode 100644 index ec4fce2..0000000 --- a/src/content/string.rs +++ /dev/null @@ -1,76 +0,0 @@ -//! The string content type. -//! -//! **String** is a limited [text][] like content type which only allows -//! character escapes and character references. -//! It exists in things such as identifiers (media references, definitions), -//! titles, URLs, code (fenced) info and meta parts. -//! -//! The constructs found in string are: -//! -//! *   [Character escape][crate::construct::character_escape] -//! *   [Character reference][crate::construct::character_reference] -//! -//! [text]: crate::content::text - -use crate::construct::partial_whitespace::resolve_whitespace; -use crate::resolve::Name as ResolveName; -use crate::state::{Name as StateName, State}; -use crate::tokenizer::Tokenizer; - -/// Characters that can start something in string. -const MARKERS: [u8; 2] = [b'&', b'\\']; - -/// Start of string. -/// -/// ````markdown -/// > | ```js -///        ^ -/// ```` -pub fn start(tokenizer: &mut Tokenizer) -> State { -    tokenizer.register_resolver(ResolveName::String); -    tokenizer.tokenize_state.markers = &MARKERS; -    State::Retry(StateName::StringBefore) -} - -/// Before string. -/// -/// ````markdown -/// > | ```js -///        ^ -/// ```` -pub fn before(tokenizer: &mut Tokenizer) -> State { -    match tokenizer.current { -        None => State::Ok, -        Some(b'&') => { -            tokenizer.attempt( -                State::Next(StateName::StringBefore), -                State::Next(StateName::StringBeforeData), -            ); -            State::Retry(StateName::CharacterReferenceStart) -        } -        Some(b'\\') => { -            tokenizer.attempt( -                State::Next(StateName::StringBefore), -                State::Next(StateName::StringBeforeData), -            ); -            State::Retry(StateName::CharacterEscapeStart) -        } -        _ => State::Retry(StateName::StringBeforeData), -    } -} - -/// At data. -/// -/// ````markdown -/// > | ```js -///        ^ -/// ```` -pub fn before_data(tokenizer: &mut Tokenizer) -> State { -    tokenizer.attempt(State::Next(StateName::StringBefore), State::Nok); -    State::Retry(StateName::DataStart) -} - -/// Resolve whitespace in string. -pub fn resolve(tokenizer: &mut Tokenizer) { -    resolve_whitespace(tokenizer, false, false); -} diff --git a/src/content/text.rs b/src/content/text.rs deleted file mode 100644 index 5c13dba..0000000 --- a/src/content/text.rs +++ /dev/null @@ -1,173 +0,0 @@ -//! The text content type. -//! -//! **Text** contains phrasing content such as -//! [attention][crate::construct::attention] (emphasis, strong), -//! [code (text)][crate::construct::code_text], and actual text. -//! -//! The constructs found in text are: -//! -//! *   [Attention][crate::construct::attention] -//! *   [Autolink][crate::construct::autolink] -//! *   [Character escape][crate::construct::character_escape] -//! *   [Character reference][crate::construct::character_reference] -//! *   [Code (text)][crate::construct::code_text] -//! *   [Hard break (escape)][crate::construct::hard_break_escape] -//! *   [HTML (text)][crate::construct::html_text] -//! *   [Label start (image)][crate::construct::label_start_image] -//! *   [Label start (link)][crate::construct::label_start_link] -//! *   [Label end][crate::construct::label_end] -//! -//! > 👉 **Note**: for performance reasons, hard break (trailing) is formed by -//! > [whitespace][crate::construct::partial_whitespace]. - -use crate::construct::partial_whitespace::resolve_whitespace; -use crate::resolve::Name as ResolveName; -use crate::state::{Name as StateName, State}; -use crate::tokenizer::Tokenizer; - -/// Characters that can start something in text. -const MARKERS: [u8; 9] = [ -    b'!',  // `label_start_image` -    b'&',  // `character_reference` -    b'*',  // `attention` -    b'<',  // `autolink`, `html_text` -    b'[',  // `label_start_link` -    b'\\', // `character_escape`, `hard_break_escape` -    b']',  // `label_end` -    b'_',  // `attention` -    b'`',  // `code_text` -]; - -/// Start of text. -/// -/// ```markdown -/// > | abc -///     ^ -/// ``` -pub fn start(tokenizer: &mut Tokenizer) -> State { -    tokenizer.register_resolver(ResolveName::Text); -    tokenizer.tokenize_state.markers = &MARKERS; -    State::Retry(StateName::TextBefore) -} - -/// Before text. -/// -/// ```markdown -/// > | abc -///     ^ -/// ``` -pub fn before(tokenizer: &mut Tokenizer) -> State { -    match tokenizer.current { -        None => State::Ok, -        Some(b'!') => { -            tokenizer.attempt( -                State::Next(StateName::TextBefore), -                State::Next(StateName::TextBeforeData), -            ); -            State::Retry(StateName::LabelStartImageStart) -        } -        Some(b'&') => { -            tokenizer.attempt( -                State::Next(StateName::TextBefore), -                State::Next(StateName::TextBeforeData), -            ); -            State::Retry(StateName::CharacterReferenceStart) -        } -        Some(b'*' | b'_') => { -            tokenizer.attempt( -                State::Next(StateName::TextBefore), -                State::Next(StateName::TextBeforeData), -            ); -            State::Retry(StateName::AttentionStart) -        } -        // `autolink`, `html_text` (order does not matter) -        Some(b'<') => { -            tokenizer.attempt( -                State::Next(StateName::TextBefore), -                State::Next(StateName::TextBeforeHtml), -            ); -            State::Retry(StateName::AutolinkStart) -        } -        Some(b'[') => { -            tokenizer.attempt( -                State::Next(StateName::TextBefore), -                State::Next(StateName::TextBeforeData), -            ); -            State::Retry(StateName::LabelStartLinkStart) -        } -        Some(b'\\') => { -            tokenizer.attempt( -                State::Next(StateName::TextBefore), -                State::Next(StateName::TextBeforeHardBreakEscape), -            ); -            State::Retry(StateName::CharacterEscapeStart) -        } -        Some(b']') => { -            tokenizer.attempt( -                State::Next(StateName::TextBefore), -                State::Next(StateName::TextBeforeData), -            ); -            State::Retry(StateName::LabelEndStart) -        } -        Some(b'`') => { -            tokenizer.attempt( -                State::Next(StateName::TextBefore), -                State::Next(StateName::TextBeforeData), -            ); -            State::Retry(StateName::CodeTextStart) -        } -        _ => State::Retry(StateName::TextBeforeData), -    } -} - -/// Before html (text). -/// -/// At `<`, which wasn’t an autolink. -/// -/// ```markdown -/// > | a <b> -///       ^ -/// ``` -pub fn before_html(tokenizer: &mut Tokenizer) -> State { -    tokenizer.attempt( -        State::Next(StateName::TextBefore), -        State::Next(StateName::TextBeforeData), -    ); -    State::Retry(StateName::HtmlTextStart) -} - -/// Before hard break escape. -/// -/// At `\`, which wasn’t a character escape. -/// -/// ```markdown -/// > | a \␊ -///       ^ -/// ``` -pub fn before_hard_break_escape(tokenizer: &mut Tokenizer) -> State { -    tokenizer.attempt( -        State::Next(StateName::TextBefore), -        State::Next(StateName::TextBeforeData), -    ); -    State::Retry(StateName::HardBreakEscapeStart) -} - -/// Before data. -/// -/// ```markdown -/// > | a -///     ^ -/// ``` -pub fn before_data(tokenizer: &mut Tokenizer) -> State { -    tokenizer.attempt(State::Next(StateName::TextBefore), State::Nok); -    State::Retry(StateName::DataStart) -} - -/// Resolve whitespace. -pub fn resolve(tokenizer: &mut Tokenizer) { -    resolve_whitespace( -        tokenizer, -        tokenizer.parse_state.constructs.hard_break_trailing, -        true, -    ); -}  | 
