Diffstat:
 src/construct/attention.rs      |   6
 src/construct/content.rs        | 188
 src/construct/definition.rs     |  26
 src/construct/document.rs       |   5
 src/construct/flow.rs           |  33
 src/construct/gfm_table.rs      |  61
 src/construct/heading_atx.rs    |   7
 src/construct/heading_setext.rs | 137
 src/construct/label_end.rs      |   5
 src/construct/list_item.rs      |   7
 src/construct/mod.rs            |   4
 src/construct/paragraph.rs      | 149
 src/construct/partial_data.rs   |   7
 src/construct/string.rs         |   6
 src/construct/text.rs           |   6
 15 files changed, 436 insertions(+), 211 deletions(-)
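
The thread running through all fifteen files is one signature change: every resolver now returns `Result<Option<Subresult>, String>` instead of `()`, so a resolver can report a parse error and, in the new `content` construct, hand further subtokenization work back to its caller. A minimal sketch of the shape (the two structs are stand-ins, not the crate's real definitions):

```rust
pub struct Subresult; // stand-in for crate::subtokenize::Subresult
pub struct Tokenizer; // stand-in for crate::tokenizer::Tokenizer

// Before this change: pub fn resolve(tokenizer: &mut Tokenizer)
// After it, resolvers that surface no embedded content simply return Ok(None):
pub fn resolve(_tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
    Ok(None)
}
```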
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
index 4a208df..4d58610 100644
--- a/src/construct/attention.rs
+++ b/src/construct/attention.rs
@@ -79,6 +79,7 @@
 use crate::event::{Event, Kind, Name, Point};
 use crate::resolve::Name as ResolveName;
 use crate::state::{Name as StateName, State};
+use crate::subtokenize::Subresult;
 use crate::tokenizer::Tokenizer;
 use crate::util::{
     char::{
@@ -87,6 +88,7 @@ use crate::util::{
     },
     slice::Slice,
 };
+use alloc::string::String;
 use alloc::{vec, vec::Vec};
 
 /// Attentention sequence that we can take markers from.
@@ -150,7 +152,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {
 }
 
 /// Resolve sequences.
-pub fn resolve(tokenizer: &mut Tokenizer) {
+pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
     // Find all sequences, gather info about them.
     let mut sequences = get_sequences(tokenizer);
 
@@ -221,6 +223,8 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
     }
 
     tokenizer.map.consume(&mut tokenizer.events);
+
+    Ok(None)
 }
 
 /// Get sequences.
diff --git a/src/construct/content.rs b/src/construct/content.rs
new file mode 100644
index 0000000..6c10cea
--- /dev/null
+++ b/src/construct/content.rs
@@ -0,0 +1,188 @@
+//! Content occurs in the [flow][] content type.
+//!
+//! Content contains zero or more [definition][definition]s, followed by zero
+//! or one [paragraph][].
+//!
+//! The constructs found in flow are:
+//!
+//! *   [Definition][crate::construct::definition]
+//! *   [Paragraph][crate::construct::paragraph]
+//!
+//! ## Tokens
+//!
+//! *   [`Content`][Name::Content]
+//!
+//! > 👉 **Note**: while parsing, [`Content`][Name::Content]
+//! > is used, which is later compiled away.
+//!
+//! ## References
+//!
+//! *   [`content.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/content.js)
+//!
+//! [flow]: crate::construct::flow
+//! [definition]: crate::construct::definition
+//! [paragraph]: crate::construct::paragraph
+
+use crate::event::{Content, Kind, Link, Name};
+use crate::resolve::Name as ResolveName;
+use crate::state::{Name as StateName, State};
+use crate::subtokenize::{subtokenize, Subresult};
+use crate::tokenizer::Tokenizer;
+use alloc::{string::String, vec};
+
+/// Before a content content.
+///
+/// ```markdown
+/// > | abc
+///     ^
+/// ```
+pub fn chunk_start(tokenizer: &mut Tokenizer) -> State {
+    match tokenizer.current {
+        None | Some(b'\n') => unreachable!("unexpected eol/eof"),
+        _ => {
+            tokenizer.enter_link(
+                Name::Content,
+                Link {
+                    previous: None,
+                    next: None,
+                    content: Content::Content,
+                },
+            );
+            State::Retry(StateName::ContentChunkInside)
+        }
+    }
+}
+
+/// In a content chunk.
+///
+/// ```markdown
+/// > | abc
+///     ^^^
+/// ```
+pub fn chunk_inside(tokenizer: &mut Tokenizer) -> State {
+    match tokenizer.current {
+        None | Some(b'\n') => {
+            tokenizer.exit(Name::Content);
+            tokenizer.register_resolver_before(ResolveName::Content);
+            // You’d be interrupting.
+            tokenizer.interrupt = true;
+            State::Ok
+        }
+        _ => {
+            tokenizer.consume();
+            State::Next(StateName::ContentChunkInside)
+        }
+    }
+}
+
+/// Before a definition.
+///
+/// ```markdown
+/// > | [a]: b
+///     ^
+/// ```
+pub fn definition_before(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.attempt(
+        State::Next(StateName::ContentDefinitionAfter),
+        State::Next(StateName::ParagraphStart),
+    );
+    State::Retry(StateName::DefinitionStart)
+}
+
+/// After a definition.
+///
+/// ```markdown
+/// > | [a]: b
+///           ^
+///   | c
+/// ```
+pub fn definition_after(tokenizer: &mut Tokenizer) -> State {
+    debug_assert!(matches!(tokenizer.current, None | Some(b'\n')));
+    if tokenizer.current.is_none() {
+        State::Ok
+    } else {
+        tokenizer.enter(Name::LineEnding);
+        tokenizer.consume();
+        tokenizer.exit(Name::LineEnding);
+        State::Next(StateName::ContentDefinitionBefore)
+    }
+}
+
+/// Merge `Content` chunks, which currently span a single line, into actual
+/// `Content`s that span multiple lines.
+pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
+    let mut index = 0;
+
+    while index < tokenizer.events.len() {
+        let event = &tokenizer.events[index];
+
+        if event.kind == Kind::Enter && event.name == Name::Content {
+            // Exit:Content
+            let mut exit_index = index + 1;
+
+            loop {
+                let mut enter_index = exit_index + 1;
+
+                if enter_index == tokenizer.events.len()
+                    || tokenizer.events[enter_index].name != Name::LineEnding
+                {
+                    break;
+                }
+
+                // Skip past line ending.
+                enter_index += 2;
+
+                // Skip past prefix.
+                while enter_index < tokenizer.events.len() {
+                    let event = &tokenizer.events[enter_index];
+
+                    if event.name != Name::SpaceOrTab
+                        && event.name != Name::BlockQuotePrefix
+                        && event.name != Name::BlockQuoteMarker
+                    {
+                        break;
+                    }
+
+                    enter_index += 1;
+                }
+
+                if enter_index == tokenizer.events.len()
+                    || tokenizer.events[enter_index].name != Name::Content
+                {
+                    break;
+                }
+
+                // Set Exit:Content point to Exit:LineEnding.
+                tokenizer.events[exit_index].point = tokenizer.events[exit_index + 2].point.clone();
+                // Remove Enter:LineEnding, Exit:LineEnding.
+                tokenizer.map.add(exit_index + 1, 2, vec![]);
+
+                // Link Enter:Content to Enter:Content on this line and vice versa.
+                tokenizer.events[exit_index - 1].link.as_mut().unwrap().next = Some(enter_index);
+                tokenizer.events[enter_index]
+                    .link
+                    .as_mut()
+                    .unwrap()
+                    .previous = Some(exit_index - 1);
+
+                // Potential next start.
+                exit_index = enter_index + 1;
+            }
+
+            // Move to `Exit:Content`.
+            index = exit_index;
+        }
+
+        index += 1;
+    }
+
+    tokenizer.map.consume(&mut tokenizer.events);
+
+    let result = subtokenize(
+        &mut tokenizer.events,
+        tokenizer.parse_state,
+        &Some(Content::Content),
+    )?;
+
+    Ok(Some(result))
+}
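
The resolver above stitches per-line `Content` chunks together by rewriting event points and `previous`/`next` links, then subtokenizes the merged span as one unit. A self-contained model of that linking idea (illustrative types, not the crate's API):

```rust
// One chunk per line, each carrying optional links to its neighbours, as
// the tokenizer produced them; `resolve` ties them into a chain.
#[derive(Debug)]
struct Chunk {
    text: &'static str,
    previous: Option<usize>,
    next: Option<usize>,
}

fn main() {
    let mut chunks = vec![
        Chunk { text: "[a]: b", previous: None, next: None },
        Chunk { text: "c", previous: None, next: None },
    ];

    // Link each chunk to the one on the following line.
    for index in 1..chunks.len() {
        chunks[index - 1].next = Some(index);
        chunks[index].previous = Some(index - 1);
    }

    // A later pass (`subtokenize` in the crate) walks the chain head to
    // tail and feeds the concatenated text to the content-type tokenizer.
    let mut at = Some(0);
    let mut whole = String::new();
    while let Some(index) = at {
        whole.push_str(chunks[index].text);
        at = chunks[index].next;
        if at.is_some() {
            whole.push('\n');
        }
    }
    assert_eq!(whole, "[a]: b\nc");
}
```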
diff --git a/src/construct/definition.rs b/src/construct/definition.rs
index 1071489..8ccfb90 100644
--- a/src/construct/definition.rs
+++ b/src/construct/definition.rs
@@ -1,4 +1,4 @@
-//! Definition occurs in the [flow] content type.
+//! Definition occurs in the [content] content type.
 //!
 //! ## Grammar
 //!
@@ -12,8 +12,8 @@
 //! ; those parts.
 //! ```
 //!
-//! As this construct occurs in flow, like all flow constructs, it must be
-//! followed by an eol (line ending) or eof (end of file).
+//! This construct must be followed by an eol (line ending) or eof (end of
+//! file), like flow constructs.
 //!
 //! See [`destination`][destination], [`label`][label], and [`title`][title]
 //! for grammar, notes, and recommendations on each part.
@@ -88,7 +88,7 @@
 //! *   [`definition.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/definition.js)
 //! *   [*§ 4.7 Link reference definitions* in `CommonMark`](https://spec.commonmark.org/0.30/#link-reference-definitions)
 //!
-//! [flow]: crate::construct::flow
+//! [content]: crate::construct::content
 //! [string]: crate::construct::string
 //! [character_escape]: crate::construct::character_escape
 //! [character_reference]: crate::construct::character_reference
@@ -157,7 +157,10 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
             tokenizer.tokenize_state.token_1 = Name::DefinitionLabel;
             tokenizer.tokenize_state.token_2 = Name::DefinitionLabelMarker;
             tokenizer.tokenize_state.token_3 = Name::DefinitionLabelString;
-            tokenizer.attempt(State::Next(StateName::DefinitionLabelAfter), State::Nok);
+            tokenizer.attempt(
+                State::Next(StateName::DefinitionLabelAfter),
+                State::Next(StateName::DefinitionLabelNok),
+            );
             State::Retry(StateName::LabelStart)
         }
         _ => State::Nok,
@@ -192,6 +195,19 @@ pub fn label_after(tokenizer: &mut Tokenizer) -> State {
     }
 }
 
+/// At a non-label
+///
+/// ```markdown
+/// > | []
+///     ^
+/// ```
+pub fn label_nok(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.tokenize_state.token_1 = Name::Data;
+    tokenizer.tokenize_state.token_2 = Name::Data;
+    tokenizer.tokenize_state.token_3 = Name::Data;
+    State::Nok
+}
+
 /// After marker.
 ///
 /// ```markdown
diff --git a/src/construct/document.rs b/src/construct/document.rs
index 45a961d..82f2ebd 100644
--- a/src/construct/document.rs
+++ b/src/construct/document.rs
@@ -413,7 +413,7 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State {
     while !document_lazy_continuation_current && stack_index > 0 {
         stack_index -= 1;
         let name = &child.stack[stack_index];
-        if name == &Name::Paragraph || name == &Name::Definition || name == &Name::GfmTableHead {
+        if name == &Name::Content || name == &Name::GfmTableHead {
             document_lazy_continuation_current = true;
         }
     }
@@ -423,7 +423,7 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State {
     if !document_lazy_continuation_current && !child.events.is_empty() {
         let before = skip::opt_back(&child.events, child.events.len() - 1, &[Name::LineEnding]);
         let name = &child.events[before].name;
-        if name == &Name::Paragraph {
+        if name == &Name::Content {
             document_lazy_continuation_current = true;
         }
     }
@@ -582,6 +582,7 @@ fn resolve(tokenizer: &mut Tokenizer) {
         &tokenizer.events,
         flow_index,
         &mut child.events,
+        (0, 0),
     );
 
     // Replace the flow data with actual events.
diff --git a/src/construct/flow.rs b/src/construct/flow.rs
index e97ee63..08e0466 100644
--- a/src/construct/flow.rs
+++ b/src/construct/flow.rs
@@ -12,7 +12,6 @@
 //!
 //! *   [Blank line][crate::construct::blank_line]
 //! *   [Code (indented)][crate::construct::code_indented]
-//! *   [Definition][crate::construct::definition]
 //! *   [Heading (atx)][crate::construct::heading_atx]
 //! *   [Heading (setext)][crate::construct::heading_setext]
 //! *   [HTML (flow)][crate::construct::html_flow]
@@ -40,14 +39,14 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
         Some(b'#') => {
             tokenizer.attempt(
                 State::Next(StateName::FlowAfter),
-                State::Next(StateName::FlowBeforeParagraph),
+                State::Next(StateName::FlowBeforeContent),
             );
             State::Retry(StateName::HeadingAtxStart)
         }
         Some(b'$' | b'`' | b'~') => {
             tokenizer.attempt(
                 State::Next(StateName::FlowAfter),
-                State::Next(StateName::FlowBeforeParagraph),
+                State::Next(StateName::FlowBeforeContent),
             );
             State::Retry(StateName::RawFlowStart)
         }
@@ -56,7 +55,7 @@
         Some(b'*' | b'_') => {
             tokenizer.attempt(
                 State::Next(StateName::FlowAfter),
-                State::Next(StateName::FlowBeforeParagraph),
+                State::Next(StateName::FlowBeforeContent),
             );
             State::Retry(StateName::ThematicBreakStart)
         }
@@ -70,12 +69,12 @@
         Some(b'{') => {
             tokenizer.attempt(
                 State::Next(StateName::FlowAfter),
-                State::Next(StateName::FlowBeforeParagraph),
+                State::Next(StateName::FlowBeforeContent),
             );
             State::Retry(StateName::MdxExpressionFlowStart)
         }
         // Actual parsing: blank line? Indented code? Indented anything?
-        // Tables, setext heading underlines, definitions, and paragraphs are
+        // Tables, setext heading underlines, definitions, and Contents are
         // particularly weird.
         _ => State::Retry(StateName::FlowBlankLineBefore),
     }
@@ -217,34 +216,20 @@ pub fn before_mdx_expression(tokenizer: &mut Tokenizer) -> State {
 pub fn before_gfm_table(tokenizer: &mut Tokenizer) -> State {
     tokenizer.attempt(
         State::Next(StateName::FlowAfter),
-        State::Next(StateName::FlowBeforeDefinition),
+        State::Next(StateName::FlowBeforeContent),
     );
     State::Retry(StateName::GfmTableStart)
 }
 
-/// At definition.
-///
-/// ```markdown
-/// > | [a]: b
-///     ^
-/// ```
-pub fn before_definition(tokenizer: &mut Tokenizer) -> State {
-    tokenizer.attempt(
-        State::Next(StateName::FlowAfter),
-        State::Next(StateName::FlowBeforeParagraph),
-    );
-    State::Retry(StateName::DefinitionStart)
-}
-
-/// At paragraph.
+/// At content.
 ///
 /// ```markdown
 /// > | a
 ///     ^
 /// ```
-pub fn before_paragraph(tokenizer: &mut Tokenizer) -> State {
+pub fn before_content(tokenizer: &mut Tokenizer) -> State {
     tokenizer.attempt(State::Next(StateName::FlowAfter), State::Nok);
-    State::Retry(StateName::ParagraphStart)
+    State::Retry(StateName::ContentChunkStart)
 }
 
 /// After blank line.
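
Flow now funnels every fallback into `FlowBeforeContent`: each construct is attempted in turn and, on `Nok`, flow retries the next candidate, bottoming out in a content chunk rather than going to a definition or a paragraph directly. A reduced sketch of that attempt-and-fall-back shape (the enum and the checks are illustrative, not the crate's real state machine):

```rust
#[derive(Clone, Copy, Debug)]
enum Construct {
    HeadingAtx,
    ThematicBreak,
    ContentChunk, // the rest construct; swallows definitions + paragraph
}

fn parse_flow_line(line: &str) -> Construct {
    // Try each construct; a failed attempt (Nok) falls through to the next.
    let attempts = [Construct::HeadingAtx, Construct::ThematicBreak];
    for construct in attempts {
        let ok = match construct {
            Construct::HeadingAtx => line.starts_with('#'),
            Construct::ThematicBreak => line.len() >= 3 && line.chars().all(|c| c == '*'),
            Construct::ContentChunk => true,
        };
        if ok {
            return construct;
        }
    }
    Construct::ContentChunk
}

fn main() {
    assert!(matches!(parse_flow_line("# hi"), Construct::HeadingAtx));
    assert!(matches!(parse_flow_line("***"), Construct::ThematicBreak));
    assert!(matches!(parse_flow_line("[a]: b"), Construct::ContentChunk));
}
```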
diff --git a/src/construct/gfm_table.rs b/src/construct/gfm_table.rs
index 27fbadf..63772c4 100644
--- a/src/construct/gfm_table.rs
+++ b/src/construct/gfm_table.rs
@@ -229,9 +229,10 @@ use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}
 use crate::event::{Content, Event, Kind, Link, Name};
 use crate::resolve::Name as ResolveName;
 use crate::state::{Name as StateName, State};
+use crate::subtokenize::Subresult;
 use crate::tokenizer::Tokenizer;
 use crate::util::{constant::TAB_SIZE, skip::opt_back as skip_opt_back};
-use alloc::vec;
+use alloc::{string::String, vec};
 
 /// Start of a GFM table.
 ///
@@ -771,15 +772,13 @@ pub fn body_row_escape(tokenizer: &mut Tokenizer) -> State {
 }
 
 /// Resolve GFM table.
-pub fn resolve(tokenizer: &mut Tokenizer) {
+pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
     let mut index = 0;
-    // let mut tables = vec![];
     let mut in_first_cell_awaiting_pipe = true;
     let mut in_row = false;
     let mut in_delimiter_row = false;
     let mut last_cell = (0, 0, 0, 0);
     let mut cell = (0, 0, 0, 0);
-
     let mut after_head_awaiting_first_body_row = false;
     let mut last_table_end = 0;
     let mut last_table_has_body = false;
@@ -800,17 +799,14 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
                 }
 
                 // Inject table start.
-                tokenizer.map.add(
-                    index,
-                    0,
-                    vec![Event {
-                        kind: Kind::Enter,
-                        name: Name::GfmTable,
-                        point: tokenizer.events[index].point.clone(),
-                        link: None,
-                    }],
-                );
-            } else if event.name == Name::GfmTableRow || event.name == Name::GfmTableDelimiterRow {
+                let enter = Event {
+                    kind: Kind::Enter,
+                    name: Name::GfmTable,
+                    point: tokenizer.events[index].point.clone(),
+                    link: None,
+                };
+                tokenizer.map.add(index, 0, vec![enter]);
+            } else if matches!(event.name, Name::GfmTableRow | Name::GfmTableDelimiterRow) {
                 in_delimiter_row = event.name == Name::GfmTableDelimiterRow;
                 in_row = true;
                 in_first_cell_awaiting_pipe = true;
@@ -821,23 +817,21 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
                 if after_head_awaiting_first_body_row {
                     after_head_awaiting_first_body_row = false;
                     last_table_has_body = true;
-                    tokenizer.map.add(
-                        index,
-                        0,
-                        vec![Event {
-                            kind: Kind::Enter,
-                            name: Name::GfmTableBody,
-                            point: tokenizer.events[index].point.clone(),
-                            link: None,
-                        }],
-                    );
+                    let enter = Event {
+                        kind: Kind::Enter,
+                        name: Name::GfmTableBody,
+                        point: tokenizer.events[index].point.clone(),
+                        link: None,
+                    };
+                    tokenizer.map.add(index, 0, vec![enter]);
                 }
             }
             // Cell data.
             else if in_row
-                && (event.name == Name::Data
-                    || event.name == Name::GfmTableDelimiterMarker
-                    || event.name == Name::GfmTableDelimiterFiller)
+                && matches!(
+                    event.name,
+                    Name::Data | Name::GfmTableDelimiterMarker | Name::GfmTableDelimiterFiller
+                )
             {
                 in_first_cell_awaiting_pipe = false;
 
@@ -868,7 +862,7 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
         } else if event.name == Name::GfmTableHead {
             after_head_awaiting_first_body_row = true;
             last_table_end = index;
-        } else if event.name == Name::GfmTableRow || event.name == Name::GfmTableDelimiterRow {
+        } else if matches!(event.name, Name::GfmTableRow | Name::GfmTableDelimiterRow) {
             in_row = false;
             last_table_end = index;
             if last_cell.1 != 0 {
@@ -878,9 +872,10 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
                 flush_cell(tokenizer, cell, in_delimiter_row, Some(index));
             }
         } else if in_row
-            && (event.name == Name::Data
-                || event.name == Name::GfmTableDelimiterMarker
-                || event.name == Name::GfmTableDelimiterFiller)
+            && (matches!(
+                event.name,
+                Name::Data | Name::GfmTableDelimiterMarker | Name::GfmTableDelimiterFiller
+            ))
         {
             cell.3 = index;
         }
@@ -891,6 +886,8 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
     if last_table_end != 0 {
         flush_table_end(tokenizer, last_table_end, last_table_has_body);
     }
+
+    Ok(None)
 }
 
 /// Generate a cell.
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index c1090c4..b76e455 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -66,9 +66,10 @@ use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}
 use crate::event::{Content, Event, Kind, Link, Name};
 use crate::resolve::Name as ResolveName;
 use crate::state::{Name as StateName, State};
+use crate::subtokenize::Subresult;
 use crate::tokenizer::Tokenizer;
 use crate::util::constant::{HEADING_ATX_OPENING_FENCE_SIZE_MAX, TAB_SIZE};
-use alloc::vec;
+use alloc::{string::String, vec};
 
 /// Start of a heading (atx).
 ///
@@ -222,7 +223,7 @@ pub fn data(tokenizer: &mut Tokenizer) -> State {
 }
 
 /// Resolve heading (atx).
-pub fn resolve(tokenizer: &mut Tokenizer) {
+pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
     let mut index = 0;
     let mut heading_inside = false;
     let mut data_start: Option<usize> = None;
@@ -281,4 +282,6 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
 
         index += 1;
     }
+
+    Ok(None)
 }
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index e9cc759..3a484e1 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -54,6 +54,7 @@
 //! *   [`HeadingSetext`][Name::HeadingSetext]
 //! *   [`HeadingSetextText`][Name::HeadingSetextText]
 //! *   [`HeadingSetextUnderline`][Name::HeadingSetextUnderline]
+//! *   [`HeadingSetextUnderlineSequence`][Name::HeadingSetextUnderlineSequence]
 //!
 //! ## References
 //!
@@ -70,12 +71,13 @@
 //! [atx]: http://www.aaronsw.com/2002/atx/
 
 use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
-use crate::event::{Kind, Name};
+use crate::event::{Content, Event, Kind, Link, Name};
 use crate::resolve::Name as ResolveName;
 use crate::state::{Name as StateName, State};
+use crate::subtokenize::Subresult;
 use crate::tokenizer::Tokenizer;
-use crate::util::{constant::TAB_SIZE, skip::opt_back as skip_opt_back};
-use alloc::vec;
+use crate::util::{constant::TAB_SIZE, skip};
+use alloc::{string::String, vec};
 
 /// At start of heading (setext) underline.
 ///
@@ -90,14 +92,16 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
         && !tokenizer.pierce
         // Require a paragraph before.
        && (!tokenizer.events.is_empty()
-            && tokenizer.events[skip_opt_back(
+            && tokenizer.events[skip::opt_back(
                 &tokenizer.events,
                 tokenizer.events.len() - 1,
                 &[Name::LineEnding, Name::SpaceOrTab],
             )]
             .name
-                == Name::Paragraph)
+                == Name::Content)
     {
+        tokenizer.enter(Name::HeadingSetextUnderline);
+
         if matches!(tokenizer.current, Some(b'\t' | b' ')) {
             tokenizer.attempt(State::Next(StateName::HeadingSetextBefore), State::Nok);
             State::Retry(space_or_tab_min_max(
@@ -128,7 +132,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'-' | b'=') => {
             tokenizer.tokenize_state.marker = tokenizer.current.unwrap();
-            tokenizer.enter(Name::HeadingSetextUnderline);
+            tokenizer.enter(Name::HeadingSetextUnderlineSequence);
             State::Retry(StateName::HeadingSetextInside)
         }
         _ => State::Nok,
@@ -148,7 +152,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {
         State::Next(StateName::HeadingSetextInside)
     } else {
         tokenizer.tokenize_state.marker = 0;
-        tokenizer.exit(Name::HeadingSetextUnderline);
+        tokenizer.exit(Name::HeadingSetextUnderlineSequence);
 
         if matches!(tokenizer.current, Some(b'\t' | b' ')) {
             tokenizer.attempt(State::Next(StateName::HeadingSetextAfter), State::Nok);
@@ -172,6 +176,7 @@ pub fn after(tokenizer: &mut Tokenizer) -> State {
             // Feel free to interrupt.
             tokenizer.interrupt = false;
             tokenizer.register_resolver(ResolveName::HeadingSetext);
+            tokenizer.exit(Name::HeadingSetextUnderline);
             State::Ok
         }
         _ => State::Nok,
@@ -179,42 +184,102 @@
 }
 
 /// Resolve heading (setext).
-pub fn resolve(tokenizer: &mut Tokenizer) {
-    let mut index = 0;
-    let mut paragraph_enter = None;
-    let mut paragraph_exit = None;
-
-    while index < tokenizer.events.len() {
-        let event = &tokenizer.events[index];
-
-        // Find paragraphs.
-        if event.kind == Kind::Enter {
-            if event.name == Name::Paragraph {
-                paragraph_enter = Some(index);
-            }
-        } else if event.name == Name::Paragraph {
-            paragraph_exit = Some(index);
-        }
-        // We know this is preceded by a paragraph.
-        // Otherwise we don’t parse.
-        else if event.name == Name::HeadingSetextUnderline {
-            let enter = paragraph_enter.take().unwrap();
-            let exit = paragraph_exit.take().unwrap();
+pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
+    tokenizer.map.consume(&mut tokenizer.events);
+
+    let mut enter = skip::to(&tokenizer.events, 0, &[Name::HeadingSetextUnderline]);
+
+    while enter < tokenizer.events.len() {
+        let exit = skip::to(
+            &tokenizer.events,
+            enter + 1,
+            &[Name::HeadingSetextUnderline],
+        );
+
+        // Find paragraph before
+        let paragraph_exit_before = skip::opt_back(
+            &tokenizer.events,
+            enter - 1,
+            &[Name::SpaceOrTab, Name::LineEnding, Name::BlockQuotePrefix],
+        );
+
+        // There’s a paragraph before: this is a setext heading.
+        if tokenizer.events[paragraph_exit_before].name == Name::Paragraph {
+            let paragraph_enter = skip::to_back(
+                &tokenizer.events,
+                paragraph_exit_before - 1,
+                &[Name::Paragraph],
+            );
 
             // Change types of Enter:Paragraph, Exit:Paragraph.
-            tokenizer.events[enter].name = Name::HeadingSetextText;
-            tokenizer.events[exit].name = Name::HeadingSetextText;
+            tokenizer.events[paragraph_enter].name = Name::HeadingSetextText;
+            tokenizer.events[paragraph_exit_before].name = Name::HeadingSetextText;
 
             // Add Enter:HeadingSetext, Exit:HeadingSetext.
-            let mut heading_enter = tokenizer.events[enter].clone();
+            let mut heading_enter = tokenizer.events[paragraph_enter].clone();
             heading_enter.name = Name::HeadingSetext;
-            let mut heading_exit = tokenizer.events[index].clone();
+            tokenizer.map.add(paragraph_enter, 0, vec![heading_enter]);
+            let mut heading_exit = tokenizer.events[exit].clone();
             heading_exit.name = Name::HeadingSetext;
-
-            tokenizer.map.add(enter, 0, vec![heading_enter]);
-            tokenizer.map.add(index + 1, 0, vec![heading_exit]);
+            tokenizer.map.add(exit + 1, 0, vec![heading_exit]);
+        } else {
+            // There’s a following paragraph, move this underline inside it.
+            if exit + 3 < tokenizer.events.len()
+                && tokenizer.events[exit + 1].name == Name::LineEnding
+                && tokenizer.events[exit + 3].name == Name::Paragraph
+            {
+                // Swap type, HeadingSetextUnderline:Enter -> Paragraph:Enter.
+                tokenizer.events[enter].name = Name::Paragraph;
+                // Swap type, LineEnding -> Data.
+                tokenizer.events[exit + 1].name = Name::Data;
+                tokenizer.events[exit + 2].name = Name::Data;
+                // Move new data (was line ending) back to include whole line,
+                // and link data together.
+                tokenizer.events[exit + 1].point = tokenizer.events[enter].point.clone();
+                tokenizer.events[exit + 1].link = Some(Link {
+                    previous: None,
+                    next: Some(exit + 4),
+                    content: Content::Text,
+                });
+                tokenizer.events[exit + 4].link.as_mut().unwrap().previous = Some(exit + 1);
+                // Remove *including* HeadingSetextUnderline:Exit, until the line ending.
+                tokenizer.map.add(enter + 1, exit - enter, vec![]);
+                // Remove old Paragraph:Enter.
+                tokenizer.map.add(exit + 3, 1, vec![]);
+            } else {
+                // Swap type.
+                tokenizer.events[enter].name = Name::Paragraph;
+                tokenizer.events[exit].name = Name::Paragraph;
+                // Replace what’s inside the underline (whitespace, sequence).
+                tokenizer.map.add(
+                    enter + 1,
+                    exit - enter - 1,
+                    vec![
+                        Event {
+                            name: Name::Data,
+                            kind: Kind::Enter,
+                            point: tokenizer.events[enter].point.clone(),
+                            link: Some(Link {
+                                previous: None,
+                                next: None,
+                                content: Content::Text,
+                            }),
+                        },
+                        Event {
+                            name: Name::Data,
+                            kind: Kind::Exit,
+                            point: tokenizer.events[exit].point.clone(),
+                            link: None,
+                        },
+                    ],
+                );
+            }
         }
 
-        index += 1;
+        enter = skip::to(&tokenizer.events, exit + 1, &[Name::HeadingSetextUnderline]);
     }
+
+    tokenizer.map.consume(&mut tokenizer.events);
+
+    Ok(None)
 }
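
Because the text before an underline is now generic `Content` that may turn out to be only definitions, `resolve` can no longer assume a paragraph precedes it. It distinguishes three cases: a paragraph before (a real setext heading), a paragraph directly after (the underline folds into it, so `==` followed by `b` becomes a single paragraph), or neither (the underline becomes a paragraph of its own). A reduced model of that decision (outcome names are illustrative):

```rust
#[derive(Debug, PartialEq)]
enum Outcome {
    Heading,       // content before:  "a\n=="   -> a heading
    JoinParagraph, // paragraph after: "==\nb"   -> one paragraph "==\nb"
    Paragraph,     // neither:         "=="      -> its own paragraph
}

fn classify(paragraph_before: bool, paragraph_after: bool) -> Outcome {
    if paragraph_before {
        Outcome::Heading
    } else if paragraph_after {
        Outcome::JoinParagraph
    } else {
        Outcome::Paragraph
    }
}

fn main() {
    assert_eq!(classify(true, false), Outcome::Heading);
    assert_eq!(classify(false, true), Outcome::JoinParagraph);
    assert_eq!(classify(false, false), Outcome::Paragraph);
}
```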
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index ce1c295..95b9a27 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -183,6 +183,7 @@ use crate::construct::partial_space_or_tab_eol::space_or_tab_eol;
 use crate::event::{Event, Kind, Name};
 use crate::resolve::Name as ResolveName;
 use crate::state::{Name as StateName, State};
+use crate::subtokenize::Subresult;
 use crate::tokenizer::{Label, LabelKind, LabelStart, Tokenizer};
 use crate::util::{
     constant::RESOURCE_DESTINATION_BALANCE_MAX,
@@ -660,7 +661,7 @@ pub fn reference_collapsed_open(tokenizer: &mut Tokenizer) -> State {
 ///
 /// This turns matching label starts and label ends into links, images, and
 /// footnotes, and turns unmatched label starts back into data.
-pub fn resolve(tokenizer: &mut Tokenizer) {
+pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
     // Inject labels.
     let labels = tokenizer.tokenize_state.labels.split_off(0);
     inject_labels(tokenizer, &labels);
@@ -671,6 +672,8 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
     mark_as_data(tokenizer, &starts);
 
     tokenizer.map.consume(&mut tokenizer.events);
+
+    Ok(None)
 }
 
 /// Inject links/images/footnotes.
diff --git a/src/construct/list_item.rs b/src/construct/list_item.rs
index 658c2c7..13b740b 100644
--- a/src/construct/list_item.rs
+++ b/src/construct/list_item.rs
@@ -62,13 +62,14 @@ use crate::construct::partial_space_or_tab::space_or_tab_min_max;
 use crate::event::{Kind, Name};
 use crate::resolve::Name as ResolveName;
 use crate::state::{Name as StateName, State};
+use crate::subtokenize::Subresult;
 use crate::tokenizer::Tokenizer;
 use crate::util::{
     constant::{LIST_ITEM_VALUE_SIZE_MAX, TAB_SIZE},
     skip,
     slice::{Position, Slice},
 };
-use alloc::{vec, vec::Vec};
+use alloc::{string::String, vec, vec::Vec};
 
 /// Start of list item.
 ///
@@ -370,7 +371,7 @@ pub fn cont_filled(tokenizer: &mut Tokenizer) -> State {
 }
 
 /// Find adjacent list items with the same marker.
-pub fn resolve(tokenizer: &mut Tokenizer) {
+pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
     let mut lists_wip: Vec<(u8, usize, usize, usize)> = vec![];
     let mut lists: Vec<(u8, usize, usize, usize)> = vec![];
     let mut index = 0;
@@ -472,4 +473,6 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
 
         index += 1;
     }
+
+    Ok(None)
 }
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index 1afa105..ae6facf 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -16,7 +16,7 @@
 //! Content types also have a *rest* thing: after all things are parsed,
 //! there’s something left.
 //! In document, that is [flow][].
-//! In flow, that is a [paragraph][].
+//! In flow, that is [content][].
 //! In string and text, that is [data][partial_data].
 //!
 //! ## Construct
@@ -37,6 +37,7 @@
 //! *   [character escape][character_escape]
 //! *   [character reference][character_reference]
 //! *   [code (indented)][code_indented]
+//! *   [content][]
 //! *   [definition][]
 //! *   [hard break (escape)][hard_break_escape]
 //! *   [heading (atx)][heading_atx]
@@ -149,6 +150,7 @@ pub mod block_quote;
 pub mod character_escape;
 pub mod character_reference;
 pub mod code_indented;
+pub mod content;
 pub mod definition;
 pub mod document;
 pub mod flow;
diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs
index c1e7311..78fbacb 100644
--- a/src/construct/paragraph.rs
+++ b/src/construct/paragraph.rs
@@ -1,4 +1,4 @@
-//! Paragraph occurs in the [flow][] content type.
+//! Paragraph occurs in the [content][] content type.
 //!
 //! ## Grammar
 //!
@@ -11,14 +11,15 @@
 //! paragraph ::= 1*line *(eol 1*line)
 //! ```
 //!
-//! As this construct occurs in flow, like all flow constructs, it must be
-//! followed by an eol (line ending) or eof (end of file).
+//! This construct must be followed by an eol (line ending) or eof (end of
+//! file), like flow constructs.
 //!
 //! Paragraphs can contain line endings and whitespace, but they are not
 //! allowed to contain blank lines, or to be blank themselves.
 //!
 //! The paragraph is interpreted as the [text][] content type.
-//! That means that [autolinks][autolink], [code (text)][raw_text], etc are allowed.
+//! That means that [autolinks][autolink], [code (text)][raw_text], etc are
+//! allowed.
 //!
 //! ## HTML
 //!
@@ -34,40 +35,57 @@
 //! *   [`content.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/content.js)
 //! *   [*§ 4.8 Paragraphs* in `CommonMark`](https://spec.commonmark.org/0.30/#paragraphs)
 //!
-//! [flow]: crate::construct::flow
+//! [content]: crate::construct::content
 //! [text]: crate::construct::text
 //! [autolink]: crate::construct::autolink
 //! [raw_text]: crate::construct::raw_text
 //! [html]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-p-element
 
-use crate::event::{Content, Kind, Link, Name};
-use crate::resolve::Name as ResolveName;
+use crate::event::{Content, Link, Name};
 use crate::state::{Name as StateName, State};
+use crate::subtokenize::link;
 use crate::tokenizer::Tokenizer;
-use alloc::vec;
 
-/// Before paragraph.
+/// Paragraph start.
 ///
 /// ```markdown
 /// > | abc
 ///     ^
+///   | def
 /// ```
 pub fn start(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        None | Some(b'\n') => unreachable!("unexpected eol/eof"),
-        _ => {
-            tokenizer.enter(Name::Paragraph);
-            tokenizer.enter_link(
-                Name::Data,
-                Link {
-                    previous: None,
-                    next: None,
-                    content: Content::Text,
-                },
-            );
-            State::Retry(StateName::ParagraphInside)
-        }
+    debug_assert!(tokenizer.current.is_some());
+    tokenizer.enter(Name::Paragraph);
+    State::Retry(StateName::ParagraphLineStart)
+}
+
+/// Start of a line in a paragraph.
+///
+/// ```markdown
+/// > | abc
+///     ^
+/// > | def
+///     ^
+/// ```
+pub fn line_start(tokenizer: &mut Tokenizer) -> State {
+    debug_assert!(tokenizer.current.is_some());
+    tokenizer.enter_link(
+        Name::Data,
+        Link {
+            previous: None,
+            next: None,
+            content: Content::Text,
+        },
+    );
+
+    if tokenizer.tokenize_state.connect {
+        let index = tokenizer.events.len() - 1;
+        link(&mut tokenizer.events, index);
+    } else {
+        tokenizer.tokenize_state.connect = true;
     }
+
+    State::Retry(StateName::ParagraphInside)
 }
 
 /// In paragraph.
@@ -78,91 +96,20 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 /// ```
 pub fn inside(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        None | Some(b'\n') => {
+        None => {
+            tokenizer.tokenize_state.connect = false;
             tokenizer.exit(Name::Data);
             tokenizer.exit(Name::Paragraph);
-            tokenizer.register_resolver_before(ResolveName::Paragraph);
-            // You’d be interrupting.
-            tokenizer.interrupt = true;
             State::Ok
         }
+        Some(b'\n') => {
+            tokenizer.consume();
+            tokenizer.exit(Name::Data);
+            State::Next(StateName::ParagraphLineStart)
+        }
         _ => {
             tokenizer.consume();
             State::Next(StateName::ParagraphInside)
         }
     }
 }
-
-/// Merge “`Paragraph`”s, which currently span a single line, into actual
-/// `Paragraph`s that span multiple lines.
-pub fn resolve(tokenizer: &mut Tokenizer) {
-    let mut index = 0;
-
-    while index < tokenizer.events.len() {
-        let event = &tokenizer.events[index];
-
-        if event.kind == Kind::Enter && event.name == Name::Paragraph {
-            // Exit:Paragraph
-            let mut exit_index = index + 3;
-
-            loop {
-                let mut enter_index = exit_index + 1;
-
-                if enter_index == tokenizer.events.len()
-                    || tokenizer.events[enter_index].name != Name::LineEnding
-                {
-                    break;
-                }
-
-                enter_index += 2;
-
-                while enter_index < tokenizer.events.len() {
-                    let event = &tokenizer.events[enter_index];
-
-                    if event.name != Name::SpaceOrTab
-                        && event.name != Name::BlockQuotePrefix
-                        && event.name != Name::BlockQuoteMarker
-                    {
-                        break;
-                    }
-
-                    enter_index += 1;
-                }
-
-                if enter_index == tokenizer.events.len()
-                    || tokenizer.events[enter_index].name != Name::Paragraph
-                {
-                    break;
-                }
-
-                // Remove Exit:Paragraph, Enter:LineEnding, Exit:LineEnding.
-                tokenizer.map.add(exit_index, 3, vec![]);
-
-                // Remove Enter:Paragraph.
-                tokenizer.map.add(enter_index, 1, vec![]);
-
-                // Add Exit:LineEnding position info to Exit:Data.
-                tokenizer.events[exit_index - 1].point =
-                    tokenizer.events[exit_index + 2].point.clone();
-
-                // Link Enter:Data on the previous line to Enter:Data on this line.
-                if let Some(link) = &mut tokenizer.events[exit_index - 2].link {
-                    link.next = Some(enter_index + 1);
-                }
-                if let Some(link) = &mut tokenizer.events[enter_index + 1].link {
-                    link.previous = Some(exit_index - 2);
-                }
-
-                // Potential next start.
-                exit_index = enter_index + 3;
-            }
-
-            // Move to `Exit:Paragraph`.
-            index = exit_index;
-        }
-
-        index += 1;
-    }
-
-    tokenizer.map.consume(&mut tokenizer.events);
-}
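
Paragraph loses its resolver entirely: the new `line_start` state links each line's `Data` event to the previous one while parsing, guarded by the `connect` flag, instead of a separate pass merging single-line paragraphs afterwards. A self-contained sketch of that incremental linking (a stand-in `Link`, not the crate's event type):

```rust
#[derive(Debug, Default)]
struct Link {
    previous: Option<usize>,
    next: Option<usize>,
}

fn main() {
    let mut links: Vec<Link> = vec![];
    let mut connect = false;

    for _line in ["abc", "def", "ghi"] {
        // One Data event per paragraph line.
        links.push(Link::default());
        let index = links.len() - 1;
        if connect {
            // Mirrors `subtokenize::link`: tie this Data to the previous one.
            links[index - 1].next = Some(index);
            links[index].previous = Some(index - 1);
        } else {
            // First line of the paragraph: nothing to connect to yet.
            connect = true;
        }
    }

    assert_eq!(links[0].next, Some(1));
    assert_eq!(links[2].previous, Some(1));
}
```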
diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs
index b6f1f47..b36d9f0 100644
--- a/src/construct/partial_data.rs
+++ b/src/construct/partial_data.rs
@@ -8,8 +8,9 @@
 
 use crate::event::{Kind, Name};
 use crate::state::{Name as StateName, State};
+use crate::subtokenize::Subresult;
 use crate::tokenizer::Tokenizer;
-use alloc::vec;
+use alloc::{string::String, vec};
 
 /// At beginning of data.
 ///
@@ -72,7 +73,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {
 }
 
 /// Merge adjacent data events.
-pub fn resolve(tokenizer: &mut Tokenizer) {
+pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
     let mut index = 0;
 
     // Loop through events and merge adjacent data events.
@@ -103,4 +104,6 @@
 
         index += 1;
     }
+
+    Ok(None)
 }
diff --git a/src/construct/string.rs b/src/construct/string.rs
index dba1ac1..cf2f222 100644
--- a/src/construct/string.rs
+++ b/src/construct/string.rs
@@ -15,7 +15,9 @@
 use crate::construct::partial_whitespace::resolve_whitespace;
 use crate::resolve::Name as ResolveName;
 use crate::state::{Name as StateName, State};
+use crate::subtokenize::Subresult;
 use crate::tokenizer::Tokenizer;
+use alloc::string::String;
 
 /// Characters that can start something in string.
 const MARKERS: [u8; 2] = [b'&', b'\\'];
@@ -74,6 +76,8 @@ pub fn before_data(tokenizer: &mut Tokenizer) -> State {
 }
 
 /// Resolve whitespace in string.
-pub fn resolve(tokenizer: &mut Tokenizer) {
+pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
     resolve_whitespace(tokenizer, false, false);
+
+    Ok(None)
 }
diff --git a/src/construct/text.rs b/src/construct/text.rs
index 34ea071..2648531 100644
--- a/src/construct/text.rs
+++ b/src/construct/text.rs
@@ -28,7 +28,9 @@ use crate::construct::gfm_autolink_literal::resolve as resolve_gfm_autolink_lite
 use crate::construct::partial_whitespace::resolve_whitespace;
 use crate::resolve::Name as ResolveName;
 use crate::state::{Name as StateName, State};
+use crate::subtokenize::Subresult;
 use crate::tokenizer::Tokenizer;
+use alloc::string::String;
 
 /// Characters that can start something in text.
 const MARKERS: [u8; 16] = [
@@ -242,7 +244,7 @@ pub fn before_data(tokenizer: &mut Tokenizer) -> State {
 }
 
 /// Resolve whitespace.
-pub fn resolve(tokenizer: &mut Tokenizer) {
+pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
     resolve_whitespace(
         tokenizer,
         tokenizer.parse_state.options.constructs.hard_break_trailing,
@@ -257,4 +259,6 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
     {
         resolve_gfm_autolink_literal(tokenizer);
    }
+
+    Ok(None)
 }
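
With resolvers able to hand back embedded content, the caller is expected to loop: `content::resolve` returns `Ok(Some(result))`, and parsing repeats until a pass reports it is done. A sketch of that driving idea (a mock, not the crate's actual driver; the real `Subresult` carries more fields than shown):

```rust
struct Subresult {
    done: bool, // whether any embedded content remains to retokenize
}

fn resolve_pass(pass: &mut u32) -> Result<Option<Subresult>, String> {
    *pass += 1;
    // Pretend the first pass surfaced embedded content needing another pass.
    Ok(Some(Subresult { done: *pass > 1 }))
}

fn main() -> Result<(), String> {
    let mut pass = 0;
    loop {
        match resolve_pass(&mut pass)? {
            Some(result) if !result.done => continue,
            _ => break,
        }
    }
    assert_eq!(pass, 2);
    Ok(())
}
```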
