diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-09-14 16:21:42 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-09-14 16:26:24 +0200 |
commit | 74d2688aa329f0a41c2a92034c3454ed9299e71a (patch) | |
tree | 9ec8fdc6e40ff7cd40a14408afcc47716990134e | |
parent | 65d4b46c2a3bdecb0493e484473d2de3d124f839 (diff) | |
download | markdown-rs-74d2688aa329f0a41c2a92034c3454ed9299e71a.tar.gz markdown-rs-74d2688aa329f0a41c2a92034c3454ed9299e71a.tar.bz2 markdown-rs-74d2688aa329f0a41c2a92034c3454ed9299e71a.zip |
Fix to prefer flow over definitions, setext headings
CommonMark does not document what should happen when another construct starts
inside a definition label or definition title (both of which can span multiple lines).
Can flow (or containers?) interrupt them?
According to the `cmark` reference parser they can, so that behavior is implemented here.
This adds a new `Content` content type, which houses zero or more definitions,
followed by zero or one paragraph.
Content can be followed by a setext heading underline. That underline becomes
one of three things: part of a setext heading, when the content before it ends
in a paragraph; the start of the following paragraph, when it is followed by
content that starts with a paragraph; or otherwise a stray paragraph of its own.
Diffstat (limited to '')
-rw-r--r-- | readme.md | 2 | ||||
-rw-r--r-- | src/compiler.rs | 6 | ||||
-rw-r--r-- | src/construct/attention.rs | 6 | ||||
-rw-r--r-- | src/construct/content.rs | 188 | ||||
-rw-r--r-- | src/construct/definition.rs | 26 | ||||
-rw-r--r-- | src/construct/document.rs | 5 | ||||
-rw-r--r-- | src/construct/flow.rs | 33 | ||||
-rw-r--r-- | src/construct/gfm_table.rs | 61 | ||||
-rw-r--r-- | src/construct/heading_atx.rs | 7 | ||||
-rw-r--r-- | src/construct/heading_setext.rs | 137 | ||||
-rw-r--r-- | src/construct/label_end.rs | 5 | ||||
-rw-r--r-- | src/construct/list_item.rs | 7 | ||||
-rw-r--r-- | src/construct/mod.rs | 4 | ||||
-rw-r--r-- | src/construct/paragraph.rs | 149 | ||||
-rw-r--r-- | src/construct/partial_data.rs | 7 | ||||
-rw-r--r-- | src/construct/string.rs | 6 | ||||
-rw-r--r-- | src/construct/text.rs | 6 | ||||
-rw-r--r-- | src/event.rs | 51 | ||||
-rw-r--r-- | src/parser.rs | 23 | ||||
-rw-r--r-- | src/resolve.rs | 20 | ||||
-rw-r--r-- | src/state.rs | 202 | ||||
-rw-r--r-- | src/subtokenize.rs | 61 | ||||
-rw-r--r-- | src/tokenizer.rs | 23 | ||||
-rw-r--r-- | tests/definition.rs | 36 | ||||
-rw-r--r-- | tests/fuzz.rs | 2 | ||||
-rw-r--r-- | tests/gfm_table.rs | 6 |
26 files changed, 724 insertions, 355 deletions
@@ -362,7 +362,7 @@ The following scripts are useful when working on this project: ``` - lint: ```sh - cargo fmt --check && cargo clippy -- -D clippy::pedantic -D clippy::cargo -A clippy::doc_link_with_quotes + cargo fmt --check && cargo clippy -- -D clippy::pedantic -D clippy::cargo -A clippy::doc_link_with_quotes -A clippy::unnecessary_wraps ``` - test: ```sh diff --git a/src/compiler.rs b/src/compiler.rs index 397e96f..d1ac774 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -463,7 +463,7 @@ fn exit(context: &mut CompileContext) { Name::HeadingAtxSequence => on_exit_heading_atx_sequence(context), Name::HeadingAtxText => on_exit_heading_atx_text(context), Name::HeadingSetextText => on_exit_heading_setext_text(context), - Name::HeadingSetextUnderline => on_exit_heading_setext_underline(context), + Name::HeadingSetextUnderlineSequence => on_exit_heading_setext_underline_sequence(context), Name::HtmlFlow | Name::HtmlText => on_exit_html(context), Name::HtmlFlowData | Name::HtmlTextData => on_exit_html_data(context), Name::Image | Name::Link => on_exit_media(context), @@ -1440,8 +1440,8 @@ fn on_exit_heading_setext_text(context: &mut CompileContext) { context.slurp_one_line_ending = true; } -/// Handle [`Exit`][Kind::Exit]:[`HeadingSetextUnderline`][Name::HeadingSetextUnderline]. -fn on_exit_heading_setext_underline(context: &mut CompileContext) { +/// Handle [`Exit`][Kind::Exit]:[`HeadingSetextUnderlineSequence`][Name::HeadingSetextUnderlineSequence]. 
+fn on_exit_heading_setext_underline_sequence(context: &mut CompileContext) { let text = context .heading_setext_buffer .take() diff --git a/src/construct/attention.rs b/src/construct/attention.rs index 4a208df..4d58610 100644 --- a/src/construct/attention.rs +++ b/src/construct/attention.rs @@ -79,6 +79,7 @@ use crate::event::{Event, Kind, Name, Point}; use crate::resolve::Name as ResolveName; use crate::state::{Name as StateName, State}; +use crate::subtokenize::Subresult; use crate::tokenizer::Tokenizer; use crate::util::{ char::{ @@ -87,6 +88,7 @@ use crate::util::{ }, slice::Slice, }; +use alloc::string::String; use alloc::{vec, vec::Vec}; /// Attentention sequence that we can take markers from. @@ -150,7 +152,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State { } /// Resolve sequences. -pub fn resolve(tokenizer: &mut Tokenizer) { +pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> { // Find all sequences, gather info about them. let mut sequences = get_sequences(tokenizer); @@ -221,6 +223,8 @@ pub fn resolve(tokenizer: &mut Tokenizer) { } tokenizer.map.consume(&mut tokenizer.events); + + Ok(None) } /// Get sequences. diff --git a/src/construct/content.rs b/src/construct/content.rs new file mode 100644 index 0000000..6c10cea --- /dev/null +++ b/src/construct/content.rs @@ -0,0 +1,188 @@ +//! Content occurs in the [flow][] content type. +//! +//! Content contains zero or more [definition][definition]s, followed by zero +//! or one [paragraph][]. +//! +//! The constructs found in flow are: +//! +//! * [Definition][crate::construct::definition] +//! * [Paragraph][crate::construct::paragraph] +//! +//! ## Tokens +//! +//! * [`Content`][Name::Content] +//! +//! > 👉 **Note**: while parsing, [`Content`][Name::Content] +//! > is used, which is later compiled away. +//! +//! ## References +//! +//! * [`content.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/content.js) +//! 
+//! [flow]: crate::construct::flow +//! [definition]: crate::construct::definition +//! [paragraph]: crate::construct::paragraph + +use crate::event::{Content, Kind, Link, Name}; +use crate::resolve::Name as ResolveName; +use crate::state::{Name as StateName, State}; +use crate::subtokenize::{subtokenize, Subresult}; +use crate::tokenizer::Tokenizer; +use alloc::{string::String, vec}; + +/// Before a content content. +/// +/// ```markdown +/// > | abc +/// ^ +/// ``` +pub fn chunk_start(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + None | Some(b'\n') => unreachable!("unexpected eol/eof"), + _ => { + tokenizer.enter_link( + Name::Content, + Link { + previous: None, + next: None, + content: Content::Content, + }, + ); + State::Retry(StateName::ContentChunkInside) + } + } +} + +/// In a content chunk. +/// +/// ```markdown +/// > | abc +/// ^^^ +/// ``` +pub fn chunk_inside(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + None | Some(b'\n') => { + tokenizer.exit(Name::Content); + tokenizer.register_resolver_before(ResolveName::Content); + // You’d be interrupting. + tokenizer.interrupt = true; + State::Ok + } + _ => { + tokenizer.consume(); + State::Next(StateName::ContentChunkInside) + } + } +} + +/// Before a definition. +/// +/// ```markdown +/// > | [a]: b +/// ^ +/// ``` +pub fn definition_before(tokenizer: &mut Tokenizer) -> State { + tokenizer.attempt( + State::Next(StateName::ContentDefinitionAfter), + State::Next(StateName::ParagraphStart), + ); + State::Retry(StateName::DefinitionStart) +} + +/// After a definition. 
+/// +/// ```markdown +/// > | [a]: b +/// ^ +/// | c +/// ``` +pub fn definition_after(tokenizer: &mut Tokenizer) -> State { + debug_assert!(matches!(tokenizer.current, None | Some(b'\n'))); + if tokenizer.current.is_none() { + State::Ok + } else { + tokenizer.enter(Name::LineEnding); + tokenizer.consume(); + tokenizer.exit(Name::LineEnding); + State::Next(StateName::ContentDefinitionBefore) + } +} + +/// Merge `Content` chunks, which currently span a single line, into actual +/// `Content`s that span multiple lines. +pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> { + let mut index = 0; + + while index < tokenizer.events.len() { + let event = &tokenizer.events[index]; + + if event.kind == Kind::Enter && event.name == Name::Content { + // Exit:Content + let mut exit_index = index + 1; + + loop { + let mut enter_index = exit_index + 1; + + if enter_index == tokenizer.events.len() + || tokenizer.events[enter_index].name != Name::LineEnding + { + break; + } + + // Skip past line ending. + enter_index += 2; + + // Skip past prefix. + while enter_index < tokenizer.events.len() { + let event = &tokenizer.events[enter_index]; + + if event.name != Name::SpaceOrTab + && event.name != Name::BlockQuotePrefix + && event.name != Name::BlockQuoteMarker + { + break; + } + + enter_index += 1; + } + + if enter_index == tokenizer.events.len() + || tokenizer.events[enter_index].name != Name::Content + { + break; + } + + // Set Exit:Content point to Exit:LineEnding. + tokenizer.events[exit_index].point = tokenizer.events[exit_index + 2].point.clone(); + // Remove Enter:LineEnding, Exit:LineEnding. + tokenizer.map.add(exit_index + 1, 2, vec![]); + + // Link Enter:Content to Enter:Content on this line and vice versa. + tokenizer.events[exit_index - 1].link.as_mut().unwrap().next = Some(enter_index); + tokenizer.events[enter_index] + .link + .as_mut() + .unwrap() + .previous = Some(exit_index - 1); + + // Potential next start. 
+ exit_index = enter_index + 1; + } + + // Move to `Exit:Content`. + index = exit_index; + } + + index += 1; + } + + tokenizer.map.consume(&mut tokenizer.events); + + let result = subtokenize( + &mut tokenizer.events, + tokenizer.parse_state, + &Some(Content::Content), + )?; + + Ok(Some(result)) +} diff --git a/src/construct/definition.rs b/src/construct/definition.rs index 1071489..8ccfb90 100644 --- a/src/construct/definition.rs +++ b/src/construct/definition.rs @@ -1,4 +1,4 @@ -//! Definition occurs in the [flow] content type. +//! Definition occurs in the [content] content type. //! //! ## Grammar //! @@ -12,8 +12,8 @@ //! ; those parts. //! ``` //! -//! As this construct occurs in flow, like all flow constructs, it must be -//! followed by an eol (line ending) or eof (end of file). +//! This construct must be followed by an eol (line ending) or eof (end of +//! file), like flow constructs. //! //! See [`destination`][destination], [`label`][label], and [`title`][title] //! for grammar, notes, and recommendations on each part. @@ -88,7 +88,7 @@ //! * [`definition.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/definition.js) //! * [*§ 4.7 Link reference definitions* in `CommonMark`](https://spec.commonmark.org/0.30/#link-reference-definitions) //! -//! [flow]: crate::construct::flow +//! [content]: crate::construct::content //! [string]: crate::construct::string //! [character_escape]: crate::construct::character_escape //! 
[character_reference]: crate::construct::character_reference @@ -157,7 +157,10 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_1 = Name::DefinitionLabel; tokenizer.tokenize_state.token_2 = Name::DefinitionLabelMarker; tokenizer.tokenize_state.token_3 = Name::DefinitionLabelString; - tokenizer.attempt(State::Next(StateName::DefinitionLabelAfter), State::Nok); + tokenizer.attempt( + State::Next(StateName::DefinitionLabelAfter), + State::Next(StateName::DefinitionLabelNok), + ); State::Retry(StateName::LabelStart) } _ => State::Nok, @@ -192,6 +195,19 @@ pub fn label_after(tokenizer: &mut Tokenizer) -> State { } } +/// At a non-label +/// +/// ```markdown +/// > | [] +/// ^ +/// ``` +pub fn label_nok(tokenizer: &mut Tokenizer) -> State { + tokenizer.tokenize_state.token_1 = Name::Data; + tokenizer.tokenize_state.token_2 = Name::Data; + tokenizer.tokenize_state.token_3 = Name::Data; + State::Nok +} + /// After marker. /// /// ```markdown diff --git a/src/construct/document.rs b/src/construct/document.rs index 45a961d..82f2ebd 100644 --- a/src/construct/document.rs +++ b/src/construct/document.rs @@ -413,7 +413,7 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State { while !document_lazy_continuation_current && stack_index > 0 { stack_index -= 1; let name = &child.stack[stack_index]; - if name == &Name::Paragraph || name == &Name::Definition || name == &Name::GfmTableHead { + if name == &Name::Content || name == &Name::GfmTableHead { document_lazy_continuation_current = true; } } @@ -423,7 +423,7 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State { if !document_lazy_continuation_current && !child.events.is_empty() { let before = skip::opt_back(&child.events, child.events.len() - 1, &[Name::LineEnding]); let name = &child.events[before].name; - if name == &Name::Paragraph { + if name == &Name::Content { document_lazy_continuation_current = true; } } @@ -582,6 +582,7 @@ fn resolve(tokenizer: &mut Tokenizer) { &tokenizer.events, 
flow_index, &mut child.events, + (0, 0), ); // Replace the flow data with actual events. diff --git a/src/construct/flow.rs b/src/construct/flow.rs index e97ee63..08e0466 100644 --- a/src/construct/flow.rs +++ b/src/construct/flow.rs @@ -12,7 +12,6 @@ //! //! * [Blank line][crate::construct::blank_line] //! * [Code (indented)][crate::construct::code_indented] -//! * [Definition][crate::construct::definition] //! * [Heading (atx)][crate::construct::heading_atx] //! * [Heading (setext)][crate::construct::heading_setext] //! * [HTML (flow)][crate::construct::html_flow] @@ -40,14 +39,14 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { Some(b'#') => { tokenizer.attempt( State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), + State::Next(StateName::FlowBeforeContent), ); State::Retry(StateName::HeadingAtxStart) } Some(b'$' | b'`' | b'~') => { tokenizer.attempt( State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), + State::Next(StateName::FlowBeforeContent), ); State::Retry(StateName::RawFlowStart) } @@ -56,7 +55,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { Some(b'*' | b'_') => { tokenizer.attempt( State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), + State::Next(StateName::FlowBeforeContent), ); State::Retry(StateName::ThematicBreakStart) } @@ -70,12 +69,12 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { Some(b'{') => { tokenizer.attempt( State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), + State::Next(StateName::FlowBeforeContent), ); State::Retry(StateName::MdxExpressionFlowStart) } // Actual parsing: blank line? Indented code? Indented anything? - // Tables, setext heading underlines, definitions, and paragraphs are + // Tables, setext heading underlines, definitions, and Contents are // particularly weird. 
_ => State::Retry(StateName::FlowBlankLineBefore), } @@ -217,34 +216,20 @@ pub fn before_mdx_expression(tokenizer: &mut Tokenizer) -> State { pub fn before_gfm_table(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeDefinition), + State::Next(StateName::FlowBeforeContent), ); State::Retry(StateName::GfmTableStart) } -/// At definition. -/// -/// ```markdown -/// > | [a]: b -/// ^ -/// ``` -pub fn before_definition(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt( - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), - ); - State::Retry(StateName::DefinitionStart) -} - -/// At paragraph. +/// At content. /// /// ```markdown /// > | a /// ^ /// ``` -pub fn before_paragraph(tokenizer: &mut Tokenizer) -> State { +pub fn before_content(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt(State::Next(StateName::FlowAfter), State::Nok); - State::Retry(StateName::ParagraphStart) + State::Retry(StateName::ContentChunkStart) } /// After blank line. diff --git a/src/construct/gfm_table.rs b/src/construct/gfm_table.rs index 27fbadf..63772c4 100644 --- a/src/construct/gfm_table.rs +++ b/src/construct/gfm_table.rs @@ -229,9 +229,10 @@ use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max} use crate::event::{Content, Event, Kind, Link, Name}; use crate::resolve::Name as ResolveName; use crate::state::{Name as StateName, State}; +use crate::subtokenize::Subresult; use crate::tokenizer::Tokenizer; use crate::util::{constant::TAB_SIZE, skip::opt_back as skip_opt_back}; -use alloc::vec; +use alloc::{string::String, vec}; /// Start of a GFM table. /// @@ -771,15 +772,13 @@ pub fn body_row_escape(tokenizer: &mut Tokenizer) -> State { } /// Resolve GFM table. 
-pub fn resolve(tokenizer: &mut Tokenizer) { +pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> { let mut index = 0; - // let mut tables = vec![]; let mut in_first_cell_awaiting_pipe = true; let mut in_row = false; let mut in_delimiter_row = false; let mut last_cell = (0, 0, 0, 0); let mut cell = (0, 0, 0, 0); - let mut after_head_awaiting_first_body_row = false; let mut last_table_end = 0; let mut last_table_has_body = false; @@ -800,17 +799,14 @@ pub fn resolve(tokenizer: &mut Tokenizer) { } // Inject table start. - tokenizer.map.add( - index, - 0, - vec![Event { - kind: Kind::Enter, - name: Name::GfmTable, - point: tokenizer.events[index].point.clone(), - link: None, - }], - ); - } else if event.name == Name::GfmTableRow || event.name == Name::GfmTableDelimiterRow { + let enter = Event { + kind: Kind::Enter, + name: Name::GfmTable, + point: tokenizer.events[index].point.clone(), + link: None, + }; + tokenizer.map.add(index, 0, vec![enter]); + } else if matches!(event.name, Name::GfmTableRow | Name::GfmTableDelimiterRow) { in_delimiter_row = event.name == Name::GfmTableDelimiterRow; in_row = true; in_first_cell_awaiting_pipe = true; @@ -821,23 +817,21 @@ pub fn resolve(tokenizer: &mut Tokenizer) { if after_head_awaiting_first_body_row { after_head_awaiting_first_body_row = false; last_table_has_body = true; - tokenizer.map.add( - index, - 0, - vec![Event { - kind: Kind::Enter, - name: Name::GfmTableBody, - point: tokenizer.events[index].point.clone(), - link: None, - }], - ); + let enter = Event { + kind: Kind::Enter, + name: Name::GfmTableBody, + point: tokenizer.events[index].point.clone(), + link: None, + }; + tokenizer.map.add(index, 0, vec![enter]); } } // Cell data. 
else if in_row - && (event.name == Name::Data - || event.name == Name::GfmTableDelimiterMarker - || event.name == Name::GfmTableDelimiterFiller) + && matches!( + event.name, + Name::Data | Name::GfmTableDelimiterMarker | Name::GfmTableDelimiterFiller + ) { in_first_cell_awaiting_pipe = false; @@ -868,7 +862,7 @@ pub fn resolve(tokenizer: &mut Tokenizer) { } else if event.name == Name::GfmTableHead { after_head_awaiting_first_body_row = true; last_table_end = index; - } else if event.name == Name::GfmTableRow || event.name == Name::GfmTableDelimiterRow { + } else if matches!(event.name, Name::GfmTableRow | Name::GfmTableDelimiterRow) { in_row = false; last_table_end = index; if last_cell.1 != 0 { @@ -878,9 +872,10 @@ pub fn resolve(tokenizer: &mut Tokenizer) { flush_cell(tokenizer, cell, in_delimiter_row, Some(index)); } } else if in_row - && (event.name == Name::Data - || event.name == Name::GfmTableDelimiterMarker - || event.name == Name::GfmTableDelimiterFiller) + && (matches!( + event.name, + Name::Data | Name::GfmTableDelimiterMarker | Name::GfmTableDelimiterFiller + )) { cell.3 = index; } @@ -891,6 +886,8 @@ pub fn resolve(tokenizer: &mut Tokenizer) { if last_table_end != 0 { flush_table_end(tokenizer, last_table_end, last_table_has_body); } + + Ok(None) } /// Generate a cell. diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs index c1090c4..b76e455 100644 --- a/src/construct/heading_atx.rs +++ b/src/construct/heading_atx.rs @@ -66,9 +66,10 @@ use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max} use crate::event::{Content, Event, Kind, Link, Name}; use crate::resolve::Name as ResolveName; use crate::state::{Name as StateName, State}; +use crate::subtokenize::Subresult; use crate::tokenizer::Tokenizer; use crate::util::constant::{HEADING_ATX_OPENING_FENCE_SIZE_MAX, TAB_SIZE}; -use alloc::vec; +use alloc::{string::String, vec}; /// Start of a heading (atx). 
/// @@ -222,7 +223,7 @@ pub fn data(tokenizer: &mut Tokenizer) -> State { } /// Resolve heading (atx). -pub fn resolve(tokenizer: &mut Tokenizer) { +pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> { let mut index = 0; let mut heading_inside = false; let mut data_start: Option<usize> = None; @@ -281,4 +282,6 @@ pub fn resolve(tokenizer: &mut Tokenizer) { index += 1; } + + Ok(None) } diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs index e9cc759..3a484e1 100644 --- a/src/construct/heading_setext.rs +++ b/src/construct/heading_setext.rs @@ -54,6 +54,7 @@ //! * [`HeadingSetext`][Name::HeadingSetext] //! * [`HeadingSetextText`][Name::HeadingSetextText] //! * [`HeadingSetextUnderline`][Name::HeadingSetextUnderline] +//! * [`HeadingSetextUnderlineSequence`][Name::HeadingSetextUnderlineSequence] //! //! ## References //! @@ -70,12 +71,13 @@ //! [atx]: http://www.aaronsw.com/2002/atx/ use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}; -use crate::event::{Kind, Name}; +use crate::event::{Content, Event, Kind, Link, Name}; use crate::resolve::Name as ResolveName; use crate::state::{Name as StateName, State}; +use crate::subtokenize::Subresult; use crate::tokenizer::Tokenizer; -use crate::util::{constant::TAB_SIZE, skip::opt_back as skip_opt_back}; -use alloc::vec; +use crate::util::{constant::TAB_SIZE, skip}; +use alloc::{string::String, vec}; /// At start of heading (setext) underline. /// @@ -90,14 +92,16 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { && !tokenizer.pierce // Require a paragraph before. 
&& (!tokenizer.events.is_empty() - && tokenizer.events[skip_opt_back( + && tokenizer.events[skip::opt_back( &tokenizer.events, tokenizer.events.len() - 1, &[Name::LineEnding, Name::SpaceOrTab], )] .name - == Name::Paragraph) + == Name::Content) { + tokenizer.enter(Name::HeadingSetextUnderline); + if matches!(tokenizer.current, Some(b'\t' | b' ')) { tokenizer.attempt(State::Next(StateName::HeadingSetextBefore), State::Nok); State::Retry(space_or_tab_min_max( @@ -128,7 +132,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'-' | b'=') => { tokenizer.tokenize_state.marker = tokenizer.current.unwrap(); - tokenizer.enter(Name::HeadingSetextUnderline); + tokenizer.enter(Name::HeadingSetextUnderlineSequence); State::Retry(StateName::HeadingSetextInside) } _ => State::Nok, @@ -148,7 +152,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State { State::Next(StateName::HeadingSetextInside) } else { tokenizer.tokenize_state.marker = 0; - tokenizer.exit(Name::HeadingSetextUnderline); + tokenizer.exit(Name::HeadingSetextUnderlineSequence); if matches!(tokenizer.current, Some(b'\t' | b' ')) { tokenizer.attempt(State::Next(StateName::HeadingSetextAfter), State::Nok); @@ -172,6 +176,7 @@ pub fn after(tokenizer: &mut Tokenizer) -> State { // Feel free to interrupt. tokenizer.interrupt = false; tokenizer.register_resolver(ResolveName::HeadingSetext); + tokenizer.exit(Name::HeadingSetextUnderline); State::Ok } _ => State::Nok, @@ -179,42 +184,102 @@ pub fn after(tokenizer: &mut Tokenizer) -> State { } /// Resolve heading (setext). -pub fn resolve(tokenizer: &mut Tokenizer) { - let mut index = 0; - let mut paragraph_enter = None; - let mut paragraph_exit = None; - - while index < tokenizer.events.len() { - let event = &tokenizer.events[index]; - - // Find paragraphs. 
- if event.kind == Kind::Enter { - if event.name == Name::Paragraph { - paragraph_enter = Some(index); - } - } else if event.name == Name::Paragraph { - paragraph_exit = Some(index); - } - // We know this is preceded by a paragraph. - // Otherwise we don’t parse. - else if event.name == Name::HeadingSetextUnderline { - let enter = paragraph_enter.take().unwrap(); - let exit = paragraph_exit.take().unwrap(); +pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> { + tokenizer.map.consume(&mut tokenizer.events); + + let mut enter = skip::to(&tokenizer.events, 0, &[Name::HeadingSetextUnderline]); + + while enter < tokenizer.events.len() { + let exit = skip::to( + &tokenizer.events, + enter + 1, + &[Name::HeadingSetextUnderline], + ); + + // Find paragraph before + let paragraph_exit_before = skip::opt_back( + &tokenizer.events, + enter - 1, + &[Name::SpaceOrTab, Name::LineEnding, Name::BlockQuotePrefix], + ); + + // There’s a paragraph before: this is a setext heading. + if tokenizer.events[paragraph_exit_before].name == Name::Paragraph { + let paragraph_enter = skip::to_back( + &tokenizer.events, + paragraph_exit_before - 1, + &[Name::Paragraph], + ); // Change types of Enter:Paragraph, Exit:Paragraph. - tokenizer.events[enter].name = Name::HeadingSetextText; - tokenizer.events[exit].name = Name::HeadingSetextText; + tokenizer.events[paragraph_enter].name = Name::HeadingSetextText; + tokenizer.events[paragraph_exit_before].name = Name::HeadingSetextText; // Add Enter:HeadingSetext, Exit:HeadingSetext. 
- let mut heading_enter = tokenizer.events[enter].clone(); + let mut heading_enter = tokenizer.events[paragraph_enter].clone(); heading_enter.name = Name::HeadingSetext; - let mut heading_exit = tokenizer.events[index].clone(); + tokenizer.map.add(paragraph_enter, 0, vec![heading_enter]); + let mut heading_exit = tokenizer.events[exit].clone(); heading_exit.name = Name::HeadingSetext; - - tokenizer.map.add(enter, 0, vec![heading_enter]); - tokenizer.map.add(index + 1, 0, vec![heading_exit]); + tokenizer.map.add(exit + 1, 0, vec![heading_exit]); + } else { + // There’s a following paragraph, move this underline inside it. + if exit + 3 < tokenizer.events.len() + && tokenizer.events[exit + 1].name == Name::LineEnding + && tokenizer.events[exit + 3].name == Name::Paragraph + { + // Swap type, HeadingSetextUnderline:Enter -> Paragraph:Enter. + tokenizer.events[enter].name = Name::Paragraph; + // Swap type, LineEnding -> Data. + tokenizer.events[exit + 1].name = Name::Data; + tokenizer.events[exit + 2].name = Name::Data; + // Move new data (was line ending) back to include whole line, + // and link data together. + tokenizer.events[exit + 1].point = tokenizer.events[enter].point.clone(); + tokenizer.events[exit + 1].link = Some(Link { + previous: None, + next: Some(exit + 4), + content: Content::Text, + }); + tokenizer.events[exit + 4].link.as_mut().unwrap().previous = Some(exit + 1); + // Remove *including* HeadingSetextUnderline:Exit, until the line ending. + tokenizer.map.add(enter + 1, exit - enter, vec![]); + // Remove old Paragraph:Enter. + tokenizer.map.add(exit + 3, 1, vec![]); + } else { + // Swap type. + tokenizer.events[enter].name = Name::Paragraph; + tokenizer.events[exit].name = Name::Paragraph; + // Replace what’s inside the underline (whitespace, sequence). 
+ tokenizer.map.add( + enter + 1, + exit - enter - 1, + vec![ + Event { + name: Name::Data, + kind: Kind::Enter, + point: tokenizer.events[enter].point.clone(), + link: Some(Link { + previous: None, + next: None, + content: Content::Text, + }), + }, + Event { + name: Name::Data, + kind: Kind::Exit, + point: tokenizer.events[exit].point.clone(), + link: None, + }, + ], + ); + } } - index += 1; + enter = skip::to(&tokenizer.events, exit + 1, &[Name::HeadingSetextUnderline]); } + + tokenizer.map.consume(&mut tokenizer.events); + + Ok(None) } diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs index ce1c295..95b9a27 100644 --- a/src/construct/label_end.rs +++ b/src/construct/label_end.rs @@ -183,6 +183,7 @@ use crate::construct::partial_space_or_tab_eol::space_or_tab_eol; use crate::event::{Event, Kind, Name}; use crate::resolve::Name as ResolveName; use crate::state::{Name as StateName, State}; +use crate::subtokenize::Subresult; use crate::tokenizer::{Label, LabelKind, LabelStart, Tokenizer}; use crate::util::{ constant::RESOURCE_DESTINATION_BALANCE_MAX, @@ -660,7 +661,7 @@ pub fn reference_collapsed_open(tokenizer: &mut Tokenizer) -> State { /// /// This turns matching label starts and label ends into links, images, and /// footnotes, and turns unmatched label starts back into data. -pub fn resolve(tokenizer: &mut Tokenizer) { +pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> { // Inject labels. let labels = tokenizer.tokenize_state.labels.split_off(0); inject_labels(tokenizer, &labels); @@ -671,6 +672,8 @@ pub fn resolve(tokenizer: &mut Tokenizer) { mark_as_data(tokenizer, &starts); tokenizer.map.consume(&mut tokenizer.events); + + Ok(None) } /// Inject links/images/footnotes. 
diff --git a/src/construct/list_item.rs b/src/construct/list_item.rs index 658c2c7..13b740b 100644 --- a/src/construct/list_item.rs +++ b/src/construct/list_item.rs @@ -62,13 +62,14 @@ use crate::construct::partial_space_or_tab::space_or_tab_min_max; use crate::event::{Kind, Name}; use crate::resolve::Name as ResolveName; use crate::state::{Name as StateName, State}; +use crate::subtokenize::Subresult; use crate::tokenizer::Tokenizer; use crate::util::{ constant::{LIST_ITEM_VALUE_SIZE_MAX, TAB_SIZE}, skip, slice::{Position, Slice}, }; -use alloc::{vec, vec::Vec}; +use alloc::{string::String, vec, vec::Vec}; /// Start of list item. /// @@ -370,7 +371,7 @@ pub fn cont_filled(tokenizer: &mut Tokenizer) -> State { } /// Find adjacent list items with the same marker. -pub fn resolve(tokenizer: &mut Tokenizer) { +pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> { let mut lists_wip: Vec<(u8, usize, usize, usize)> = vec![]; let mut lists: Vec<(u8, usize, usize, usize)> = vec![]; let mut index = 0; @@ -472,4 +473,6 @@ pub fn resolve(tokenizer: &mut Tokenizer) { index += 1; } + + Ok(None) } diff --git a/src/construct/mod.rs b/src/construct/mod.rs index 1afa105..ae6facf 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -16,7 +16,7 @@ //! Content types also have a *rest* thing: after all things are parsed, //! there’s something left. //! In document, that is [flow][]. -//! In flow, that is a [paragraph][]. +//! In flow, that is [content][]. //! In string and text, that is [data][partial_data]. //! //! ## Construct @@ -37,6 +37,7 @@ //! * [character escape][character_escape] //! * [character reference][character_reference] //! * [code (indented)][code_indented] +//! * [content][] //! * [definition][] //! * [hard break (escape)][hard_break_escape] //! 
* [heading (atx)][heading_atx] @@ -149,6 +150,7 @@ pub mod block_quote; pub mod character_escape; pub mod character_reference; pub mod code_indented; +pub mod content; pub mod definition; pub mod document; pub mod flow; diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs index c1e7311..78fbacb 100644 --- a/src/construct/paragraph.rs +++ b/src/construct/paragraph.rs @@ -1,4 +1,4 @@ -//! Paragraph occurs in the [flow][] content type. +//! Paragraph occurs in the [content][] content type. //! //! ## Grammar //! @@ -11,14 +11,15 @@ //! paragraph ::= 1*line *(eol 1*line) //! ``` //! -//! As this construct occurs in flow, like all flow constructs, it must be -//! followed by an eol (line ending) or eof (end of file). +//! This construct must be followed by an eol (line ending) or eof (end of +//! file), like flow constructs. //! //! Paragraphs can contain line endings and whitespace, but they are not //! allowed to contain blank lines, or to be blank themselves. //! //! The paragraph is interpreted as the [text][] content type. -//! That means that [autolinks][autolink], [code (text)][raw_text], etc are allowed. +//! That means that [autolinks][autolink], [code (text)][raw_text], etc are +//! allowed. //! //! ## HTML //! @@ -34,40 +35,57 @@ //! * [`content.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/content.js) //! * [*§ 4.8 Paragraphs* in `CommonMark`](https://spec.commonmark.org/0.30/#paragraphs) //! -//! [flow]: crate::construct::flow +//! [content]: crate::construct::content //! [text]: crate::construct::text //! [autolink]: crate::construct::autolink //! [raw_text]: crate::construct::raw_text //! 
[html]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-p-element -use crate::event::{Content, Kind, Link, Name}; -use crate::resolve::Name as ResolveName; +use crate::event::{Content, Link, Name}; use crate::state::{Name as StateName, State}; +use crate::subtokenize::link; use crate::tokenizer::Tokenizer; -use alloc::vec; -/// Before paragraph. +/// Paragraph start. /// /// ```markdown /// > | abc /// ^ +/// | def /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { - match tokenizer.current { - None | Some(b'\n') => unreachable!("unexpected eol/eof"), - _ => { - tokenizer.enter(Name::Paragraph); - tokenizer.enter_link( - Name::Data, - Link { - previous: None, - next: None, - content: Content::Text, - }, - ); - State::Retry(StateName::ParagraphInside) - } + debug_assert!(tokenizer.current.is_some()); + tokenizer.enter(Name::Paragraph); + State::Retry(StateName::ParagraphLineStart) +} + +/// Start of a line in a paragraph. +/// +/// ```markdown +/// > | abc +/// ^ +/// > | def +/// ^ +/// ``` +pub fn line_start(tokenizer: &mut Tokenizer) -> State { + debug_assert!(tokenizer.current.is_some()); + tokenizer.enter_link( + Name::Data, + Link { + previous: None, + next: None, + content: Content::Text, + }, + ); + + if tokenizer.tokenize_state.connect { + let index = tokenizer.events.len() - 1; + link(&mut tokenizer.events, index); + } else { + tokenizer.tokenize_state.connect = true; } + + State::Retry(StateName::ParagraphInside) } /// In paragraph. @@ -78,91 +96,20 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - None | Some(b'\n') => { + None => { + tokenizer.tokenize_state.connect = false; tokenizer.exit(Name::Data); tokenizer.exit(Name::Paragraph); - tokenizer.register_resolver_before(ResolveName::Paragraph); - // You’d be interrupting. 
- tokenizer.interrupt = true; State::Ok } + Some(b'\n') => { + tokenizer.consume(); + tokenizer.exit(Name::Data); + State::Next(StateName::ParagraphLineStart) + } _ => { tokenizer.consume(); State::Next(StateName::ParagraphInside) } } } - -/// Merge “`Paragraph`”s, which currently span a single line, into actual -/// `Paragraph`s that span multiple lines. -pub fn resolve(tokenizer: &mut Tokenizer) { - let mut index = 0; - - while index < tokenizer.events.len() { - let event = &tokenizer.events[index]; - - if event.kind == Kind::Enter && event.name == Name::Paragraph { - // Exit:Paragraph - let mut exit_index = index + 3; - - loop { - let mut enter_index = exit_index + 1; - - if enter_index == tokenizer.events.len() - || tokenizer.events[enter_index].name != Name::LineEnding - { - break; - } - - enter_index += 2; - - while enter_index < tokenizer.events.len() { - let event = &tokenizer.events[enter_index]; - - if event.name != Name::SpaceOrTab - && event.name != Name::BlockQuotePrefix - && event.name != Name::BlockQuoteMarker - { - break; - } - - enter_index += 1; - } - - if enter_index == tokenizer.events.len() - || tokenizer.events[enter_index].name != Name::Paragraph - { - break; - } - - // Remove Exit:Paragraph, Enter:LineEnding, Exit:LineEnding. - tokenizer.map.add(exit_index, 3, vec![]); - - // Remove Enter:Paragraph. - tokenizer.map.add(enter_index, 1, vec![]); - - // Add Exit:LineEnding position info to Exit:Data. - tokenizer.events[exit_index - 1].point = - tokenizer.events[exit_index + 2].point.clone(); - - // Link Enter:Data on the previous line to Enter:Data on this line. - if let Some(link) = &mut tokenizer.events[exit_index - 2].link { - link.next = Some(enter_index + 1); - } - if let Some(link) = &mut tokenizer.events[enter_index + 1].link { - link.previous = Some(exit_index - 2); - } - - // Potential next start. - exit_index = enter_index + 3; - } - - // Move to `Exit:Paragraph`. 
- index = exit_index; - } - - index += 1; - } - - tokenizer.map.consume(&mut tokenizer.events); -} diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs index b6f1f47..b36d9f0 100644 --- a/src/construct/partial_data.rs +++ b/src/construct/partial_data.rs @@ -8,8 +8,9 @@ use crate::event::{Kind, Name}; use crate::state::{Name as StateName, State}; +use crate::subtokenize::Subresult; use crate::tokenizer::Tokenizer; -use alloc::vec; +use alloc::{string::String, vec}; /// At beginning of data. /// @@ -72,7 +73,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State { } /// Merge adjacent data events. -pub fn resolve(tokenizer: &mut Tokenizer) { +pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> { let mut index = 0; // Loop through events and merge adjacent data events. @@ -103,4 +104,6 @@ pub fn resolve(tokenizer: &mut Tokenizer) { index += 1; } + + Ok(None) } diff --git a/src/construct/string.rs b/src/construct/string.rs index dba1ac1..cf2f222 100644 --- a/src/construct/string.rs +++ b/src/construct/string.rs @@ -15,7 +15,9 @@ use crate::construct::partial_whitespace::resolve_whitespace; use crate::resolve::Name as ResolveName; use crate::state::{Name as StateName, State}; +use crate::subtokenize::Subresult; use crate::tokenizer::Tokenizer; +use alloc::string::String; /// Characters that can start something in string. const MARKERS: [u8; 2] = [b'&', b'\\']; @@ -74,6 +76,8 @@ pub fn before_data(tokenizer: &mut Tokenizer) -> State { } /// Resolve whitespace in string. 
-pub fn resolve(tokenizer: &mut Tokenizer) { +pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> { resolve_whitespace(tokenizer, false, false); + + Ok(None) } diff --git a/src/construct/text.rs b/src/construct/text.rs index 34ea071..2648531 100644 --- a/src/construct/text.rs +++ b/src/construct/text.rs @@ -28,7 +28,9 @@ use crate::construct::gfm_autolink_literal::resolve as resolve_gfm_autolink_lite use crate::construct::partial_whitespace::resolve_whitespace; use crate::resolve::Name as ResolveName; use crate::state::{Name as StateName, State}; +use crate::subtokenize::Subresult; use crate::tokenizer::Tokenizer; +use alloc::string::String; /// Characters that can start something in text. const MARKERS: [u8; 16] = [ @@ -242,7 +244,7 @@ pub fn before_data(tokenizer: &mut Tokenizer) -> State { } /// Resolve whitespace. -pub fn resolve(tokenizer: &mut Tokenizer) { +pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> { resolve_whitespace( tokenizer, tokenizer.parse_state.options.constructs.hard_break_trailing, @@ -257,4 +259,6 @@ pub fn resolve(tokenizer: &mut Tokenizer) { { resolve_gfm_autolink_literal(tokenizer); } + + Ok(None) } diff --git a/src/event.rs b/src/event.rs index de3f95f..a2626ee 100644 --- a/src/event.rs +++ b/src/event.rs @@ -554,6 +554,26 @@ pub enum Name { /// ^ ^ /// ``` CodeTextSequence, + /// Content. + /// + /// ## Info + /// + /// * **Context**: + /// [flow content][crate::construct::flow] + /// * **Content model**: + /// [content][crate::construct::content] + /// * **Construct**: + /// [`content`][crate::construct::content] + /// + /// ## Example + /// + /// ```markdown + /// > | [a]: b + /// ^^^^^^ + /// > | c. + /// ^^ + /// ``` + Content, /// Data. 
/// /// ## Info @@ -1754,7 +1774,8 @@ pub enum Name { /// * **Context**: /// [`HeadingSetext`][Name::HeadingSetext] /// * **Content model**: - /// void + /// [`HeadingSetextUnderlineSequence`][Name::HeadingSetextUnderlineSequence], + /// [`SpaceOrTab`][Name::SpaceOrTab] /// * **Construct**: /// [`heading_setext`][crate::construct::heading_setext] /// @@ -1766,6 +1787,25 @@ pub enum Name { /// ^^^^^ /// ``` HeadingSetextUnderline, + /// Heading (setext) underline sequence. + /// + /// ## Info + /// + /// * **Context**: + /// [`HeadingSetext`][Name::HeadingSetext] + /// * **Content model**: + /// void + /// * **Construct**: + /// [`heading_setext`][crate::construct::heading_setext] + /// + /// ## Example + /// + /// ```markdown + /// | alpha + /// > | ===== + /// ^^^^^ + /// ``` + HeadingSetextUnderlineSequence, /// Whole html (flow). /// /// ## Info @@ -2914,13 +2954,12 @@ pub enum Name { /// ^ /// ``` MdxJsxTagSelfClosingMarker, - - /// Whole paragraph. + /// Paragraph. /// /// ## Info /// /// * **Context**: - /// [flow content][crate::construct::flow] + /// [content][crate::construct::content] /// * **Content model**: /// [text content][crate::construct::text] /// * **Construct**: @@ -3340,7 +3379,7 @@ pub const VOID_EVENTS: [Name; 75] = [ Name::HardBreakEscape, Name::HardBreakTrailing, Name::HeadingAtxSequence, - Name::HeadingSetextUnderline, + Name::HeadingSetextUnderlineSequence, Name::HtmlFlowData, Name::HtmlTextData, Name::LabelImageMarker, @@ -3380,6 +3419,8 @@ pub const VOID_EVENTS: [Name; 75] = [ pub enum Content { /// Represents [flow content][crate::construct::flow]. Flow, + /// Represents [content][crate::construct::content]. + Content, /// Represents [string content][crate::construct::string]. String, /// Represents [text content][crate::construct::text]. 
diff --git a/src/parser.rs b/src/parser.rs index 3a7713a..c69eb38 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -49,16 +49,25 @@ pub fn parse<'a>(value: &'a str, options: &'a Options) -> Result<(Vec<Event>, &' (parse_state.bytes.len(), 0), State::Next(StateName::DocumentStart), ); - tokenizer.flush(state, true)?; - + let mut result = tokenizer.flush(state, true)?; let mut events = tokenizer.events; - let footnote = tokenizer.tokenize_state.gfm_footnote_definitions; - let normal = tokenizer.tokenize_state.definitions; - parse_state.gfm_footnote_definitions = footnote; - parse_state.definitions = normal; + parse_state + .gfm_footnote_definitions + .append(&mut result.gfm_footnote_definitions); + parse_state.definitions.append(&mut result.definitions); + + loop { + let mut result = subtokenize(&mut events, &parse_state, &None)?; + parse_state + .gfm_footnote_definitions + .append(&mut result.gfm_footnote_definitions); + parse_state.definitions.append(&mut result.definitions); - while !(subtokenize(&mut events, &parse_state)?) {} + if result.done { + break; + } + } Ok((events, parse_state.bytes)) } diff --git a/src/resolve.rs b/src/resolve.rs index d015213..2586676 100644 --- a/src/resolve.rs +++ b/src/resolve.rs @@ -1,7 +1,9 @@ //! Resolve events. use crate::construct; +use crate::subtokenize::Subresult; use crate::tokenizer::Tokenizer; +use alloc::string::String; /// Names of resolvers. #[derive(Clone, Copy, Debug, Eq, PartialEq)] @@ -32,8 +34,8 @@ pub enum Name { HeadingAtx, /// Resolve heading (setext). /// - /// Heading (setext) is parsed as an underline that is preceded by a - /// paragraph, both will form the whole construct. + /// Heading (setext) is parsed as an underline that is preceded by content, + /// both will form the whole construct. HeadingSetext, /// Resolve list item. /// @@ -41,12 +43,12 @@ pub enum Name { /// They are wrapped into ordered or unordered lists based on whether items /// with the same marker occur next to each other. 
ListItem, - /// Resolve paragraphs. + /// Resolve content. /// - /// Paragraphs are parsed as single line paragraphs, as what remains if - /// other flow constructs don’t match. + /// Content is parsed as single lines, as what remains if other flow + /// constructs don’t match. /// But, when they occur next to each other, they need to be merged. - Paragraph, + Content, /// Resolve data. /// /// Data is parsed as many small bits, due to many punctuation characters @@ -61,7 +63,7 @@ pub enum Name { } /// Call the corresponding resolver. -pub fn call(tokenizer: &mut Tokenizer, name: Name) { +pub fn call(tokenizer: &mut Tokenizer, name: Name) -> Result<Option<Subresult>, String> { let func = match name { Name::Label => construct::label_end::resolve, Name::Attention => construct::attention::resolve, @@ -69,11 +71,11 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) { Name::HeadingAtx => construct::heading_atx::resolve, Name::HeadingSetext => construct::heading_setext::resolve, Name::ListItem => construct::list_item::resolve, - Name::Paragraph => construct::paragraph::resolve, + Name::Content => construct::content::resolve, Name::Data => construct::partial_data::resolve, Name::String => construct::string::resolve, Name::Text => construct::text::resolve, }; - func(tokenizer); + func(tokenizer) } diff --git a/src/state.rs b/src/state.rs index 1d15239..896761e 100644 --- a/src/state.rs +++ b/src/state.rs @@ -75,24 +75,6 @@ pub enum Name { CharacterReferenceNumeric, CharacterReferenceValue, - RawFlowStart, - RawFlowBeforeSequenceOpen, - RawFlowSequenceOpen, - RawFlowInfoBefore, - RawFlowInfo, - RawFlowMetaBefore, - RawFlowMeta, - RawFlowAtNonLazyBreak, - RawFlowCloseStart, - RawFlowBeforeSequenceClose, - RawFlowSequenceClose, - RawFlowAfterSequenceClose, - RawFlowContentBefore, - RawFlowContentStart, - RawFlowBeforeContentChunk, - RawFlowContentChunk, - RawFlowAfter, - CodeIndentedStart, CodeIndentedAtBreak, CodeIndentedAfter, @@ -101,11 +83,10 @@ pub enum Name { 
CodeIndentedFurtherBegin, CodeIndentedFurtherAfter, - RawTextStart, - RawTextSequenceOpen, - RawTextBetween, - RawTextData, - RawTextSequenceClose, + ContentChunkStart, + ContentChunkInside, + ContentDefinitionBefore, + ContentDefinitionAfter, DataStart, DataInside, @@ -114,6 +95,7 @@ pub enum Name { DefinitionStart, DefinitionBefore, DefinitionLabelAfter, + DefinitionLabelNok, DefinitionMarkerAfter, DefinitionDestinationBefore, DefinitionDestinationAfter, @@ -155,11 +137,10 @@ pub enum Name { FlowBeforeHeadingAtx, FlowBeforeHeadingSetext, FlowBeforeThematicBreak, - FlowBeforeDefinition, FlowAfter, FlowBlankLineBefore, FlowBlankLineAfter, - FlowBeforeParagraph, + FlowBeforeContent, FrontmatterStart, FrontmatterOpenSequence, @@ -363,6 +344,21 @@ pub enum Name { ListItemContBlank, ListItemContFilled, + MdxExpressionTextStart, + MdxExpressionTextAfter, + + MdxExpressionFlowStart, + MdxExpressionFlowBefore, + MdxExpressionFlowAfter, + MdxExpressionFlowEnd, + + MdxExpressionStart, + MdxExpressionBefore, + MdxExpressionInside, + MdxExpressionEolAfter, + MdxJsxAttributeValueExpressionAfter, + MdxJsxAttributeExpressionAfter, + MdxJsxFlowStart, MdxJsxFlowBefore, MdxJsxFlowAfter, @@ -402,8 +398,33 @@ pub enum Name { NonLazyContinuationAfter, ParagraphStart, + ParagraphLineStart, ParagraphInside, + RawFlowStart, + RawFlowBeforeSequenceOpen, + RawFlowSequenceOpen, + RawFlowInfoBefore, + RawFlowInfo, + RawFlowMetaBefore, + RawFlowMeta, + RawFlowAtNonLazyBreak, + RawFlowCloseStart, + RawFlowBeforeSequenceClose, + RawFlowSequenceClose, + RawFlowAfterSequenceClose, + RawFlowContentBefore, + RawFlowContentStart, + RawFlowBeforeContentChunk, + RawFlowContentChunk, + RawFlowAfter, + + RawTextStart, + RawTextSequenceOpen, + RawTextBetween, + RawTextData, + RawTextSequenceClose, + SpaceOrTabStart, SpaceOrTabInside, SpaceOrTabAfter, @@ -438,47 +459,12 @@ pub enum Name { TitleAtBlankLine, TitleEscape, TitleInside, - - MdxExpressionTextStart, - MdxExpressionTextAfter, - - 
MdxExpressionFlowStart, - MdxExpressionFlowBefore, - MdxExpressionFlowAfter, - MdxExpressionFlowEnd, - - MdxExpressionStart, - MdxExpressionBefore, - MdxExpressionInside, - MdxExpressionEolAfter, - MdxJsxAttributeValueExpressionAfter, - MdxJsxAttributeExpressionAfter, } #[allow(clippy::too_many_lines)] /// Call the corresponding state for a state name. pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { let func = match name { - Name::MdxExpressionTextStart => construct::mdx_expression_text::start, - Name::MdxExpressionTextAfter => construct::mdx_expression_text::after, - - Name::MdxExpressionFlowStart => construct::mdx_expression_flow::start, - Name::MdxExpressionFlowBefore => construct::mdx_expression_flow::before, - Name::MdxExpressionFlowAfter => construct::mdx_expression_flow::after, - Name::MdxExpressionFlowEnd => construct::mdx_expression_flow::end, - - Name::MdxExpressionStart => construct::partial_mdx_expression::start, - Name::MdxExpressionBefore => construct::partial_mdx_expression::before, - Name::MdxExpressionInside => construct::partial_mdx_expression::inside, - Name::MdxExpressionEolAfter => construct::partial_mdx_expression::eol_after, - - Name::MdxJsxAttributeValueExpressionAfter => { - construct::partial_mdx_jsx::attribute_value_expression_after - } - Name::MdxJsxAttributeExpressionAfter => { - construct::partial_mdx_jsx::attribute_expression_after - } - Name::AttentionStart => construct::attention::start, Name::AttentionInside => construct::attention::inside, @@ -511,24 +497,6 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { Name::CharacterReferenceNumeric => construct::character_reference::numeric, Name::CharacterReferenceValue => construct::character_reference::value, - Name::RawFlowStart => construct::raw_flow::start, - Name::RawFlowBeforeSequenceOpen => construct::raw_flow::before_sequence_open, - Name::RawFlowSequenceOpen => construct::raw_flow::sequence_open, - Name::RawFlowInfoBefore => 
construct::raw_flow::info_before, - Name::RawFlowInfo => construct::raw_flow::info, - Name::RawFlowMetaBefore => construct::raw_flow::meta_before, - Name::RawFlowMeta => construct::raw_flow::meta, - Name::RawFlowAtNonLazyBreak => construct::raw_flow::at_non_lazy_break, - Name::RawFlowCloseStart => construct::raw_flow::close_start, - Name::RawFlowBeforeSequenceClose => construct::raw_flow::before_sequence_close, - Name::RawFlowSequenceClose => construct::raw_flow::sequence_close, - Name::RawFlowAfterSequenceClose => construct::raw_flow::sequence_close_after, - Name::RawFlowContentBefore => construct::raw_flow::content_before, - Name::RawFlowContentStart => construct::raw_flow::content_start, - Name::RawFlowBeforeContentChunk => construct::raw_flow::before_content_chunk, - Name::RawFlowContentChunk => construct::raw_flow::content_chunk, - Name::RawFlowAfter => construct::raw_flow::after, - Name::CodeIndentedStart => construct::code_indented::start, Name::CodeIndentedAtBreak => construct::code_indented::at_break, Name::CodeIndentedAfter => construct::code_indented::after, @@ -537,11 +505,10 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { Name::CodeIndentedFurtherBegin => construct::code_indented::further_begin, Name::CodeIndentedFurtherAfter => construct::code_indented::further_after, - Name::RawTextStart => construct::raw_text::start, - Name::RawTextSequenceOpen => construct::raw_text::sequence_open, - Name::RawTextBetween => construct::raw_text::between, - Name::RawTextData => construct::raw_text::data, - Name::RawTextSequenceClose => construct::raw_text::sequence_close, + Name::ContentChunkStart => construct::content::chunk_start, + Name::ContentChunkInside => construct::content::chunk_inside, + Name::ContentDefinitionBefore => construct::content::definition_before, + Name::ContentDefinitionAfter => construct::content::definition_after, Name::DataStart => construct::partial_data::start, Name::DataInside => construct::partial_data::inside, @@ -550,6 
+517,7 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { Name::DefinitionStart => construct::definition::start, Name::DefinitionBefore => construct::definition::before, Name::DefinitionLabelAfter => construct::definition::label_after, + Name::DefinitionLabelNok => construct::definition::label_nok, Name::DefinitionMarkerAfter => construct::definition::marker_after, Name::DefinitionDestinationBefore => construct::definition::destination_before, Name::DefinitionDestinationAfter => construct::definition::destination_after, @@ -599,11 +567,10 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { Name::FlowBeforeHeadingAtx => construct::flow::before_heading_atx, Name::FlowBeforeHeadingSetext => construct::flow::before_heading_setext, Name::FlowBeforeThematicBreak => construct::flow::before_thematic_break, - Name::FlowBeforeDefinition => construct::flow::before_definition, Name::FlowAfter => construct::flow::after, Name::FlowBlankLineBefore => construct::flow::blank_line_before, Name::FlowBlankLineAfter => construct::flow::blank_line_after, - Name::FlowBeforeParagraph => construct::flow::before_paragraph, + Name::FlowBeforeContent => construct::flow::before_content, Name::FrontmatterStart => construct::frontmatter::start, Name::FrontmatterOpenSequence => construct::frontmatter::open_sequence, @@ -624,7 +591,6 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { Name::GfmAutolinkLiteralProtocolSlashesInside => { construct::gfm_autolink_literal::protocol_slashes_inside } - Name::GfmAutolinkLiteralWwwAfter => construct::gfm_autolink_literal::www_after, Name::GfmAutolinkLiteralWwwStart => construct::gfm_autolink_literal::www_start, Name::GfmAutolinkLiteralWwwPrefixInside => { @@ -636,7 +602,6 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { construct::gfm_autolink_literal::domain_at_punctuation } Name::GfmAutolinkLiteralDomainAfter => construct::gfm_autolink_literal::domain_after, - Name::GfmAutolinkLiteralPathInside => 
construct::gfm_autolink_literal::path_inside, Name::GfmAutolinkLiteralPathAtPunctuation => { construct::gfm_autolink_literal::path_at_punctuation @@ -671,21 +636,12 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { Name::GfmLabelStartFootnoteStart => construct::gfm_label_start_footnote::start, Name::GfmLabelStartFootnoteOpen => construct::gfm_label_start_footnote::open, - Name::GfmTaskListItemCheckStart => construct::gfm_task_list_item_check::start, - Name::GfmTaskListItemCheckInside => construct::gfm_task_list_item_check::inside, - Name::GfmTaskListItemCheckClose => construct::gfm_task_list_item_check::close, - Name::GfmTaskListItemCheckAfter => construct::gfm_task_list_item_check::after, - Name::GfmTaskListItemCheckAfterSpaceOrTab => { - construct::gfm_task_list_item_check::after_space_or_tab - } - Name::GfmTableStart => construct::gfm_table::start, Name::GfmTableHeadRowBefore => construct::gfm_table::head_row_before, Name::GfmTableHeadRowStart => construct::gfm_table::head_row_start, Name::GfmTableHeadRowBreak => construct::gfm_table::head_row_break, Name::GfmTableHeadRowData => construct::gfm_table::head_row_data, Name::GfmTableHeadRowEscape => construct::gfm_table::head_row_escape, - Name::GfmTableHeadDelimiterStart => construct::gfm_table::head_delimiter_start, Name::GfmTableHeadDelimiterBefore => construct::gfm_table::head_delimiter_before, Name::GfmTableHeadDelimiterCellBefore => construct::gfm_table::head_delimiter_cell_before, @@ -699,13 +655,20 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { } Name::GfmTableHeadDelimiterCellAfter => construct::gfm_table::head_delimiter_cell_after, Name::GfmTableHeadDelimiterNok => construct::gfm_table::head_delimiter_nok, - Name::GfmTableBodyRowBefore => construct::gfm_table::body_row_before, Name::GfmTableBodyRowStart => construct::gfm_table::body_row_start, Name::GfmTableBodyRowBreak => construct::gfm_table::body_row_break, Name::GfmTableBodyRowData => 
construct::gfm_table::body_row_data, Name::GfmTableBodyRowEscape => construct::gfm_table::body_row_escape, + Name::GfmTaskListItemCheckStart => construct::gfm_task_list_item_check::start, + Name::GfmTaskListItemCheckInside => construct::gfm_task_list_item_check::inside, + Name::GfmTaskListItemCheckClose => construct::gfm_task_list_item_check::close, + Name::GfmTaskListItemCheckAfter => construct::gfm_task_list_item_check::after, + Name::GfmTaskListItemCheckAfterSpaceOrTab => { + construct::gfm_task_list_item_check::after_space_or_tab + } + Name::HardBreakEscapeStart => construct::hard_break_escape::start, Name::HardBreakEscapeAfter => construct::hard_break_escape::after, @@ -859,11 +822,25 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { Name::ListItemContBlank => construct::list_item::cont_blank, Name::ListItemContFilled => construct::list_item::cont_filled, + Name::MdxExpressionStart => construct::partial_mdx_expression::start, + Name::MdxExpressionBefore => construct::partial_mdx_expression::before, + Name::MdxExpressionInside => construct::partial_mdx_expression::inside, + Name::MdxExpressionEolAfter => construct::partial_mdx_expression::eol_after, + + Name::MdxExpressionFlowStart => construct::mdx_expression_flow::start, + Name::MdxExpressionFlowBefore => construct::mdx_expression_flow::before, + Name::MdxExpressionFlowAfter => construct::mdx_expression_flow::after, + Name::MdxExpressionFlowEnd => construct::mdx_expression_flow::end, + + Name::MdxExpressionTextStart => construct::mdx_expression_text::start, + Name::MdxExpressionTextAfter => construct::mdx_expression_text::after, + Name::MdxJsxFlowStart => construct::mdx_jsx_flow::start, Name::MdxJsxFlowBefore => construct::mdx_jsx_flow::before, Name::MdxJsxFlowAfter => construct::mdx_jsx_flow::after, Name::MdxJsxFlowEnd => construct::mdx_jsx_flow::end, Name::MdxJsxFlowNok => construct::mdx_jsx_flow::nok, + Name::MdxJsxTextStart => construct::mdx_jsx_text::start, Name::MdxJsxTextAfter => 
construct::mdx_jsx_text::after, Name::MdxJsxTextNok => construct::mdx_jsx_text::nok, @@ -883,6 +860,9 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { Name::MdxJsxLocalNameAfter => construct::partial_mdx_jsx::local_name_after, Name::MdxJsxAttributeBefore => construct::partial_mdx_jsx::attribute_before, Name::MdxJsxSelfClosing => construct::partial_mdx_jsx::self_closing, + Name::MdxJsxAttributeExpressionAfter => { + construct::partial_mdx_jsx::attribute_expression_after + } Name::MdxJsxAttributePrimaryName => construct::partial_mdx_jsx::attribute_primary_name, Name::MdxJsxAttributePrimaryNameAfter => { construct::partial_mdx_jsx::attribute_primary_name_after @@ -899,6 +879,9 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { construct::partial_mdx_jsx::attribute_value_quoted_start } Name::MdxJsxAttributeValueQuoted => construct::partial_mdx_jsx::attribute_value_quoted, + Name::MdxJsxAttributeValueExpressionAfter => { + construct::partial_mdx_jsx::attribute_value_expression_after + } Name::MdxJsxEsWhitespaceStart => construct::partial_mdx_jsx::es_whitespace_start, Name::MdxJsxEsWhitespaceInside => construct::partial_mdx_jsx::es_whitespace_inside, Name::MdxJsxEsWhitespaceEolAfter => construct::partial_mdx_jsx::es_whitespace_eol_after, @@ -907,8 +890,33 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { Name::NonLazyContinuationAfter => construct::partial_non_lazy_continuation::after, Name::ParagraphStart => construct::paragraph::start, + Name::ParagraphLineStart => construct::paragraph::line_start, Name::ParagraphInside => construct::paragraph::inside, + Name::RawFlowStart => construct::raw_flow::start, + Name::RawFlowBeforeSequenceOpen => construct::raw_flow::before_sequence_open, + Name::RawFlowSequenceOpen => construct::raw_flow::sequence_open, + Name::RawFlowInfoBefore => construct::raw_flow::info_before, + Name::RawFlowInfo => construct::raw_flow::info, + Name::RawFlowMetaBefore => construct::raw_flow::meta_before, + 
Name::RawFlowMeta => construct::raw_flow::meta, + Name::RawFlowAtNonLazyBreak => construct::raw_flow::at_non_lazy_break, + Name::RawFlowCloseStart => construct::raw_flow::close_start, + Name::RawFlowBeforeSequenceClose => construct::raw_flow::before_sequence_close, + Name::RawFlowSequenceClose => construct::raw_flow::sequence_close, + Name::RawFlowAfterSequenceClose => construct::raw_flow::sequence_close_after, + Name::RawFlowContentBefore => construct::raw_flow::content_before, + Name::RawFlowContentStart => construct::raw_flow::content_start, + Name::RawFlowBeforeContentChunk => construct::raw_flow::before_content_chunk, + Name::RawFlowContentChunk => construct::raw_flow::content_chunk, + Name::RawFlowAfter => construct::raw_flow::after, + + Name::RawTextStart => construct::raw_text::start, + Name::RawTextSequenceOpen => construct::raw_text::sequence_open, + Name::RawTextBetween => construct::raw_text::between, + Name::RawTextData => construct::raw_text::data, + Name::RawTextSequenceClose => construct::raw_text::sequence_close, + Name::SpaceOrTabStart => construct::partial_space_or_tab::start, Name::SpaceOrTabInside => construct::partial_space_or_tab::inside, Name::SpaceOrTabAfter => construct::partial_space_or_tab::after, diff --git a/src/subtokenize.rs b/src/subtokenize.rs index 12f91cf..5bb7e98 100644 --- a/src/subtokenize.rs +++ b/src/subtokenize.rs @@ -24,6 +24,13 @@ use crate::tokenizer::Tokenizer; use crate::util::{edit_map::EditMap, skip}; use alloc::{string::String, vec, vec::Vec}; +#[derive(Debug)] +pub struct Subresult { + pub done: bool, + pub gfm_footnote_definitions: Vec<String>, + pub definitions: Vec<String>, +} + /// Link two [`Event`][]s. /// /// Arbitrary (void) events can be linked together. @@ -69,10 +76,19 @@ pub fn link_to(events: &mut [Event], previous: usize, next: usize) { /// Parse linked events. /// /// Supposed to be called repeatedly, returns `true` when done. 
-pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> Result<bool, String> { +pub fn subtokenize( + events: &mut Vec<Event>, + parse_state: &ParseState, + filter: &Option<Content>, +) -> Result<Subresult, String> { let mut map = EditMap::new(); - let mut done = true; let mut index = 0; + let mut value = Subresult { + done: true, + gfm_footnote_definitions: vec![], + definitions: vec![], + }; + let mut acc = (0, 0); while index < events.len() { let event = &events[index]; @@ -82,16 +98,19 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> Result< debug_assert_eq!(event.kind, Kind::Enter); // No need to enter linked events again. - if link.previous == None { + if link.previous == None + && (filter.is_none() || &link.content == filter.as_ref().unwrap()) + { // Index into `events` pointing to a chunk. let mut link_index = Some(index); // Subtokenizer. let mut tokenizer = Tokenizer::new(event.point.clone(), parse_state); // Substate. - let mut state = State::Next(if link.content == Content::String { - StateName::StringStart - } else { - StateName::TextStart + let mut state = State::Next(match link.content { + Content::Flow => unreachable!("flow subcontent not implemented yet"), + Content::Content => StateName::ContentDefinitionBefore, + Content::String => StateName::StringStart, + Content::Text => StateName::TextStart, }); // Check if this is the first paragraph, after zero or more @@ -143,11 +162,14 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> Result< link_index = link_curr.next; } - tokenizer.flush(state, true)?; + let mut result = tokenizer.flush(state, true)?; + value + .gfm_footnote_definitions + .append(&mut result.gfm_footnote_definitions); + value.definitions.append(&mut result.definitions); + value.done = false; - divide_events(&mut map, events, index, &mut tokenizer.events); - - done = false; + acc = divide_events(&mut map, events, index, &mut tokenizer.events, acc); } } @@ -156,7 
+178,7 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> Result< map.consume(events); - Ok(done) + Ok(value) } /// Divide `child_events` over links in `events`, the first of which is at @@ -166,15 +188,17 @@ pub fn divide_events( events: &[Event], mut link_index: usize, child_events: &mut Vec<Event>, -) { + acc_before: (usize, usize), +) -> (usize, usize) { // Loop through `child_events` to figure out which parts belong where and // fix deep links. let mut child_index = 0; let mut slices = vec![]; let mut slice_start = 0; let mut old_prev: Option<usize> = None; + let len = child_events.len(); - while child_index < child_events.len() { + while child_index < len { let current = &child_events[child_index].point; let end = &events[link_index + 1].point; @@ -200,7 +224,8 @@ pub fn divide_events( } else { old_prev + link_index - (slices.len() - 1) * 2 }; - prev_event.link.as_mut().unwrap().next = Some(new_link); + prev_event.link.as_mut().unwrap().next = + Some(new_link + acc_before.1 - acc_before.0); } } @@ -219,7 +244,9 @@ pub fn divide_events( // The `index` in `events` where the current link is, // minus 2 events (the enter and exit) for each removed // link. 
- .map(|previous| previous + link_index - (slices.len() * 2)); + .map(|previous| { + previous + link_index - (slices.len() * 2) + acc_before.1 - acc_before.0 + }); } } @@ -245,4 +272,6 @@ pub fn divide_events( child_events.split_off(slices[index].1), ); } + + (acc_before.0 + (slices.len() * 2), acc_before.1 + len) } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 5095abb..8441f7e 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -12,6 +12,7 @@ use crate::event::{Content, Event, Kind, Link, Name, Point, VOID_EVENTS}; use crate::parser::ParseState; use crate::resolve::{call as call_resolve, Name as ResolveName}; use crate::state::{call, State}; +use crate::subtokenize::Subresult; use crate::util::{char::format_byte_opt, constant::TAB_SIZE, edit_map::EditMap}; use alloc::{boxed::Box, string::String, vec, vec::Vec}; @@ -609,23 +610,35 @@ impl<'a> Tokenizer<'a> { } /// Flush. - pub fn flush(&mut self, state: State, resolve: bool) -> Result<(), String> { + pub fn flush(&mut self, state: State, resolve: bool) -> Result<Subresult, String> { let to = (self.point.index, self.point.vs); let state = push_impl(self, to, to, state, true); - let result = state.to_result(); - if resolve && result.is_ok() { + state.to_result()?; + + let mut value = Subresult { + done: false, + gfm_footnote_definitions: self.tokenize_state.gfm_footnote_definitions.split_off(0), + definitions: self.tokenize_state.definitions.split_off(0), + }; + + if resolve { let resolvers = self.resolvers.split_off(0); let mut index = 0; while index < resolvers.len() { - call_resolve(self, resolvers[index]); + if let Some(mut result) = call_resolve(self, resolvers[index])? 
{ + value + .gfm_footnote_definitions + .append(&mut result.gfm_footnote_definitions); + value.definitions.append(&mut result.definitions); + } index += 1; } self.map.consume(&mut self.events); } - result + Ok(value) } } diff --git a/tests/definition.rs b/tests/definition.rs index 11f783d..6f680ff 100644 --- a/tests/definition.rs +++ b/tests/definition.rs @@ -441,6 +441,42 @@ fn definition() -> Result<(), String> { ); assert_eq!( + micromark("[\na\n=\n]: b"), + "<h1>[\na</h1>\n<p>]: b</p>", + "should prefer setext headings over definition labels" + ); + + assert_eq!( + micromark("[a]: b '\nc\n=\n'"), + "<h1>[a]: b '\nc</h1>\n<p>'</p>", + "should prefer setext headings over definition titles" + ); + + assert_eq!( + micromark("[\n***\n]: b"), + "<p>[</p>\n<hr />\n<p>]: b</p>", + "should prefer thematic breaks over definition labels" + ); + + assert_eq!( + micromark("[a]: b '\n***\n'"), + "<p>[a]: b '</p>\n<hr />\n<p>'</p>", + "should prefer thematic breaks over definition titles" + ); + + assert_eq!( + micromark("[\n```\n]: b"), + "<p>[</p>\n<pre><code>]: b\n</code></pre>\n", + "should prefer code (fenced) over definition labels" + ); + + assert_eq!( + micromark("[a]: b '\n```\n'"), + "<p>[a]: b '</p>\n<pre><code>'\n</code></pre>\n", + "should prefer code (fenced) over definition titles" + ); + + assert_eq!( micromark_with_options( "[foo]: /url \"title\"", &Options { diff --git a/tests/fuzz.rs b/tests/fuzz.rs index 146ff24..47dbea5 100644 --- a/tests/fuzz.rs +++ b/tests/fuzz.rs @@ -6,7 +6,7 @@ use pretty_assertions::assert_eq; fn fuzz() -> Result<(), String> { assert_eq!( micromark("[\n~\na\n-\n\n"), - "<p>[\n~\na</p>\n<ul>\n<li></li>\n</ul>\n", + "<h2>[\n~\na</h2>\n", "1: label, blank lines, and code" ); diff --git a/tests/gfm_table.rs b/tests/gfm_table.rs index 619bf2a..b7f884a 100644 --- a/tests/gfm_table.rs +++ b/tests/gfm_table.rs @@ -338,6 +338,12 @@ fn gfm_table() -> Result<(), String> { ); assert_eq!( + micromark_with_options("[\na\n:-\n]: b", &gfm)?, + 
"<p>[</p>\n<table>\n<thead>\n<tr>\n<th align=\"left\">a</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td align=\"left\">]: b</td>\n</tr>\n</tbody>\n</table>", + "should prefer GFM tables over definitions" + ); + + assert_eq!( micromark_with_options( r###"# Align |