-rw-r--r--  readme.md                       |   2
-rw-r--r--  src/compiler.rs                 |   6
-rw-r--r--  src/construct/attention.rs      |   6
-rw-r--r--  src/construct/content.rs        | 188
-rw-r--r--  src/construct/definition.rs     |  26
-rw-r--r--  src/construct/document.rs       |   5
-rw-r--r--  src/construct/flow.rs           |  33
-rw-r--r--  src/construct/gfm_table.rs      |  61
-rw-r--r--  src/construct/heading_atx.rs    |   7
-rw-r--r--  src/construct/heading_setext.rs | 137
-rw-r--r--  src/construct/label_end.rs      |   5
-rw-r--r--  src/construct/list_item.rs      |   7
-rw-r--r--  src/construct/mod.rs            |   4
-rw-r--r--  src/construct/paragraph.rs      | 149
-rw-r--r--  src/construct/partial_data.rs   |   7
-rw-r--r--  src/construct/string.rs         |   6
-rw-r--r--  src/construct/text.rs           |   6
-rw-r--r--  src/event.rs                    |  51
-rw-r--r--  src/parser.rs                   |  23
-rw-r--r--  src/resolve.rs                  |  20
-rw-r--r--  src/state.rs                    | 202
-rw-r--r--  src/subtokenize.rs              |  61
-rw-r--r--  src/tokenizer.rs                |  23
-rw-r--r--  tests/definition.rs             |  36
-rw-r--r--  tests/fuzz.rs                   |   2
-rw-r--r--  tests/gfm_table.rs              |   6
26 files changed, 724 insertions(+), 355 deletions(-)
diff --git a/readme.md b/readme.md
index c8ed452..eaf55eb 100644
--- a/readme.md
+++ b/readme.md
@@ -362,7 +362,7 @@ The following scripts are useful when working on this project:
```
- lint:
```sh
- cargo fmt --check && cargo clippy -- -D clippy::pedantic -D clippy::cargo -A clippy::doc_link_with_quotes
+ cargo fmt --check && cargo clippy -- -D clippy::pedantic -D clippy::cargo -A clippy::doc_link_with_quotes -A clippy::unnecessary_wraps
```
- test:
```sh
diff --git a/src/compiler.rs b/src/compiler.rs
index 397e96f..d1ac774 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -463,7 +463,7 @@ fn exit(context: &mut CompileContext) {
Name::HeadingAtxSequence => on_exit_heading_atx_sequence(context),
Name::HeadingAtxText => on_exit_heading_atx_text(context),
Name::HeadingSetextText => on_exit_heading_setext_text(context),
- Name::HeadingSetextUnderline => on_exit_heading_setext_underline(context),
+ Name::HeadingSetextUnderlineSequence => on_exit_heading_setext_underline_sequence(context),
Name::HtmlFlow | Name::HtmlText => on_exit_html(context),
Name::HtmlFlowData | Name::HtmlTextData => on_exit_html_data(context),
Name::Image | Name::Link => on_exit_media(context),
@@ -1440,8 +1440,8 @@ fn on_exit_heading_setext_text(context: &mut CompileContext) {
context.slurp_one_line_ending = true;
}
-/// Handle [`Exit`][Kind::Exit]:[`HeadingSetextUnderline`][Name::HeadingSetextUnderline].
-fn on_exit_heading_setext_underline(context: &mut CompileContext) {
+/// Handle [`Exit`][Kind::Exit]:[`HeadingSetextUnderlineSequence`][Name::HeadingSetextUnderlineSequence].
+fn on_exit_heading_setext_underline_sequence(context: &mut CompileContext) {
let text = context
.heading_setext_buffer
.take()
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
index 4a208df..4d58610 100644
--- a/src/construct/attention.rs
+++ b/src/construct/attention.rs
@@ -79,6 +79,7 @@
use crate::event::{Event, Kind, Name, Point};
use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
+use crate::subtokenize::Subresult;
use crate::tokenizer::Tokenizer;
use crate::util::{
char::{
@@ -87,6 +88,7 @@ use crate::util::{
},
slice::Slice,
};
+use alloc::string::String;
use alloc::{vec, vec::Vec};
/// Attention sequence that we can take markers from.
@@ -150,7 +152,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {
}
/// Resolve sequences.
-pub fn resolve(tokenizer: &mut Tokenizer) {
+pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
// Find all sequences, gather info about them.
let mut sequences = get_sequences(tokenizer);
@@ -221,6 +223,8 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
}
tokenizer.map.consume(&mut tokenizer.events);
+
+ Ok(None)
}
/// Get sequences.
diff --git a/src/construct/content.rs b/src/construct/content.rs
new file mode 100644
index 0000000..6c10cea
--- /dev/null
+++ b/src/construct/content.rs
@@ -0,0 +1,188 @@
+//! Content occurs in the [flow][] content type.
+//!
+//! Content contains zero or more [definition][definition]s, followed by zero
+//! or one [paragraph][].
+//!
+//! The constructs found in content are:
+//!
+//! * [Definition][crate::construct::definition]
+//! * [Paragraph][crate::construct::paragraph]
+//!
+//! ## Tokens
+//!
+//! * [`Content`][Name::Content]
+//!
+//! > 👉 **Note**: while parsing, [`Content`][Name::Content]
+//! > is used, which is later compiled away.
+//!
+//! ## References
+//!
+//! * [`content.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/content.js)
+//!
+//! [flow]: crate::construct::flow
+//! [definition]: crate::construct::definition
+//! [paragraph]: crate::construct::paragraph
+
+use crate::event::{Content, Kind, Link, Name};
+use crate::resolve::Name as ResolveName;
+use crate::state::{Name as StateName, State};
+use crate::subtokenize::{subtokenize, Subresult};
+use crate::tokenizer::Tokenizer;
+use alloc::{string::String, vec};
+
+/// Before a content chunk.
+///
+/// ```markdown
+/// > | abc
+/// ^
+/// ```
+pub fn chunk_start(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ None | Some(b'\n') => unreachable!("unexpected eol/eof"),
+ _ => {
+ tokenizer.enter_link(
+ Name::Content,
+ Link {
+ previous: None,
+ next: None,
+ content: Content::Content,
+ },
+ );
+ State::Retry(StateName::ContentChunkInside)
+ }
+ }
+}
+
+/// In a content chunk.
+///
+/// ```markdown
+/// > | abc
+/// ^^^
+/// ```
+pub fn chunk_inside(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ None | Some(b'\n') => {
+ tokenizer.exit(Name::Content);
+ tokenizer.register_resolver_before(ResolveName::Content);
+ // You’d be interrupting.
+ tokenizer.interrupt = true;
+ State::Ok
+ }
+ _ => {
+ tokenizer.consume();
+ State::Next(StateName::ContentChunkInside)
+ }
+ }
+}
+
+/// Before a definition.
+///
+/// ```markdown
+/// > | [a]: b
+/// ^
+/// ```
+pub fn definition_before(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.attempt(
+ State::Next(StateName::ContentDefinitionAfter),
+ State::Next(StateName::ParagraphStart),
+ );
+ State::Retry(StateName::DefinitionStart)
+}
+
+/// After a definition.
+///
+/// ```markdown
+/// > | [a]: b
+/// ^
+/// | c
+/// ```
+pub fn definition_after(tokenizer: &mut Tokenizer) -> State {
+ debug_assert!(matches!(tokenizer.current, None | Some(b'\n')));
+ if tokenizer.current.is_none() {
+ State::Ok
+ } else {
+ tokenizer.enter(Name::LineEnding);
+ tokenizer.consume();
+ tokenizer.exit(Name::LineEnding);
+ State::Next(StateName::ContentDefinitionBefore)
+ }
+}
+
+/// Merge `Content` chunks, which currently span a single line, into actual
+/// `Content`s that span multiple lines.
+pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
+ let mut index = 0;
+
+ while index < tokenizer.events.len() {
+ let event = &tokenizer.events[index];
+
+ if event.kind == Kind::Enter && event.name == Name::Content {
+ // Exit:Content
+ let mut exit_index = index + 1;
+
+ loop {
+ let mut enter_index = exit_index + 1;
+
+ if enter_index == tokenizer.events.len()
+ || tokenizer.events[enter_index].name != Name::LineEnding
+ {
+ break;
+ }
+
+ // Skip past line ending.
+ enter_index += 2;
+
+ // Skip past prefix.
+ while enter_index < tokenizer.events.len() {
+ let event = &tokenizer.events[enter_index];
+
+ if event.name != Name::SpaceOrTab
+ && event.name != Name::BlockQuotePrefix
+ && event.name != Name::BlockQuoteMarker
+ {
+ break;
+ }
+
+ enter_index += 1;
+ }
+
+ if enter_index == tokenizer.events.len()
+ || tokenizer.events[enter_index].name != Name::Content
+ {
+ break;
+ }
+
+ // Set Exit:Content point to Exit:LineEnding.
+ tokenizer.events[exit_index].point = tokenizer.events[exit_index + 2].point.clone();
+ // Remove Enter:LineEnding, Exit:LineEnding.
+ tokenizer.map.add(exit_index + 1, 2, vec![]);
+
+ // Link Enter:Content to Enter:Content on this line and vice versa.
+ tokenizer.events[exit_index - 1].link.as_mut().unwrap().next = Some(enter_index);
+ tokenizer.events[enter_index]
+ .link
+ .as_mut()
+ .unwrap()
+ .previous = Some(exit_index - 1);
+
+ // Potential next start.
+ exit_index = enter_index + 1;
+ }
+
+ // Move to `Exit:Content`.
+ index = exit_index;
+ }
+
+ index += 1;
+ }
+
+ tokenizer.map.consume(&mut tokenizer.events);
+
+ let result = subtokenize(
+ &mut tokenizer.events,
+ tokenizer.parse_state,
+ &Some(Content::Content),
+ )?;
+
+ Ok(Some(result))
+}
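
A minimal behavioral sketch of what the new construct covers (not part of the diff; it assumes this crate's `micromark` entry point): zero or more definitions followed by at most one paragraph, on adjacent lines, are first captured as one `Content` chunk and then subtokenized.

```rust
// Hedged sketch, not in this diff: the two definitions and the trailing
// paragraph below form one content chunk; definitions compile to nothing.
use micromark::micromark;

fn main() {
    assert_eq!(micromark("[a]: b\n[c]: d\nparagraph"), "<p>paragraph</p>");
}
```
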
diff --git a/src/construct/definition.rs b/src/construct/definition.rs
index 1071489..8ccfb90 100644
--- a/src/construct/definition.rs
+++ b/src/construct/definition.rs
@@ -1,4 +1,4 @@
-//! Definition occurs in the [flow] content type.
+//! Definition occurs in the [content] content type.
//!
//! ## Grammar
//!
@@ -12,8 +12,8 @@
//! ; those parts.
//! ```
//!
-//! As this construct occurs in flow, like all flow constructs, it must be
-//! followed by an eol (line ending) or eof (end of file).
+//! This construct must be followed by an eol (line ending) or eof (end of
+//! file), like flow constructs.
//!
//! See [`destination`][destination], [`label`][label], and [`title`][title]
//! for grammar, notes, and recommendations on each part.
@@ -88,7 +88,7 @@
//! * [`definition.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/definition.js)
//! * [*§ 4.7 Link reference definitions* in `CommonMark`](https://spec.commonmark.org/0.30/#link-reference-definitions)
//!
-//! [flow]: crate::construct::flow
+//! [content]: crate::construct::content
//! [string]: crate::construct::string
//! [character_escape]: crate::construct::character_escape
//! [character_reference]: crate::construct::character_reference
@@ -157,7 +157,10 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.token_1 = Name::DefinitionLabel;
tokenizer.tokenize_state.token_2 = Name::DefinitionLabelMarker;
tokenizer.tokenize_state.token_3 = Name::DefinitionLabelString;
- tokenizer.attempt(State::Next(StateName::DefinitionLabelAfter), State::Nok);
+ tokenizer.attempt(
+ State::Next(StateName::DefinitionLabelAfter),
+ State::Next(StateName::DefinitionLabelNok),
+ );
State::Retry(StateName::LabelStart)
}
_ => State::Nok,
@@ -192,6 +195,19 @@ pub fn label_after(tokenizer: &mut Tokenizer) -> State {
}
}
+/// At a non-label.
+///
+/// ```markdown
+/// > | []
+/// ^
+/// ```
+pub fn label_nok(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.tokenize_state.token_1 = Name::Data;
+ tokenizer.tokenize_state.token_2 = Name::Data;
+ tokenizer.tokenize_state.token_3 = Name::Data;
+ State::Nok
+}
+
/// After marker.
///
/// ```markdown
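
The new `label_nok` branch resets the token names so a failed label attempt can be retried as plain content. A hedged sketch of the observable behavior, again assuming the `micromark` entry point:

```rust
use micromark::micromark;

fn main() {
    // A valid label makes a definition, which compiles to nothing by itself…
    assert_eq!(micromark("[a]: b"), "");
    // …but an empty label is no definition; the text falls back to content.
    assert_eq!(micromark("[]: b"), "<p>[]: b</p>");
}
```
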
diff --git a/src/construct/document.rs b/src/construct/document.rs
index 45a961d..82f2ebd 100644
--- a/src/construct/document.rs
+++ b/src/construct/document.rs
@@ -413,7 +413,7 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State {
while !document_lazy_continuation_current && stack_index > 0 {
stack_index -= 1;
let name = &child.stack[stack_index];
- if name == &Name::Paragraph || name == &Name::Definition || name == &Name::GfmTableHead {
+ if name == &Name::Content || name == &Name::GfmTableHead {
document_lazy_continuation_current = true;
}
}
@@ -423,7 +423,7 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State {
if !document_lazy_continuation_current && !child.events.is_empty() {
let before = skip::opt_back(&child.events, child.events.len() - 1, &[Name::LineEnding]);
let name = &child.events[before].name;
- if name == &Name::Paragraph {
+ if name == &Name::Content {
document_lazy_continuation_current = true;
}
}
@@ -582,6 +582,7 @@ fn resolve(tokenizer: &mut Tokenizer) {
&tokenizer.events,
flow_index,
&mut child.events,
+ (0, 0),
);
// Replace the flow data with actual events.
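
The lazy-continuation checks now key off `Content` instead of `Paragraph`/`Definition`; the observable behavior should be unchanged. A sketch, assuming the `micromark` entry point:

```rust
use micromark::micromark;

fn main() {
    // The second line is lazy: it continues the content chunk inside the
    // block quote instead of starting new flow outside it.
    assert_eq!(micromark("> a\nb"), "<blockquote>\n<p>a\nb</p>\n</blockquote>");
}
```
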
diff --git a/src/construct/flow.rs b/src/construct/flow.rs
index e97ee63..08e0466 100644
--- a/src/construct/flow.rs
+++ b/src/construct/flow.rs
@@ -12,7 +12,6 @@
//!
//! * [Blank line][crate::construct::blank_line]
//! * [Code (indented)][crate::construct::code_indented]
-//! * [Definition][crate::construct::definition]
//! * [Heading (atx)][crate::construct::heading_atx]
//! * [Heading (setext)][crate::construct::heading_setext]
//! * [HTML (flow)][crate::construct::html_flow]
@@ -40,14 +39,14 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
Some(b'#') => {
tokenizer.attempt(
State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeParagraph),
+ State::Next(StateName::FlowBeforeContent),
);
State::Retry(StateName::HeadingAtxStart)
}
Some(b'$' | b'`' | b'~') => {
tokenizer.attempt(
State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeParagraph),
+ State::Next(StateName::FlowBeforeContent),
);
State::Retry(StateName::RawFlowStart)
}
@@ -56,7 +55,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
Some(b'*' | b'_') => {
tokenizer.attempt(
State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeParagraph),
+ State::Next(StateName::FlowBeforeContent),
);
State::Retry(StateName::ThematicBreakStart)
}
@@ -70,12 +69,12 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
Some(b'{') => {
tokenizer.attempt(
State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeParagraph),
+ State::Next(StateName::FlowBeforeContent),
);
State::Retry(StateName::MdxExpressionFlowStart)
}
// Actual parsing: blank line? Indented code? Indented anything?
- // Tables, setext heading underlines, definitions, and paragraphs are
+ // Tables, setext heading underlines, definitions, and content are
// particularly weird.
_ => State::Retry(StateName::FlowBlankLineBefore),
}
@@ -217,34 +216,20 @@ pub fn before_mdx_expression(tokenizer: &mut Tokenizer) -> State {
pub fn before_gfm_table(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(
State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeDefinition),
+ State::Next(StateName::FlowBeforeContent),
);
State::Retry(StateName::GfmTableStart)
}
-/// At definition.
-///
-/// ```markdown
-/// > | [a]: b
-/// ^
-/// ```
-pub fn before_definition(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt(
- State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeParagraph),
- );
- State::Retry(StateName::DefinitionStart)
-}
-
-/// At paragraph.
+/// At content.
///
/// ```markdown
/// > | a
/// ^
/// ```
-pub fn before_paragraph(tokenizer: &mut Tokenizer) -> State {
+pub fn before_content(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(State::Next(StateName::FlowAfter), State::Nok);
- State::Retry(StateName::ParagraphStart)
+ State::Retry(StateName::ContentChunkStart)
}
/// After blank line.
diff --git a/src/construct/gfm_table.rs b/src/construct/gfm_table.rs
index 27fbadf..63772c4 100644
--- a/src/construct/gfm_table.rs
+++ b/src/construct/gfm_table.rs
@@ -229,9 +229,10 @@ use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}
use crate::event::{Content, Event, Kind, Link, Name};
use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
+use crate::subtokenize::Subresult;
use crate::tokenizer::Tokenizer;
use crate::util::{constant::TAB_SIZE, skip::opt_back as skip_opt_back};
-use alloc::vec;
+use alloc::{string::String, vec};
/// Start of a GFM table.
///
@@ -771,15 +772,13 @@ pub fn body_row_escape(tokenizer: &mut Tokenizer) -> State {
}
/// Resolve GFM table.
-pub fn resolve(tokenizer: &mut Tokenizer) {
+pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
let mut index = 0;
- // let mut tables = vec![];
let mut in_first_cell_awaiting_pipe = true;
let mut in_row = false;
let mut in_delimiter_row = false;
let mut last_cell = (0, 0, 0, 0);
let mut cell = (0, 0, 0, 0);
-
let mut after_head_awaiting_first_body_row = false;
let mut last_table_end = 0;
let mut last_table_has_body = false;
@@ -800,17 +799,14 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
}
// Inject table start.
- tokenizer.map.add(
- index,
- 0,
- vec![Event {
- kind: Kind::Enter,
- name: Name::GfmTable,
- point: tokenizer.events[index].point.clone(),
- link: None,
- }],
- );
- } else if event.name == Name::GfmTableRow || event.name == Name::GfmTableDelimiterRow {
+ let enter = Event {
+ kind: Kind::Enter,
+ name: Name::GfmTable,
+ point: tokenizer.events[index].point.clone(),
+ link: None,
+ };
+ tokenizer.map.add(index, 0, vec![enter]);
+ } else if matches!(event.name, Name::GfmTableRow | Name::GfmTableDelimiterRow) {
in_delimiter_row = event.name == Name::GfmTableDelimiterRow;
in_row = true;
in_first_cell_awaiting_pipe = true;
@@ -821,23 +817,21 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
if after_head_awaiting_first_body_row {
after_head_awaiting_first_body_row = false;
last_table_has_body = true;
- tokenizer.map.add(
- index,
- 0,
- vec![Event {
- kind: Kind::Enter,
- name: Name::GfmTableBody,
- point: tokenizer.events[index].point.clone(),
- link: None,
- }],
- );
+ let enter = Event {
+ kind: Kind::Enter,
+ name: Name::GfmTableBody,
+ point: tokenizer.events[index].point.clone(),
+ link: None,
+ };
+ tokenizer.map.add(index, 0, vec![enter]);
}
}
// Cell data.
else if in_row
- && (event.name == Name::Data
- || event.name == Name::GfmTableDelimiterMarker
- || event.name == Name::GfmTableDelimiterFiller)
+ && matches!(
+ event.name,
+ Name::Data | Name::GfmTableDelimiterMarker | Name::GfmTableDelimiterFiller
+ )
{
in_first_cell_awaiting_pipe = false;
@@ -868,7 +862,7 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
} else if event.name == Name::GfmTableHead {
after_head_awaiting_first_body_row = true;
last_table_end = index;
- } else if event.name == Name::GfmTableRow || event.name == Name::GfmTableDelimiterRow {
+ } else if matches!(event.name, Name::GfmTableRow | Name::GfmTableDelimiterRow) {
in_row = false;
last_table_end = index;
if last_cell.1 != 0 {
@@ -878,9 +872,10 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
flush_cell(tokenizer, cell, in_delimiter_row, Some(index));
}
} else if in_row
- && (event.name == Name::Data
- || event.name == Name::GfmTableDelimiterMarker
- || event.name == Name::GfmTableDelimiterFiller)
+ && (matches!(
+ event.name,
+ Name::Data | Name::GfmTableDelimiterMarker | Name::GfmTableDelimiterFiller
+ ))
{
cell.3 = index;
}
@@ -891,6 +886,8 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
if last_table_end != 0 {
flush_table_end(tokenizer, last_table_end, last_table_has_body);
}
+
+ Ok(None)
}
/// Generate a cell.
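
The `matches!` rewrites above are behavior-preserving. A self-contained illustration of the idiom, using a stub enum rather than the crate's:

```rust
// Stub enum for illustration only.
#[allow(dead_code)]
enum Name {
    Data,
    GfmTableDelimiterMarker,
    GfmTableDelimiterFiller,
    Other,
}

// One `matches!` replaces a chain of `==` comparisons on the event name.
fn is_cell_content(name: &Name) -> bool {
    matches!(
        name,
        Name::Data | Name::GfmTableDelimiterMarker | Name::GfmTableDelimiterFiller
    )
}

fn main() {
    assert!(is_cell_content(&Name::Data));
    assert!(!is_cell_content(&Name::Other));
}
```
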
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index c1090c4..b76e455 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -66,9 +66,10 @@ use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}
use crate::event::{Content, Event, Kind, Link, Name};
use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
+use crate::subtokenize::Subresult;
use crate::tokenizer::Tokenizer;
use crate::util::constant::{HEADING_ATX_OPENING_FENCE_SIZE_MAX, TAB_SIZE};
-use alloc::vec;
+use alloc::{string::String, vec};
/// Start of a heading (atx).
///
@@ -222,7 +223,7 @@ pub fn data(tokenizer: &mut Tokenizer) -> State {
}
/// Resolve heading (atx).
-pub fn resolve(tokenizer: &mut Tokenizer) {
+pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
let mut index = 0;
let mut heading_inside = false;
let mut data_start: Option<usize> = None;
@@ -281,4 +282,6 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
index += 1;
}
+
+ Ok(None)
}
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index e9cc759..3a484e1 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -54,6 +54,7 @@
//! * [`HeadingSetext`][Name::HeadingSetext]
//! * [`HeadingSetextText`][Name::HeadingSetextText]
//! * [`HeadingSetextUnderline`][Name::HeadingSetextUnderline]
+//! * [`HeadingSetextUnderlineSequence`][Name::HeadingSetextUnderlineSequence]
//!
//! ## References
//!
@@ -70,12 +71,13 @@
//! [atx]: http://www.aaronsw.com/2002/atx/
use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
-use crate::event::{Kind, Name};
+use crate::event::{Content, Event, Kind, Link, Name};
use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
+use crate::subtokenize::Subresult;
use crate::tokenizer::Tokenizer;
-use crate::util::{constant::TAB_SIZE, skip::opt_back as skip_opt_back};
-use alloc::vec;
+use crate::util::{constant::TAB_SIZE, skip};
+use alloc::{string::String, vec};
/// At start of heading (setext) underline.
///
@@ -90,14 +92,16 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
&& !tokenizer.pierce
// Require a paragraph before.
&& (!tokenizer.events.is_empty()
- && tokenizer.events[skip_opt_back(
+ && tokenizer.events[skip::opt_back(
&tokenizer.events,
tokenizer.events.len() - 1,
&[Name::LineEnding, Name::SpaceOrTab],
)]
.name
- == Name::Paragraph)
+ == Name::Content)
{
+ tokenizer.enter(Name::HeadingSetextUnderline);
+
if matches!(tokenizer.current, Some(b'\t' | b' ')) {
tokenizer.attempt(State::Next(StateName::HeadingSetextBefore), State::Nok);
State::Retry(space_or_tab_min_max(
@@ -128,7 +132,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'-' | b'=') => {
tokenizer.tokenize_state.marker = tokenizer.current.unwrap();
- tokenizer.enter(Name::HeadingSetextUnderline);
+ tokenizer.enter(Name::HeadingSetextUnderlineSequence);
State::Retry(StateName::HeadingSetextInside)
}
_ => State::Nok,
@@ -148,7 +152,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {
State::Next(StateName::HeadingSetextInside)
} else {
tokenizer.tokenize_state.marker = 0;
- tokenizer.exit(Name::HeadingSetextUnderline);
+ tokenizer.exit(Name::HeadingSetextUnderlineSequence);
if matches!(tokenizer.current, Some(b'\t' | b' ')) {
tokenizer.attempt(State::Next(StateName::HeadingSetextAfter), State::Nok);
@@ -172,6 +176,7 @@ pub fn after(tokenizer: &mut Tokenizer) -> State {
// Feel free to interrupt.
tokenizer.interrupt = false;
tokenizer.register_resolver(ResolveName::HeadingSetext);
+ tokenizer.exit(Name::HeadingSetextUnderline);
State::Ok
}
_ => State::Nok,
@@ -179,42 +184,102 @@ pub fn after(tokenizer: &mut Tokenizer) -> State {
}
/// Resolve heading (setext).
-pub fn resolve(tokenizer: &mut Tokenizer) {
- let mut index = 0;
- let mut paragraph_enter = None;
- let mut paragraph_exit = None;
-
- while index < tokenizer.events.len() {
- let event = &tokenizer.events[index];
-
- // Find paragraphs.
- if event.kind == Kind::Enter {
- if event.name == Name::Paragraph {
- paragraph_enter = Some(index);
- }
- } else if event.name == Name::Paragraph {
- paragraph_exit = Some(index);
- }
- // We know this is preceded by a paragraph.
- // Otherwise we don’t parse.
- else if event.name == Name::HeadingSetextUnderline {
- let enter = paragraph_enter.take().unwrap();
- let exit = paragraph_exit.take().unwrap();
+pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
+ tokenizer.map.consume(&mut tokenizer.events);
+
+ let mut enter = skip::to(&tokenizer.events, 0, &[Name::HeadingSetextUnderline]);
+
+ while enter < tokenizer.events.len() {
+ let exit = skip::to(
+ &tokenizer.events,
+ enter + 1,
+ &[Name::HeadingSetextUnderline],
+ );
+
+ // Find paragraph before.
+ let paragraph_exit_before = skip::opt_back(
+ &tokenizer.events,
+ enter - 1,
+ &[Name::SpaceOrTab, Name::LineEnding, Name::BlockQuotePrefix],
+ );
+
+ // There’s a paragraph before: this is a setext heading.
+ if tokenizer.events[paragraph_exit_before].name == Name::Paragraph {
+ let paragraph_enter = skip::to_back(
+ &tokenizer.events,
+ paragraph_exit_before - 1,
+ &[Name::Paragraph],
+ );
// Change types of Enter:Paragraph, Exit:Paragraph.
- tokenizer.events[enter].name = Name::HeadingSetextText;
- tokenizer.events[exit].name = Name::HeadingSetextText;
+ tokenizer.events[paragraph_enter].name = Name::HeadingSetextText;
+ tokenizer.events[paragraph_exit_before].name = Name::HeadingSetextText;
// Add Enter:HeadingSetext, Exit:HeadingSetext.
- let mut heading_enter = tokenizer.events[enter].clone();
+ let mut heading_enter = tokenizer.events[paragraph_enter].clone();
heading_enter.name = Name::HeadingSetext;
- let mut heading_exit = tokenizer.events[index].clone();
+ tokenizer.map.add(paragraph_enter, 0, vec![heading_enter]);
+ let mut heading_exit = tokenizer.events[exit].clone();
heading_exit.name = Name::HeadingSetext;
-
- tokenizer.map.add(enter, 0, vec![heading_enter]);
- tokenizer.map.add(index + 1, 0, vec![heading_exit]);
+ tokenizer.map.add(exit + 1, 0, vec![heading_exit]);
+ } else {
+ // There’s a following paragraph, move this underline inside it.
+ if exit + 3 < tokenizer.events.len()
+ && tokenizer.events[exit + 1].name == Name::LineEnding
+ && tokenizer.events[exit + 3].name == Name::Paragraph
+ {
+ // Swap type, HeadingSetextUnderline:Enter -> Paragraph:Enter.
+ tokenizer.events[enter].name = Name::Paragraph;
+ // Swap type, LineEnding -> Data.
+ tokenizer.events[exit + 1].name = Name::Data;
+ tokenizer.events[exit + 2].name = Name::Data;
+ // Move new data (was line ending) back to include whole line,
+ // and link data together.
+ tokenizer.events[exit + 1].point = tokenizer.events[enter].point.clone();
+ tokenizer.events[exit + 1].link = Some(Link {
+ previous: None,
+ next: Some(exit + 4),
+ content: Content::Text,
+ });
+ tokenizer.events[exit + 4].link.as_mut().unwrap().previous = Some(exit + 1);
+ // Remove *including* HeadingSetextUnderline:Exit, until the line ending.
+ tokenizer.map.add(enter + 1, exit - enter, vec![]);
+ // Remove old Paragraph:Enter.
+ tokenizer.map.add(exit + 3, 1, vec![]);
+ } else {
+ // Swap type.
+ tokenizer.events[enter].name = Name::Paragraph;
+ tokenizer.events[exit].name = Name::Paragraph;
+ // Replace what’s inside the underline (whitespace, sequence).
+ tokenizer.map.add(
+ enter + 1,
+ exit - enter - 1,
+ vec![
+ Event {
+ name: Name::Data,
+ kind: Kind::Enter,
+ point: tokenizer.events[enter].point.clone(),
+ link: Some(Link {
+ previous: None,
+ next: None,
+ content: Content::Text,
+ }),
+ },
+ Event {
+ name: Name::Data,
+ kind: Kind::Exit,
+ point: tokenizer.events[exit].point.clone(),
+ link: None,
+ },
+ ],
+ );
+ }
}
- index += 1;
+ enter = skip::to(&tokenizer.events, exit + 1, &[Name::HeadingSetextUnderline]);
}
+
+ tokenizer.map.consume(&mut tokenizer.events);
+
+ Ok(None)
}
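
The rewritten resolver also covers underlines that turn out not to be headings (say, only a definition precedes them) by demoting them to paragraphs. A behavioral sketch, assuming the `micromark` entry point:

```rust
use micromark::micromark;

fn main() {
    // Underline after a paragraph: a setext heading.
    assert_eq!(micromark("alpha\n====="), "<h1>alpha</h1>");
    // Underline after a definition: no heading; it becomes its own paragraph.
    assert_eq!(micromark("[a]: b\n====="), "<p>=====</p>");
}
```
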
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index ce1c295..95b9a27 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -183,6 +183,7 @@ use crate::construct::partial_space_or_tab_eol::space_or_tab_eol;
use crate::event::{Event, Kind, Name};
use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
+use crate::subtokenize::Subresult;
use crate::tokenizer::{Label, LabelKind, LabelStart, Tokenizer};
use crate::util::{
constant::RESOURCE_DESTINATION_BALANCE_MAX,
@@ -660,7 +661,7 @@ pub fn reference_collapsed_open(tokenizer: &mut Tokenizer) -> State {
///
/// This turns matching label starts and label ends into links, images, and
/// footnotes, and turns unmatched label starts back into data.
-pub fn resolve(tokenizer: &mut Tokenizer) {
+pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
// Inject labels.
let labels = tokenizer.tokenize_state.labels.split_off(0);
inject_labels(tokenizer, &labels);
@@ -671,6 +672,8 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
mark_as_data(tokenizer, &starts);
tokenizer.map.consume(&mut tokenizer.events);
+
+ Ok(None)
}
/// Inject links/images/footnotes.
diff --git a/src/construct/list_item.rs b/src/construct/list_item.rs
index 658c2c7..13b740b 100644
--- a/src/construct/list_item.rs
+++ b/src/construct/list_item.rs
@@ -62,13 +62,14 @@ use crate::construct::partial_space_or_tab::space_or_tab_min_max;
use crate::event::{Kind, Name};
use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
+use crate::subtokenize::Subresult;
use crate::tokenizer::Tokenizer;
use crate::util::{
constant::{LIST_ITEM_VALUE_SIZE_MAX, TAB_SIZE},
skip,
slice::{Position, Slice},
};
-use alloc::{vec, vec::Vec};
+use alloc::{string::String, vec, vec::Vec};
/// Start of list item.
///
@@ -370,7 +371,7 @@ pub fn cont_filled(tokenizer: &mut Tokenizer) -> State {
}
/// Find adjacent list items with the same marker.
-pub fn resolve(tokenizer: &mut Tokenizer) {
+pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
let mut lists_wip: Vec<(u8, usize, usize, usize)> = vec![];
let mut lists: Vec<(u8, usize, usize, usize)> = vec![];
let mut index = 0;
@@ -472,4 +473,6 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
index += 1;
}
+
+ Ok(None)
}
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index 1afa105..ae6facf 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -16,7 +16,7 @@
//! Content types also have a *rest* thing: after all things are parsed,
//! there’s something left.
//! In document, that is [flow][].
-//! In flow, that is a [paragraph][].
+//! In flow, that is [content][].
//! In string and text, that is [data][partial_data].
//!
//! ## Construct
@@ -37,6 +37,7 @@
//! * [character escape][character_escape]
//! * [character reference][character_reference]
//! * [code (indented)][code_indented]
+//! * [content][]
//! * [definition][]
//! * [hard break (escape)][hard_break_escape]
//! * [heading (atx)][heading_atx]
@@ -149,6 +150,7 @@ pub mod block_quote;
pub mod character_escape;
pub mod character_reference;
pub mod code_indented;
+pub mod content;
pub mod definition;
pub mod document;
pub mod flow;
diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs
index c1e7311..78fbacb 100644
--- a/src/construct/paragraph.rs
+++ b/src/construct/paragraph.rs
@@ -1,4 +1,4 @@
-//! Paragraph occurs in the [flow][] content type.
+//! Paragraph occurs in the [content][] content type.
//!
//! ## Grammar
//!
@@ -11,14 +11,15 @@
//! paragraph ::= 1*line *(eol 1*line)
//! ```
//!
-//! As this construct occurs in flow, like all flow constructs, it must be
-//! followed by an eol (line ending) or eof (end of file).
+//! This construct must be followed by an eol (line ending) or eof (end of
+//! file), like flow constructs.
//!
//! Paragraphs can contain line endings and whitespace, but they are not
//! allowed to contain blank lines, or to be blank themselves.
//!
//! The paragraph is interpreted as the [text][] content type.
-//! That means that [autolinks][autolink], [code (text)][raw_text], etc are allowed.
+//! That means that [autolinks][autolink], [code (text)][raw_text], etc are
+//! allowed.
//!
//! ## HTML
//!
@@ -34,40 +35,57 @@
//! * [`content.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/content.js)
//! * [*§ 4.8 Paragraphs* in `CommonMark`](https://spec.commonmark.org/0.30/#paragraphs)
//!
-//! [flow]: crate::construct::flow
+//! [content]: crate::construct::content
//! [text]: crate::construct::text
//! [autolink]: crate::construct::autolink
//! [raw_text]: crate::construct::raw_text
//! [html]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-p-element
-use crate::event::{Content, Kind, Link, Name};
-use crate::resolve::Name as ResolveName;
+use crate::event::{Content, Link, Name};
use crate::state::{Name as StateName, State};
+use crate::subtokenize::link;
use crate::tokenizer::Tokenizer;
-use alloc::vec;
-/// Before paragraph.
+/// Paragraph start.
///
/// ```markdown
/// > | abc
/// ^
+/// | def
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- None | Some(b'\n') => unreachable!("unexpected eol/eof"),
- _ => {
- tokenizer.enter(Name::Paragraph);
- tokenizer.enter_link(
- Name::Data,
- Link {
- previous: None,
- next: None,
- content: Content::Text,
- },
- );
- State::Retry(StateName::ParagraphInside)
- }
+ debug_assert!(tokenizer.current.is_some());
+ tokenizer.enter(Name::Paragraph);
+ State::Retry(StateName::ParagraphLineStart)
+}
+
+/// Start of a line in a paragraph.
+///
+/// ```markdown
+/// > | abc
+/// ^
+/// > | def
+/// ^
+/// ```
+pub fn line_start(tokenizer: &mut Tokenizer) -> State {
+ debug_assert!(tokenizer.current.is_some());
+ tokenizer.enter_link(
+ Name::Data,
+ Link {
+ previous: None,
+ next: None,
+ content: Content::Text,
+ },
+ );
+
+ if tokenizer.tokenize_state.connect {
+ let index = tokenizer.events.len() - 1;
+ link(&mut tokenizer.events, index);
+ } else {
+ tokenizer.tokenize_state.connect = true;
}
+
+ State::Retry(StateName::ParagraphInside)
}
/// In paragraph.
@@ -78,91 +96,20 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ```
pub fn inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- None | Some(b'\n') => {
+ None => {
+ tokenizer.tokenize_state.connect = false;
tokenizer.exit(Name::Data);
tokenizer.exit(Name::Paragraph);
- tokenizer.register_resolver_before(ResolveName::Paragraph);
- // You’d be interrupting.
- tokenizer.interrupt = true;
State::Ok
}
+ Some(b'\n') => {
+ tokenizer.consume();
+ tokenizer.exit(Name::Data);
+ State::Next(StateName::ParagraphLineStart)
+ }
_ => {
tokenizer.consume();
State::Next(StateName::ParagraphInside)
}
}
}
-
-/// Merge “`Paragraph`”s, which currently span a single line, into actual
-/// `Paragraph`s that span multiple lines.
-pub fn resolve(tokenizer: &mut Tokenizer) {
- let mut index = 0;
-
- while index < tokenizer.events.len() {
- let event = &tokenizer.events[index];
-
- if event.kind == Kind::Enter && event.name == Name::Paragraph {
- // Exit:Paragraph
- let mut exit_index = index + 3;
-
- loop {
- let mut enter_index = exit_index + 1;
-
- if enter_index == tokenizer.events.len()
- || tokenizer.events[enter_index].name != Name::LineEnding
- {
- break;
- }
-
- enter_index += 2;
-
- while enter_index < tokenizer.events.len() {
- let event = &tokenizer.events[enter_index];
-
- if event.name != Name::SpaceOrTab
- && event.name != Name::BlockQuotePrefix
- && event.name != Name::BlockQuoteMarker
- {
- break;
- }
-
- enter_index += 1;
- }
-
- if enter_index == tokenizer.events.len()
- || tokenizer.events[enter_index].name != Name::Paragraph
- {
- break;
- }
-
- // Remove Exit:Paragraph, Enter:LineEnding, Exit:LineEnding.
- tokenizer.map.add(exit_index, 3, vec![]);
-
- // Remove Enter:Paragraph.
- tokenizer.map.add(enter_index, 1, vec![]);
-
- // Add Exit:LineEnding position info to Exit:Data.
- tokenizer.events[exit_index - 1].point =
- tokenizer.events[exit_index + 2].point.clone();
-
- // Link Enter:Data on the previous line to Enter:Data on this line.
- if let Some(link) = &mut tokenizer.events[exit_index - 2].link {
- link.next = Some(enter_index + 1);
- }
- if let Some(link) = &mut tokenizer.events[enter_index + 1].link {
- link.previous = Some(exit_index - 2);
- }
-
- // Potential next start.
- exit_index = enter_index + 3;
- }
-
- // Move to `Exit:Paragraph`.
- index = exit_index;
- }
-
- index += 1;
- }
-
- tokenizer.map.consume(&mut tokenizer.events);
-}
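
Paragraphs now enter one linked `Data` chunk per line in `line_start` instead of being merged afterwards by a resolver; the compiled output should be unchanged. A sketch, assuming the `micromark` entry point:

```rust
use micromark::micromark;

fn main() {
    // Two lines, two linked data chunks, still one paragraph.
    assert_eq!(micromark("abc\ndef"), "<p>abc\ndef</p>");
}
```
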
diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs
index b6f1f47..b36d9f0 100644
--- a/src/construct/partial_data.rs
+++ b/src/construct/partial_data.rs
@@ -8,8 +8,9 @@
use crate::event::{Kind, Name};
use crate::state::{Name as StateName, State};
+use crate::subtokenize::Subresult;
use crate::tokenizer::Tokenizer;
-use alloc::vec;
+use alloc::{string::String, vec};
/// At beginning of data.
///
@@ -72,7 +73,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {
}
/// Merge adjacent data events.
-pub fn resolve(tokenizer: &mut Tokenizer) {
+pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
let mut index = 0;
// Loop through events and merge adjacent data events.
@@ -103,4 +104,6 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
index += 1;
}
+
+ Ok(None)
}
diff --git a/src/construct/string.rs b/src/construct/string.rs
index dba1ac1..cf2f222 100644
--- a/src/construct/string.rs
+++ b/src/construct/string.rs
@@ -15,7 +15,9 @@
use crate::construct::partial_whitespace::resolve_whitespace;
use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
+use crate::subtokenize::Subresult;
use crate::tokenizer::Tokenizer;
+use alloc::string::String;
/// Characters that can start something in string.
const MARKERS: [u8; 2] = [b'&', b'\\'];
@@ -74,6 +76,8 @@ pub fn before_data(tokenizer: &mut Tokenizer) -> State {
}
/// Resolve whitespace in string.
-pub fn resolve(tokenizer: &mut Tokenizer) {
+pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
resolve_whitespace(tokenizer, false, false);
+
+ Ok(None)
}
diff --git a/src/construct/text.rs b/src/construct/text.rs
index 34ea071..2648531 100644
--- a/src/construct/text.rs
+++ b/src/construct/text.rs
@@ -28,7 +28,9 @@ use crate::construct::gfm_autolink_literal::resolve as resolve_gfm_autolink_lite
use crate::construct::partial_whitespace::resolve_whitespace;
use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
+use crate::subtokenize::Subresult;
use crate::tokenizer::Tokenizer;
+use alloc::string::String;
/// Characters that can start something in text.
const MARKERS: [u8; 16] = [
@@ -242,7 +244,7 @@ pub fn before_data(tokenizer: &mut Tokenizer) -> State {
}
/// Resolve whitespace.
-pub fn resolve(tokenizer: &mut Tokenizer) {
+pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
resolve_whitespace(
tokenizer,
tokenizer.parse_state.options.constructs.hard_break_trailing,
@@ -257,4 +259,6 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
{
resolve_gfm_autolink_literal(tokenizer);
}
+
+ Ok(None)
}
diff --git a/src/event.rs b/src/event.rs
index de3f95f..a2626ee 100644
--- a/src/event.rs
+++ b/src/event.rs
@@ -554,6 +554,26 @@ pub enum Name {
/// ^ ^
/// ```
CodeTextSequence,
+ /// Content.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [flow content][crate::construct::flow]
+ /// * **Content model**:
+ /// [content][crate::construct::content]
+ /// * **Construct**:
+ /// [`content`][crate::construct::content]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// > | [a]: b
+ /// ^^^^^^
+ /// > | c.
+ /// ^^
+ /// ```
+ Content,
/// Data.
///
/// ## Info
@@ -1754,7 +1774,8 @@ pub enum Name {
/// * **Context**:
/// [`HeadingSetext`][Name::HeadingSetext]
/// * **Content model**:
- /// void
+ /// [`HeadingSetextUnderlineSequence`][Name::HeadingSetextUnderlineSequence],
+ /// [`SpaceOrTab`][Name::SpaceOrTab]
/// * **Construct**:
/// [`heading_setext`][crate::construct::heading_setext]
///
@@ -1766,6 +1787,25 @@ pub enum Name {
/// ^^^^^
/// ```
HeadingSetextUnderline,
+ /// Heading (setext) underline sequence.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`HeadingSetext`][Name::HeadingSetext]
+ /// * **Content model**:
+ /// void
+ /// * **Construct**:
+ /// [`heading_setext`][crate::construct::heading_setext]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// | alpha
+ /// > | =====
+ /// ^^^^^
+ /// ```
+ HeadingSetextUnderlineSequence,
/// Whole html (flow).
///
/// ## Info
@@ -2914,13 +2954,12 @@ pub enum Name {
/// ^
/// ```
MdxJsxTagSelfClosingMarker,
-
- /// Whole paragraph.
+ /// Paragraph.
///
/// ## Info
///
/// * **Context**:
- /// [flow content][crate::construct::flow]
+ /// [content][crate::construct::content]
/// * **Content model**:
/// [text content][crate::construct::text]
/// * **Construct**:
@@ -3340,7 +3379,7 @@ pub const VOID_EVENTS: [Name; 75] = [
Name::HardBreakEscape,
Name::HardBreakTrailing,
Name::HeadingAtxSequence,
- Name::HeadingSetextUnderline,
+ Name::HeadingSetextUnderlineSequence,
Name::HtmlFlowData,
Name::HtmlTextData,
Name::LabelImageMarker,
@@ -3380,6 +3419,8 @@ pub const VOID_EVENTS: [Name; 75] = [
pub enum Content {
/// Represents [flow content][crate::construct::flow].
Flow,
+ /// Represents [content][crate::construct::content].
+ Content,
/// Represents [string content][crate::construct::string].
String,
/// Represents [text content][crate::construct::text].
diff --git a/src/parser.rs b/src/parser.rs
index 3a7713a..c69eb38 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -49,16 +49,25 @@ pub fn parse<'a>(value: &'a str, options: &'a Options) -> Result<(Vec<Event>, &'
(parse_state.bytes.len(), 0),
State::Next(StateName::DocumentStart),
);
- tokenizer.flush(state, true)?;
-
+ let mut result = tokenizer.flush(state, true)?;
let mut events = tokenizer.events;
- let footnote = tokenizer.tokenize_state.gfm_footnote_definitions;
- let normal = tokenizer.tokenize_state.definitions;
- parse_state.gfm_footnote_definitions = footnote;
- parse_state.definitions = normal;
+ parse_state
+ .gfm_footnote_definitions
+ .append(&mut result.gfm_footnote_definitions);
+ parse_state.definitions.append(&mut result.definitions);
+
+ loop {
+ let mut result = subtokenize(&mut events, &parse_state, &None)?;
+ parse_state
+ .gfm_footnote_definitions
+ .append(&mut result.gfm_footnote_definitions);
+ parse_state.definitions.append(&mut result.definitions);
- while !(subtokenize(&mut events, &parse_state)?) {}
+ if result.done {
+ break;
+ }
+ }
Ok((events, parse_state.bytes))
}
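
A self-contained sketch of the new driver shape in `parse`: run passes repeatedly, appending the definitions each pass collects, until a pass reports `done`. The type here is a stub mirroring `Subresult`, not the crate's own:

```rust
// Stub mirroring the diff's `Subresult`, for illustration only.
struct Subresult {
    done: bool,
    definitions: Vec<String>,
}

fn drive(mut pass: impl FnMut() -> Result<Subresult, String>) -> Result<Vec<String>, String> {
    let mut definitions = vec![];
    loop {
        let mut result = pass()?;
        definitions.append(&mut result.definitions);
        if result.done {
            break;
        }
    }
    Ok(definitions)
}

fn main() {
    let mut calls = 0;
    let all = drive(|| {
        calls += 1;
        Ok(Subresult {
            done: calls == 2,
            definitions: vec![format!("def-{calls}")],
        })
    })
    .unwrap();
    assert_eq!(all, vec!["def-1".to_string(), "def-2".to_string()]);
}
```
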
diff --git a/src/resolve.rs b/src/resolve.rs
index d015213..2586676 100644
--- a/src/resolve.rs
+++ b/src/resolve.rs
@@ -1,7 +1,9 @@
//! Resolve events.
use crate::construct;
+use crate::subtokenize::Subresult;
use crate::tokenizer::Tokenizer;
+use alloc::string::String;
/// Names of resolvers.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
@@ -32,8 +34,8 @@ pub enum Name {
HeadingAtx,
/// Resolve heading (setext).
///
- /// Heading (setext) is parsed as an underline that is preceded by a
- /// paragraph, both will form the whole construct.
+ /// Heading (setext) is parsed as an underline that is preceded by content;
+ /// together they form the whole construct.
HeadingSetext,
/// Resolve list item.
///
@@ -41,12 +43,12 @@ pub enum Name {
/// They are wrapped into ordered or unordered lists based on whether items
/// with the same marker occur next to each other.
ListItem,
- /// Resolve paragraphs.
+ /// Resolve content.
///
- /// Paragraphs are parsed as single line paragraphs, as what remains if
- /// other flow constructs don’t match.
+ /// Content is parsed as single lines, as what remains if other flow
+ /// constructs don’t match.
/// But, when they occur next to each other, they need to be merged.
- Paragraph,
+ Content,
/// Resolve data.
///
/// Data is parsed as many small bits, due to many punctuation characters
@@ -61,7 +63,7 @@ pub enum Name {
}
/// Call the corresponding resolver.
-pub fn call(tokenizer: &mut Tokenizer, name: Name) {
+pub fn call(tokenizer: &mut Tokenizer, name: Name) -> Result<Option<Subresult>, String> {
let func = match name {
Name::Label => construct::label_end::resolve,
Name::Attention => construct::attention::resolve,
@@ -69,11 +71,11 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) {
Name::HeadingAtx => construct::heading_atx::resolve,
Name::HeadingSetext => construct::heading_setext::resolve,
Name::ListItem => construct::list_item::resolve,
- Name::Paragraph => construct::paragraph::resolve,
+ Name::Content => construct::content::resolve,
Name::Data => construct::partial_data::resolve,
Name::String => construct::string::resolve,
Name::Text => construct::text::resolve,
};
- func(tokenizer);
+ func(tokenizer)
}
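
All resolvers now share one fallible signature, letting `call` forward errors and subresults uniformly. A self-contained sketch of that contract with stub types:

```rust
struct Tokenizer; // stub standing in for the crate's tokenizer
struct Subresult; // stub standing in for the crate's subresult

// The shared shape: most resolvers rewrite events in place and yield
// `Ok(None)`; only `content::resolve` subtokenizes and yields `Ok(Some(..))`.
type Resolver = fn(&mut Tokenizer) -> Result<Option<Subresult>, String>;

fn noop_resolve(_tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
    Ok(None)
}

fn main() {
    let func: Resolver = noop_resolve;
    assert!(func(&mut Tokenizer).unwrap().is_none());
}
```
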
diff --git a/src/state.rs b/src/state.rs
index 1d15239..896761e 100644
--- a/src/state.rs
+++ b/src/state.rs
@@ -75,24 +75,6 @@ pub enum Name {
CharacterReferenceNumeric,
CharacterReferenceValue,
- RawFlowStart,
- RawFlowBeforeSequenceOpen,
- RawFlowSequenceOpen,
- RawFlowInfoBefore,
- RawFlowInfo,
- RawFlowMetaBefore,
- RawFlowMeta,
- RawFlowAtNonLazyBreak,
- RawFlowCloseStart,
- RawFlowBeforeSequenceClose,
- RawFlowSequenceClose,
- RawFlowAfterSequenceClose,
- RawFlowContentBefore,
- RawFlowContentStart,
- RawFlowBeforeContentChunk,
- RawFlowContentChunk,
- RawFlowAfter,
-
CodeIndentedStart,
CodeIndentedAtBreak,
CodeIndentedAfter,
@@ -101,11 +83,10 @@ pub enum Name {
CodeIndentedFurtherBegin,
CodeIndentedFurtherAfter,
- RawTextStart,
- RawTextSequenceOpen,
- RawTextBetween,
- RawTextData,
- RawTextSequenceClose,
+ ContentChunkStart,
+ ContentChunkInside,
+ ContentDefinitionBefore,
+ ContentDefinitionAfter,
DataStart,
DataInside,
@@ -114,6 +95,7 @@ pub enum Name {
DefinitionStart,
DefinitionBefore,
DefinitionLabelAfter,
+ DefinitionLabelNok,
DefinitionMarkerAfter,
DefinitionDestinationBefore,
DefinitionDestinationAfter,
@@ -155,11 +137,10 @@ pub enum Name {
FlowBeforeHeadingAtx,
FlowBeforeHeadingSetext,
FlowBeforeThematicBreak,
- FlowBeforeDefinition,
FlowAfter,
FlowBlankLineBefore,
FlowBlankLineAfter,
- FlowBeforeParagraph,
+ FlowBeforeContent,
FrontmatterStart,
FrontmatterOpenSequence,
@@ -363,6 +344,21 @@ pub enum Name {
ListItemContBlank,
ListItemContFilled,
+ MdxExpressionTextStart,
+ MdxExpressionTextAfter,
+
+ MdxExpressionFlowStart,
+ MdxExpressionFlowBefore,
+ MdxExpressionFlowAfter,
+ MdxExpressionFlowEnd,
+
+ MdxExpressionStart,
+ MdxExpressionBefore,
+ MdxExpressionInside,
+ MdxExpressionEolAfter,
+ MdxJsxAttributeValueExpressionAfter,
+ MdxJsxAttributeExpressionAfter,
+
MdxJsxFlowStart,
MdxJsxFlowBefore,
MdxJsxFlowAfter,
@@ -402,8 +398,33 @@ pub enum Name {
NonLazyContinuationAfter,
ParagraphStart,
+ ParagraphLineStart,
ParagraphInside,
+ RawFlowStart,
+ RawFlowBeforeSequenceOpen,
+ RawFlowSequenceOpen,
+ RawFlowInfoBefore,
+ RawFlowInfo,
+ RawFlowMetaBefore,
+ RawFlowMeta,
+ RawFlowAtNonLazyBreak,
+ RawFlowCloseStart,
+ RawFlowBeforeSequenceClose,
+ RawFlowSequenceClose,
+ RawFlowAfterSequenceClose,
+ RawFlowContentBefore,
+ RawFlowContentStart,
+ RawFlowBeforeContentChunk,
+ RawFlowContentChunk,
+ RawFlowAfter,
+
+ RawTextStart,
+ RawTextSequenceOpen,
+ RawTextBetween,
+ RawTextData,
+ RawTextSequenceClose,
+
SpaceOrTabStart,
SpaceOrTabInside,
SpaceOrTabAfter,
@@ -438,47 +459,12 @@ pub enum Name {
TitleAtBlankLine,
TitleEscape,
TitleInside,
-
- MdxExpressionTextStart,
- MdxExpressionTextAfter,
-
- MdxExpressionFlowStart,
- MdxExpressionFlowBefore,
- MdxExpressionFlowAfter,
- MdxExpressionFlowEnd,
-
- MdxExpressionStart,
- MdxExpressionBefore,
- MdxExpressionInside,
- MdxExpressionEolAfter,
- MdxJsxAttributeValueExpressionAfter,
- MdxJsxAttributeExpressionAfter,
}
#[allow(clippy::too_many_lines)]
/// Call the corresponding state for a state name.
pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
let func = match name {
- Name::MdxExpressionTextStart => construct::mdx_expression_text::start,
- Name::MdxExpressionTextAfter => construct::mdx_expression_text::after,
-
- Name::MdxExpressionFlowStart => construct::mdx_expression_flow::start,
- Name::MdxExpressionFlowBefore => construct::mdx_expression_flow::before,
- Name::MdxExpressionFlowAfter => construct::mdx_expression_flow::after,
- Name::MdxExpressionFlowEnd => construct::mdx_expression_flow::end,
-
- Name::MdxExpressionStart => construct::partial_mdx_expression::start,
- Name::MdxExpressionBefore => construct::partial_mdx_expression::before,
- Name::MdxExpressionInside => construct::partial_mdx_expression::inside,
- Name::MdxExpressionEolAfter => construct::partial_mdx_expression::eol_after,
-
- Name::MdxJsxAttributeValueExpressionAfter => {
- construct::partial_mdx_jsx::attribute_value_expression_after
- }
- Name::MdxJsxAttributeExpressionAfter => {
- construct::partial_mdx_jsx::attribute_expression_after
- }
-
Name::AttentionStart => construct::attention::start,
Name::AttentionInside => construct::attention::inside,
@@ -511,24 +497,6 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
Name::CharacterReferenceNumeric => construct::character_reference::numeric,
Name::CharacterReferenceValue => construct::character_reference::value,
- Name::RawFlowStart => construct::raw_flow::start,
- Name::RawFlowBeforeSequenceOpen => construct::raw_flow::before_sequence_open,
- Name::RawFlowSequenceOpen => construct::raw_flow::sequence_open,
- Name::RawFlowInfoBefore => construct::raw_flow::info_before,
- Name::RawFlowInfo => construct::raw_flow::info,
- Name::RawFlowMetaBefore => construct::raw_flow::meta_before,
- Name::RawFlowMeta => construct::raw_flow::meta,
- Name::RawFlowAtNonLazyBreak => construct::raw_flow::at_non_lazy_break,
- Name::RawFlowCloseStart => construct::raw_flow::close_start,
- Name::RawFlowBeforeSequenceClose => construct::raw_flow::before_sequence_close,
- Name::RawFlowSequenceClose => construct::raw_flow::sequence_close,
- Name::RawFlowAfterSequenceClose => construct::raw_flow::sequence_close_after,
- Name::RawFlowContentBefore => construct::raw_flow::content_before,
- Name::RawFlowContentStart => construct::raw_flow::content_start,
- Name::RawFlowBeforeContentChunk => construct::raw_flow::before_content_chunk,
- Name::RawFlowContentChunk => construct::raw_flow::content_chunk,
- Name::RawFlowAfter => construct::raw_flow::after,
-
Name::CodeIndentedStart => construct::code_indented::start,
Name::CodeIndentedAtBreak => construct::code_indented::at_break,
Name::CodeIndentedAfter => construct::code_indented::after,
@@ -537,11 +505,10 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
Name::CodeIndentedFurtherBegin => construct::code_indented::further_begin,
Name::CodeIndentedFurtherAfter => construct::code_indented::further_after,
- Name::RawTextStart => construct::raw_text::start,
- Name::RawTextSequenceOpen => construct::raw_text::sequence_open,
- Name::RawTextBetween => construct::raw_text::between,
- Name::RawTextData => construct::raw_text::data,
- Name::RawTextSequenceClose => construct::raw_text::sequence_close,
+ Name::ContentChunkStart => construct::content::chunk_start,
+ Name::ContentChunkInside => construct::content::chunk_inside,
+ Name::ContentDefinitionBefore => construct::content::definition_before,
+ Name::ContentDefinitionAfter => construct::content::definition_after,
Name::DataStart => construct::partial_data::start,
Name::DataInside => construct::partial_data::inside,
@@ -550,6 +517,7 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
Name::DefinitionStart => construct::definition::start,
Name::DefinitionBefore => construct::definition::before,
Name::DefinitionLabelAfter => construct::definition::label_after,
+ Name::DefinitionLabelNok => construct::definition::label_nok,
Name::DefinitionMarkerAfter => construct::definition::marker_after,
Name::DefinitionDestinationBefore => construct::definition::destination_before,
Name::DefinitionDestinationAfter => construct::definition::destination_after,
@@ -599,11 +567,10 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
Name::FlowBeforeHeadingAtx => construct::flow::before_heading_atx,
Name::FlowBeforeHeadingSetext => construct::flow::before_heading_setext,
Name::FlowBeforeThematicBreak => construct::flow::before_thematic_break,
- Name::FlowBeforeDefinition => construct::flow::before_definition,
Name::FlowAfter => construct::flow::after,
Name::FlowBlankLineBefore => construct::flow::blank_line_before,
Name::FlowBlankLineAfter => construct::flow::blank_line_after,
- Name::FlowBeforeParagraph => construct::flow::before_paragraph,
+ Name::FlowBeforeContent => construct::flow::before_content,
Name::FrontmatterStart => construct::frontmatter::start,
Name::FrontmatterOpenSequence => construct::frontmatter::open_sequence,
@@ -624,7 +591,6 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
Name::GfmAutolinkLiteralProtocolSlashesInside => {
construct::gfm_autolink_literal::protocol_slashes_inside
}
-
Name::GfmAutolinkLiteralWwwAfter => construct::gfm_autolink_literal::www_after,
Name::GfmAutolinkLiteralWwwStart => construct::gfm_autolink_literal::www_start,
Name::GfmAutolinkLiteralWwwPrefixInside => {
@@ -636,7 +602,6 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
construct::gfm_autolink_literal::domain_at_punctuation
}
Name::GfmAutolinkLiteralDomainAfter => construct::gfm_autolink_literal::domain_after,
-
Name::GfmAutolinkLiteralPathInside => construct::gfm_autolink_literal::path_inside,
Name::GfmAutolinkLiteralPathAtPunctuation => {
construct::gfm_autolink_literal::path_at_punctuation
@@ -671,21 +636,12 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
Name::GfmLabelStartFootnoteStart => construct::gfm_label_start_footnote::start,
Name::GfmLabelStartFootnoteOpen => construct::gfm_label_start_footnote::open,
- Name::GfmTaskListItemCheckStart => construct::gfm_task_list_item_check::start,
- Name::GfmTaskListItemCheckInside => construct::gfm_task_list_item_check::inside,
- Name::GfmTaskListItemCheckClose => construct::gfm_task_list_item_check::close,
- Name::GfmTaskListItemCheckAfter => construct::gfm_task_list_item_check::after,
- Name::GfmTaskListItemCheckAfterSpaceOrTab => {
- construct::gfm_task_list_item_check::after_space_or_tab
- }
-
Name::GfmTableStart => construct::gfm_table::start,
Name::GfmTableHeadRowBefore => construct::gfm_table::head_row_before,
Name::GfmTableHeadRowStart => construct::gfm_table::head_row_start,
Name::GfmTableHeadRowBreak => construct::gfm_table::head_row_break,
Name::GfmTableHeadRowData => construct::gfm_table::head_row_data,
Name::GfmTableHeadRowEscape => construct::gfm_table::head_row_escape,
-
Name::GfmTableHeadDelimiterStart => construct::gfm_table::head_delimiter_start,
Name::GfmTableHeadDelimiterBefore => construct::gfm_table::head_delimiter_before,
Name::GfmTableHeadDelimiterCellBefore => construct::gfm_table::head_delimiter_cell_before,
@@ -699,13 +655,20 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
}
Name::GfmTableHeadDelimiterCellAfter => construct::gfm_table::head_delimiter_cell_after,
Name::GfmTableHeadDelimiterNok => construct::gfm_table::head_delimiter_nok,
-
Name::GfmTableBodyRowBefore => construct::gfm_table::body_row_before,
Name::GfmTableBodyRowStart => construct::gfm_table::body_row_start,
Name::GfmTableBodyRowBreak => construct::gfm_table::body_row_break,
Name::GfmTableBodyRowData => construct::gfm_table::body_row_data,
Name::GfmTableBodyRowEscape => construct::gfm_table::body_row_escape,
+ Name::GfmTaskListItemCheckStart => construct::gfm_task_list_item_check::start,
+ Name::GfmTaskListItemCheckInside => construct::gfm_task_list_item_check::inside,
+ Name::GfmTaskListItemCheckClose => construct::gfm_task_list_item_check::close,
+ Name::GfmTaskListItemCheckAfter => construct::gfm_task_list_item_check::after,
+ Name::GfmTaskListItemCheckAfterSpaceOrTab => {
+ construct::gfm_task_list_item_check::after_space_or_tab
+ }
+
Name::HardBreakEscapeStart => construct::hard_break_escape::start,
Name::HardBreakEscapeAfter => construct::hard_break_escape::after,
@@ -859,11 +822,25 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
Name::ListItemContBlank => construct::list_item::cont_blank,
Name::ListItemContFilled => construct::list_item::cont_filled,
+ Name::MdxExpressionStart => construct::partial_mdx_expression::start,
+ Name::MdxExpressionBefore => construct::partial_mdx_expression::before,
+ Name::MdxExpressionInside => construct::partial_mdx_expression::inside,
+ Name::MdxExpressionEolAfter => construct::partial_mdx_expression::eol_after,
+
+ Name::MdxExpressionFlowStart => construct::mdx_expression_flow::start,
+ Name::MdxExpressionFlowBefore => construct::mdx_expression_flow::before,
+ Name::MdxExpressionFlowAfter => construct::mdx_expression_flow::after,
+ Name::MdxExpressionFlowEnd => construct::mdx_expression_flow::end,
+
+ Name::MdxExpressionTextStart => construct::mdx_expression_text::start,
+ Name::MdxExpressionTextAfter => construct::mdx_expression_text::after,
+
Name::MdxJsxFlowStart => construct::mdx_jsx_flow::start,
Name::MdxJsxFlowBefore => construct::mdx_jsx_flow::before,
Name::MdxJsxFlowAfter => construct::mdx_jsx_flow::after,
Name::MdxJsxFlowEnd => construct::mdx_jsx_flow::end,
Name::MdxJsxFlowNok => construct::mdx_jsx_flow::nok,
+
Name::MdxJsxTextStart => construct::mdx_jsx_text::start,
Name::MdxJsxTextAfter => construct::mdx_jsx_text::after,
Name::MdxJsxTextNok => construct::mdx_jsx_text::nok,
@@ -883,6 +860,9 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
Name::MdxJsxLocalNameAfter => construct::partial_mdx_jsx::local_name_after,
Name::MdxJsxAttributeBefore => construct::partial_mdx_jsx::attribute_before,
Name::MdxJsxSelfClosing => construct::partial_mdx_jsx::self_closing,
+ Name::MdxJsxAttributeExpressionAfter => {
+ construct::partial_mdx_jsx::attribute_expression_after
+ }
Name::MdxJsxAttributePrimaryName => construct::partial_mdx_jsx::attribute_primary_name,
Name::MdxJsxAttributePrimaryNameAfter => {
construct::partial_mdx_jsx::attribute_primary_name_after
@@ -899,6 +879,9 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
construct::partial_mdx_jsx::attribute_value_quoted_start
}
Name::MdxJsxAttributeValueQuoted => construct::partial_mdx_jsx::attribute_value_quoted,
+ Name::MdxJsxAttributeValueExpressionAfter => {
+ construct::partial_mdx_jsx::attribute_value_expression_after
+ }
Name::MdxJsxEsWhitespaceStart => construct::partial_mdx_jsx::es_whitespace_start,
Name::MdxJsxEsWhitespaceInside => construct::partial_mdx_jsx::es_whitespace_inside,
Name::MdxJsxEsWhitespaceEolAfter => construct::partial_mdx_jsx::es_whitespace_eol_after,
@@ -907,8 +890,33 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
Name::NonLazyContinuationAfter => construct::partial_non_lazy_continuation::after,
Name::ParagraphStart => construct::paragraph::start,
+ Name::ParagraphLineStart => construct::paragraph::line_start,
Name::ParagraphInside => construct::paragraph::inside,
+ Name::RawFlowStart => construct::raw_flow::start,
+ Name::RawFlowBeforeSequenceOpen => construct::raw_flow::before_sequence_open,
+ Name::RawFlowSequenceOpen => construct::raw_flow::sequence_open,
+ Name::RawFlowInfoBefore => construct::raw_flow::info_before,
+ Name::RawFlowInfo => construct::raw_flow::info,
+ Name::RawFlowMetaBefore => construct::raw_flow::meta_before,
+ Name::RawFlowMeta => construct::raw_flow::meta,
+ Name::RawFlowAtNonLazyBreak => construct::raw_flow::at_non_lazy_break,
+ Name::RawFlowCloseStart => construct::raw_flow::close_start,
+ Name::RawFlowBeforeSequenceClose => construct::raw_flow::before_sequence_close,
+ Name::RawFlowSequenceClose => construct::raw_flow::sequence_close,
+ Name::RawFlowAfterSequenceClose => construct::raw_flow::sequence_close_after,
+ Name::RawFlowContentBefore => construct::raw_flow::content_before,
+ Name::RawFlowContentStart => construct::raw_flow::content_start,
+ Name::RawFlowBeforeContentChunk => construct::raw_flow::before_content_chunk,
+ Name::RawFlowContentChunk => construct::raw_flow::content_chunk,
+ Name::RawFlowAfter => construct::raw_flow::after,
+
+ Name::RawTextStart => construct::raw_text::start,
+ Name::RawTextSequenceOpen => construct::raw_text::sequence_open,
+ Name::RawTextBetween => construct::raw_text::between,
+ Name::RawTextData => construct::raw_text::data,
+ Name::RawTextSequenceClose => construct::raw_text::sequence_close,
+
Name::SpaceOrTabStart => construct::partial_space_or_tab::start,
Name::SpaceOrTabInside => construct::partial_space_or_tab::inside,
Name::SpaceOrTabAfter => construct::partial_space_or_tab::after,
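
Worth noting for readers new to this file: every `Name` variant above is just data, and `call` turns it into a function pointer that the tokenizer trampolines on. A minimal sketch of that pattern, with hypothetical state names (`Start`, `Inside`); the real `Name` enum and construct modules are far larger:

```rust
// Minimal sketch of the dispatch in `src/state.rs`, with made-up names.
#[derive(Clone, Copy, Debug)]
enum Name {
    Start,
    Inside,
}

enum State {
    Next(Name),
    Ok,
}

struct Tokenizer; // stand-in for the real `Tokenizer`

fn start(_tokenizer: &mut Tokenizer) -> State {
    // A real state function would inspect the current byte here.
    State::Next(Name::Inside)
}

fn inside(_tokenizer: &mut Tokenizer) -> State {
    State::Ok
}

/// Map a state name to its function, as `call` does above.
fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
    let func = match name {
        Name::Start => start,
        Name::Inside => inside,
    };
    func(tokenizer)
}

fn main() {
    // Trampoline: feed every `Next` back into `call`, so deeply nested
    // grammars never grow the native call stack.
    let mut tokenizer = Tokenizer;
    let mut state = State::Next(Name::Start);
    while let State::Next(name) = state {
        state = call(&mut tokenizer, name);
    }
    assert!(matches!(state, State::Ok));
}
```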
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index 12f91cf..5bb7e98 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -24,6 +24,13 @@ use crate::tokenizer::Tokenizer;
use crate::util::{edit_map::EditMap, skip};
use alloc::{string::String, vec, vec::Vec};
+/// Result of subtokenization.
+#[derive(Debug)]
+pub struct Subresult {
+ /// Whether subtokenization is done.
+ pub done: bool,
+ /// Identifiers of GFM footnote definitions that were found.
+ pub gfm_footnote_definitions: Vec<String>,
+ /// Identifiers of definitions that were found.
+ pub definitions: Vec<String>,
+}
+
/// Link two [`Event`][]s.
///
/// Arbitrary (void) events can be linked together.
@@ -69,10 +76,19 @@ pub fn link_to(events: &mut [Event], previous: usize, next: usize) {
/// Parse linked events.
///
-/// Supposed to be called repeatedly, returns `true` when done.
+/// Supposed to be called repeatedly: the returned [`Subresult`][]'s `done`
+/// field is `true` when all linked events have been parsed.
-pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> Result<bool, String> {
+pub fn subtokenize(
+ events: &mut Vec<Event>,
+ parse_state: &ParseState,
+ filter: &Option<Content>,
+) -> Result<Subresult, String> {
let mut map = EditMap::new();
- let mut done = true;
let mut index = 0;
+ let mut value = Subresult {
+ done: true,
+ gfm_footnote_definitions: vec![],
+ definitions: vec![],
+ };
+ // Accumulated `(removed, added)` event counts across `divide_events` calls.
+ let mut acc = (0, 0);
while index < events.len() {
let event = &events[index];
@@ -82,16 +98,19 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> Result<
debug_assert_eq!(event.kind, Kind::Enter);
// No need to enter linked events again.
- if link.previous == None {
+ if link.previous.is_none()
+ && (filter.is_none() || &link.content == filter.as_ref().unwrap())
+ {
// Index into `events` pointing to a chunk.
let mut link_index = Some(index);
// Subtokenizer.
let mut tokenizer = Tokenizer::new(event.point.clone(), parse_state);
// Substate.
- let mut state = State::Next(if link.content == Content::String {
- StateName::StringStart
- } else {
- StateName::TextStart
+ let mut state = State::Next(match link.content {
+ Content::Flow => unreachable!("flow subcontent not implemented yet"),
+ Content::Content => StateName::ContentDefinitionBefore,
+ Content::String => StateName::StringStart,
+ Content::Text => StateName::TextStart,
});
// Check if this is the first paragraph, after zero or more
@@ -143,11 +162,14 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> Result<
link_index = link_curr.next;
}
- tokenizer.flush(state, true)?;
+ let mut result = tokenizer.flush(state, true)?;
+ value
+ .gfm_footnote_definitions
+ .append(&mut result.gfm_footnote_definitions);
+ value.definitions.append(&mut result.definitions);
+ value.done = false;
- divide_events(&mut map, events, index, &mut tokenizer.events);
-
- done = false;
+ acc = divide_events(&mut map, events, index, &mut tokenizer.events, acc);
}
}
@@ -156,7 +178,7 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> Result<
map.consume(events);
- Ok(done)
+ Ok(value)
}
/// Divide `child_events` over links in `events`, the first of which is at
@@ -166,15 +188,17 @@ pub fn divide_events(
events: &[Event],
mut link_index: usize,
child_events: &mut Vec<Event>,
-) {
+ // `(removed, added)` event counts from earlier calls, used to shift
+ // link indices when dividing repeatedly over the same `events`.
+ acc_before: (usize, usize),
+) -> (usize, usize) {
// Loop through `child_events` to figure out which parts belong where and
// fix deep links.
let mut child_index = 0;
let mut slices = vec![];
let mut slice_start = 0;
let mut old_prev: Option<usize> = None;
+ let len = child_events.len();
- while child_index < child_events.len() {
+ while child_index < len {
let current = &child_events[child_index].point;
let end = &events[link_index + 1].point;
@@ -200,7 +224,8 @@ pub fn divide_events(
} else {
old_prev + link_index - (slices.len() - 1) * 2
};
- prev_event.link.as_mut().unwrap().next = Some(new_link);
+ prev_event.link.as_mut().unwrap().next =
+ Some(new_link + acc_before.1 - acc_before.0);
}
}
@@ -219,7 +244,9 @@ pub fn divide_events(
// The `index` in `events` where the current link is,
// minus 2 events (the enter and exit) for each removed
// link.
- .map(|previous| previous + link_index - (slices.len() * 2));
+ .map(|previous| {
+ previous + link_index - (slices.len() * 2) + acc_before.1 - acc_before.0
+ });
}
}
@@ -245,4 +272,6 @@ pub fn divide_events(
child_events.split_off(slices[index].1),
);
}
+
+ // Report the updated `(removed, added)` totals for the next call.
+ (acc_before.0 + (slices.len() * 2), acc_before.1 + len)
}
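
With `subtokenize` now returning a `Subresult` and taking a content filter, callers are expected to drive it in passes. A hypothetical in-crate driver, assuming `events` and a `ParseState` are already in hand; the real call sites in `src/parser.rs` may differ in detail:

```rust
use crate::event::{Content, Event};
use crate::parser::ParseState;
use crate::subtokenize::subtokenize;
use alloc::{string::String, vec, vec::Vec};

/// Hypothetical driver: run the `content` pass first so definitions are
/// collected, then the remaining text/string passes until done.
fn drive(
    events: &mut Vec<Event>,
    parse_state: &ParseState,
) -> Result<(Vec<String>, Vec<String>), String> {
    let mut definitions = vec![];
    let mut gfm_footnote_definitions = vec![];

    // Pass 1: only `Content::Content` links, which can contain definitions.
    loop {
        let mut result = subtokenize(events, parse_state, &Some(Content::Content))?;
        definitions.append(&mut result.definitions);
        gfm_footnote_definitions.append(&mut result.gfm_footnote_definitions);
        if result.done {
            break;
        }
    }

    // Pass 2: everything else, repeated until no unparsed links remain.
    while !subtokenize(events, parse_state, &None)?.done {}

    Ok((gfm_footnote_definitions, definitions))
}
```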
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 5095abb..8441f7e 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -12,6 +12,7 @@ use crate::event::{Content, Event, Kind, Link, Name, Point, VOID_EVENTS};
use crate::parser::ParseState;
use crate::resolve::{call as call_resolve, Name as ResolveName};
use crate::state::{call, State};
+use crate::subtokenize::Subresult;
use crate::util::{char::format_byte_opt, constant::TAB_SIZE, edit_map::EditMap};
use alloc::{boxed::Box, string::String, vec, vec::Vec};
@@ -609,23 +610,35 @@ impl<'a> Tokenizer<'a> {
}
-/// Flush.
+/// Flush: parse the final bytes, optionally run resolvers, and report
+/// the definitions that were found.
- pub fn flush(&mut self, state: State, resolve: bool) -> Result<(), String> {
+ pub fn flush(&mut self, state: State, resolve: bool) -> Result<Subresult, String> {
let to = (self.point.index, self.point.vs);
let state = push_impl(self, to, to, state, true);
- let result = state.to_result();
- if resolve && result.is_ok() {
+ state.to_result()?;
+
+ let mut value = Subresult {
+ done: false,
+ gfm_footnote_definitions: self.tokenize_state.gfm_footnote_definitions.split_off(0),
+ definitions: self.tokenize_state.definitions.split_off(0),
+ };
+
+ if resolve {
let resolvers = self.resolvers.split_off(0);
let mut index = 0;
while index < resolvers.len() {
- call_resolve(self, resolvers[index]);
+ if let Some(mut result) = call_resolve(self, resolvers[index])? {
+ value
+ .gfm_footnote_definitions
+ .append(&mut result.gfm_footnote_definitions);
+ value.definitions.append(&mut result.definitions);
+ }
index += 1;
}
self.map.consume(&mut self.events);
}
- result
+ Ok(value)
}
}
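
`flush` drains `self.resolvers` and the collected definition lists with `Vec::split_off(0)`, which moves the entire contents out while leaving the field empty and still usable — useful here because `call_resolve(self, …)` needs `self` mutably while the resolver list is being walked. The idiom in isolation:

```rust
fn main() {
    let mut resolvers = vec!["attention", "label", "gfm_table"];

    // Take everything, leaving the original vector empty but valid.
    let taken = resolvers.split_off(0);

    assert!(resolvers.is_empty());
    assert_eq!(taken, vec!["attention", "label", "gfm_table"]);
}
```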
diff --git a/tests/definition.rs b/tests/definition.rs
index 11f783d..6f680ff 100644
--- a/tests/definition.rs
+++ b/tests/definition.rs
@@ -441,6 +441,42 @@ fn definition() -> Result<(), String> {
);
assert_eq!(
+ micromark("[\na\n=\n]: b"),
+ "<h1>[\na</h1>\n<p>]: b</p>",
+ "should prefer setext headings over definition labels"
+ );
+
+ assert_eq!(
+ micromark("[a]: b '\nc\n=\n'"),
+ "<h1>[a]: b '\nc</h1>\n<p>'</p>",
+ "should prefer setext headings over definition titles"
+ );
+
+ assert_eq!(
+ micromark("[\n***\n]: b"),
+ "<p>[</p>\n<hr />\n<p>]: b</p>",
+ "should prefer thematic breaks over definition labels"
+ );
+
+ assert_eq!(
+ micromark("[a]: b '\n***\n'"),
+ "<p>[a]: b '</p>\n<hr />\n<p>'</p>",
+ "should prefer thematic breaks over definition titles"
+ );
+
+ assert_eq!(
+ micromark("[\n```\n]: b"),
+ "<p>[</p>\n<pre><code>]: b\n</code></pre>\n",
+ "should prefer code (fenced) over definition labels"
+ );
+
+ assert_eq!(
+ micromark("[a]: b '\n```\n'"),
+ "<p>[a]: b '</p>\n<pre><code>'\n</code></pre>\n",
+ "should prefer code (fenced) over definition titles"
+ );
+
+ assert_eq!(
micromark_with_options(
"[foo]: /url \"title\"",
&Options {
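
These new assertions pin down an ordering question: with definitions now parsed during the content pass, other flow constructs (setext underlines, thematic breaks, fenced code) take precedence over unfinished definition labels and titles. The first case, checked against the crate's `micromark` entry point as the test suite uses it:

```rust
use micromark::micromark;

fn main() {
    // A would-be definition label interrupted by a setext underline:
    // the heading wins, and the trailing `]: b` falls back to a paragraph.
    assert_eq!(micromark("[\na\n=\n]: b"), "<h1>[\na</h1>\n<p>]: b</p>");
}
```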
diff --git a/tests/fuzz.rs b/tests/fuzz.rs
index 146ff24..47dbea5 100644
--- a/tests/fuzz.rs
+++ b/tests/fuzz.rs
@@ -6,7 +6,7 @@ use pretty_assertions::assert_eq;
fn fuzz() -> Result<(), String> {
assert_eq!(
micromark("[\n~\na\n-\n\n"),
- "<p>[\n~\na</p>\n<ul>\n<li></li>\n</ul>\n",
+ "<h2>[\n~\na</h2>\n",
"1: label, blank lines, and code"
);
diff --git a/tests/gfm_table.rs b/tests/gfm_table.rs
index 619bf2a..b7f884a 100644
--- a/tests/gfm_table.rs
+++ b/tests/gfm_table.rs
@@ -338,6 +338,12 @@ fn gfm_table() -> Result<(), String> {
);
assert_eq!(
+ micromark_with_options("[\na\n:-\n]: b", &gfm)?,
+ "<p>[</p>\n<table>\n<thead>\n<tr>\n<th align=\"left\">a</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td align=\"left\">]: b</td>\n</tr>\n</tbody>\n</table>",
+ "should prefer GFM tables over definitions"
+ );
+
+ assert_eq!(
micromark_with_options(
r###"# Align