Diffstat:
-rw-r--r--  src/construct/attention.rs       |   6
-rw-r--r--  src/construct/content.rs         | 188
-rw-r--r--  src/construct/definition.rs      |  26
-rw-r--r--  src/construct/document.rs        |   5
-rw-r--r--  src/construct/flow.rs            |  33
-rw-r--r--  src/construct/gfm_table.rs       |  61
-rw-r--r--  src/construct/heading_atx.rs     |   7
-rw-r--r--  src/construct/heading_setext.rs  | 137
-rw-r--r--  src/construct/label_end.rs       |   5
-rw-r--r--  src/construct/list_item.rs       |   7
-rw-r--r--  src/construct/mod.rs             |   4
-rw-r--r--  src/construct/paragraph.rs       | 149
-rw-r--r--  src/construct/partial_data.rs    |   7
-rw-r--r--  src/construct/string.rs          |   6
-rw-r--r--  src/construct/text.rs            |   6

15 files changed, 436 insertions(+), 211 deletions(-)
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
index 4a208df..4d58610 100644
--- a/src/construct/attention.rs
+++ b/src/construct/attention.rs
@@ -79,6 +79,7 @@
use crate::event::{Event, Kind, Name, Point};
use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
+use crate::subtokenize::Subresult;
use crate::tokenizer::Tokenizer;
use crate::util::{
char::{
@@ -87,6 +88,7 @@ use crate::util::{
},
slice::Slice,
};
+use alloc::string::String;
use alloc::{vec, vec::Vec};
/// Attention sequence that we can take markers from.
@@ -150,7 +152,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {
}
/// Resolve sequences.
-pub fn resolve(tokenizer: &mut Tokenizer) {
+pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
// Find all sequences, gather info about them.
let mut sequences = get_sequences(tokenizer);
@@ -221,6 +223,8 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
}
tokenizer.map.consume(&mut tokenizer.events);
+
+ Ok(None)
}
/// Get sequences.
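
Every resolver touched by this commit adopts the same new signature: instead of returning nothing, a resolver now returns `Result<Option<Subresult>, String>`, so resolvers that themselves subtokenize (like the new content resolver below) can propagate nested results and parse errors. A minimal sketch of the shape, using this crate’s `Subresult` and `Tokenizer` types:

```rust
use crate::subtokenize::Subresult;
use crate::tokenizer::Tokenizer;
use alloc::string::String;

/// Sketch: a resolver that only rewrites events in place signals
/// “nothing nested, no error” with `Ok(None)`; only resolvers that
/// subtokenize (see `content::resolve`) return `Ok(Some(result))`.
pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
    // ...edit `tokenizer.events`, typically via `tokenizer.map`...
    tokenizer.map.consume(&mut tokenizer.events);
    Ok(None)
}
```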
diff --git a/src/construct/content.rs b/src/construct/content.rs
new file mode 100644
index 0000000..6c10cea
--- /dev/null
+++ b/src/construct/content.rs
@@ -0,0 +1,188 @@
+//! Content occurs in the [flow][] content type.
+//!
+//! Content contains zero or more [definition][definition]s, followed by zero
+//! or one [paragraph][].
+//!
+//! The constructs found in content are:
+//!
+//! * [Definition][crate::construct::definition]
+//! * [Paragraph][crate::construct::paragraph]
+//!
+//! ## Tokens
+//!
+//! * [`Content`][Name::Content]
+//!
+//! > 👉 **Note**: while parsing, [`Content`][Name::Content]
+//! > is used, which is later compiled away.
+//!
+//! ## References
+//!
+//! * [`content.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/content.js)
+//!
+//! [flow]: crate::construct::flow
+//! [definition]: crate::construct::definition
+//! [paragraph]: crate::construct::paragraph
+
+use crate::event::{Content, Kind, Link, Name};
+use crate::resolve::Name as ResolveName;
+use crate::state::{Name as StateName, State};
+use crate::subtokenize::{subtokenize, Subresult};
+use crate::tokenizer::Tokenizer;
+use alloc::{string::String, vec};
+
+/// Before a content chunk.
+///
+/// ```markdown
+/// > | abc
+/// ^
+/// ```
+pub fn chunk_start(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ None | Some(b'\n') => unreachable!("unexpected eol/eof"),
+ _ => {
+ tokenizer.enter_link(
+ Name::Content,
+ Link {
+ previous: None,
+ next: None,
+ content: Content::Content,
+ },
+ );
+ State::Retry(StateName::ContentChunkInside)
+ }
+ }
+}
+
+/// In a content chunk.
+///
+/// ```markdown
+/// > | abc
+/// ^^^
+/// ```
+pub fn chunk_inside(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ None | Some(b'\n') => {
+ tokenizer.exit(Name::Content);
+ tokenizer.register_resolver_before(ResolveName::Content);
+ // You’d be interrupting.
+ tokenizer.interrupt = true;
+ State::Ok
+ }
+ _ => {
+ tokenizer.consume();
+ State::Next(StateName::ContentChunkInside)
+ }
+ }
+}
+
+/// Before a definition.
+///
+/// ```markdown
+/// > | [a]: b
+/// ^
+/// ```
+pub fn definition_before(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.attempt(
+ State::Next(StateName::ContentDefinitionAfter),
+ State::Next(StateName::ParagraphStart),
+ );
+ State::Retry(StateName::DefinitionStart)
+}
+
+/// After a definition.
+///
+/// ```markdown
+/// > | [a]: b
+/// ^
+/// | c
+/// ```
+pub fn definition_after(tokenizer: &mut Tokenizer) -> State {
+ debug_assert!(matches!(tokenizer.current, None | Some(b'\n')));
+ if tokenizer.current.is_none() {
+ State::Ok
+ } else {
+ tokenizer.enter(Name::LineEnding);
+ tokenizer.consume();
+ tokenizer.exit(Name::LineEnding);
+ State::Next(StateName::ContentDefinitionBefore)
+ }
+}
+
+/// Merge `Content` chunks, which currently span a single line, into actual
+/// `Content`s that span multiple lines.
+pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
+ let mut index = 0;
+
+ while index < tokenizer.events.len() {
+ let event = &tokenizer.events[index];
+
+ if event.kind == Kind::Enter && event.name == Name::Content {
+ // Exit:Content
+ let mut exit_index = index + 1;
+
+ loop {
+ let mut enter_index = exit_index + 1;
+
+ if enter_index == tokenizer.events.len()
+ || tokenizer.events[enter_index].name != Name::LineEnding
+ {
+ break;
+ }
+
+ // Skip past line ending.
+ enter_index += 2;
+
+ // Skip past prefix.
+ while enter_index < tokenizer.events.len() {
+ let event = &tokenizer.events[enter_index];
+
+ if event.name != Name::SpaceOrTab
+ && event.name != Name::BlockQuotePrefix
+ && event.name != Name::BlockQuoteMarker
+ {
+ break;
+ }
+
+ enter_index += 1;
+ }
+
+ if enter_index == tokenizer.events.len()
+ || tokenizer.events[enter_index].name != Name::Content
+ {
+ break;
+ }
+
+ // Set Exit:Content point to Exit:LineEnding.
+ tokenizer.events[exit_index].point = tokenizer.events[exit_index + 2].point.clone();
+ // Remove Enter:LineEnding, Exit:LineEnding.
+ tokenizer.map.add(exit_index + 1, 2, vec![]);
+
+ // Link Enter:Content to Enter:Content on this line and vice versa.
+ tokenizer.events[exit_index - 1].link.as_mut().unwrap().next = Some(enter_index);
+ tokenizer.events[enter_index]
+ .link
+ .as_mut()
+ .unwrap()
+ .previous = Some(exit_index - 1);
+
+ // Potential next start.
+ exit_index = enter_index + 1;
+ }
+
+ // Move to `Exit:Content`.
+ index = exit_index;
+ }
+
+ index += 1;
+ }
+
+ tokenizer.map.consume(&mut tokenizer.events);
+
+ let result = subtokenize(
+ &mut tokenizer.events,
+ tokenizer.parse_state,
+ &Some(Content::Content),
+ )?;
+
+ Ok(Some(result))
+}
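
In practice the new construct means a definition can sit directly above a paragraph, with no blank line between them, and both belong to one content run. A sketch of the observable behavior, assuming the crate’s public `to_html` entry point (the same function was exposed as `micromark::micromark` around the time of this commit); the call names are illustrative, not part of this diff:

```rust
fn main() {
    // One content run: the definition is compiled away and the
    // paragraph on the next line remains.
    assert_eq!(markdown::to_html("[a]: b\nc"), "<p>c</p>");
    // The paragraph may reference the definition directly above it.
    assert_eq!(
        markdown::to_html("[a]: /url\n[a]"),
        "<p><a href=\"/url\">a</a></p>"
    );
}
```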
diff --git a/src/construct/definition.rs b/src/construct/definition.rs
index 1071489..8ccfb90 100644
--- a/src/construct/definition.rs
+++ b/src/construct/definition.rs
@@ -1,4 +1,4 @@
-//! Definition occurs in the [flow] content type.
+//! Definition occurs in the [content] content type.
//!
//! ## Grammar
//!
@@ -12,8 +12,8 @@
//! ; those parts.
//! ```
//!
-//! As this construct occurs in flow, like all flow constructs, it must be
-//! followed by an eol (line ending) or eof (end of file).
+//! This construct must be followed by an eol (line ending) or eof (end of
+//! file), like flow constructs.
//!
//! See [`destination`][destination], [`label`][label], and [`title`][title]
//! for grammar, notes, and recommendations on each part.
@@ -88,7 +88,7 @@
//! * [`definition.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/definition.js)
//! * [*§ 4.7 Link reference definitions* in `CommonMark`](https://spec.commonmark.org/0.30/#link-reference-definitions)
//!
-//! [flow]: crate::construct::flow
+//! [content]: crate::construct::content
//! [string]: crate::construct::string
//! [character_escape]: crate::construct::character_escape
//! [character_reference]: crate::construct::character_reference
@@ -157,7 +157,10 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.token_1 = Name::DefinitionLabel;
tokenizer.tokenize_state.token_2 = Name::DefinitionLabelMarker;
tokenizer.tokenize_state.token_3 = Name::DefinitionLabelString;
- tokenizer.attempt(State::Next(StateName::DefinitionLabelAfter), State::Nok);
+ tokenizer.attempt(
+ State::Next(StateName::DefinitionLabelAfter),
+ State::Next(StateName::DefinitionLabelNok),
+ );
State::Retry(StateName::LabelStart)
}
_ => State::Nok,
@@ -192,6 +195,19 @@ pub fn label_after(tokenizer: &mut Tokenizer) -> State {
}
}
+/// At a non-label.
+///
+/// ```markdown
+/// > | []
+/// ^
+/// ```
+pub fn label_nok(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.tokenize_state.token_1 = Name::Data;
+ tokenizer.tokenize_state.token_2 = Name::Data;
+ tokenizer.tokenize_state.token_3 = Name::Data;
+ State::Nok
+}
+
/// After marker.
///
/// ```markdown
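
The failure path exists because `before` points `token_1`–`token_3` at the definition label names before attempting the label; when the label never matches (for example the empty `[]`), `label_nok` resets them to `Data` so the state left behind is harmless. Observable effect, sketched with the same assumed `to_html` as above:

```rust
fn main() {
    // `[]` is not a valid label, so no definition is formed and the
    // line falls through to content/paragraph as plain text.
    assert_eq!(markdown::to_html("[]: a"), "<p>[]: a</p>");
}
```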
diff --git a/src/construct/document.rs b/src/construct/document.rs
index 45a961d..82f2ebd 100644
--- a/src/construct/document.rs
+++ b/src/construct/document.rs
@@ -413,7 +413,7 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State {
while !document_lazy_continuation_current && stack_index > 0 {
stack_index -= 1;
let name = &child.stack[stack_index];
- if name == &Name::Paragraph || name == &Name::Definition || name == &Name::GfmTableHead {
+ if name == &Name::Content || name == &Name::GfmTableHead {
document_lazy_continuation_current = true;
}
}
@@ -423,7 +423,7 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State {
if !document_lazy_continuation_current && !child.events.is_empty() {
let before = skip::opt_back(&child.events, child.events.len() - 1, &[Name::LineEnding]);
let name = &child.events[before].name;
- if name == &Name::Paragraph {
+ if name == &Name::Content {
document_lazy_continuation_current = true;
}
}
@@ -582,6 +582,7 @@ fn resolve(tokenizer: &mut Tokenizer) {
&tokenizer.events,
flow_index,
&mut child.events,
+ (0, 0),
);
// Replace the flow data with actual events.
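
Lazy continuation is now keyed on `Content` rather than on `Paragraph` and `Definition` separately, since both constructs now live inside content. The behavior this preserves, sketched with the same assumed `to_html`:

```rust
fn main() {
    // The unprefixed second line lazily continues the content
    // inside the block quote.
    assert_eq!(
        markdown::to_html("> a\nb"),
        "<blockquote>\n<p>a\nb</p>\n</blockquote>"
    );
}
```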
diff --git a/src/construct/flow.rs b/src/construct/flow.rs
index e97ee63..08e0466 100644
--- a/src/construct/flow.rs
+++ b/src/construct/flow.rs
@@ -12,7 +12,6 @@
//!
//! * [Blank line][crate::construct::blank_line]
//! * [Code (indented)][crate::construct::code_indented]
-//! * [Definition][crate::construct::definition]
//! * [Heading (atx)][crate::construct::heading_atx]
//! * [Heading (setext)][crate::construct::heading_setext]
//! * [HTML (flow)][crate::construct::html_flow]
@@ -40,14 +39,14 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
Some(b'#') => {
tokenizer.attempt(
State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeParagraph),
+ State::Next(StateName::FlowBeforeContent),
);
State::Retry(StateName::HeadingAtxStart)
}
Some(b'$' | b'`' | b'~') => {
tokenizer.attempt(
State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeParagraph),
+ State::Next(StateName::FlowBeforeContent),
);
State::Retry(StateName::RawFlowStart)
}
@@ -56,7 +55,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
Some(b'*' | b'_') => {
tokenizer.attempt(
State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeParagraph),
+ State::Next(StateName::FlowBeforeContent),
);
State::Retry(StateName::ThematicBreakStart)
}
@@ -70,12 +69,12 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
Some(b'{') => {
tokenizer.attempt(
State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeParagraph),
+ State::Next(StateName::FlowBeforeContent),
);
State::Retry(StateName::MdxExpressionFlowStart)
}
// Actual parsing: blank line? Indented code? Indented anything?
- // Tables, setext heading underlines, definitions, and paragraphs are
+ // Tables, setext heading underlines, definitions, and content are
// particularly weird.
_ => State::Retry(StateName::FlowBlankLineBefore),
}
@@ -217,34 +216,20 @@ pub fn before_mdx_expression(tokenizer: &mut Tokenizer) -> State {
pub fn before_gfm_table(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(
State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeDefinition),
+ State::Next(StateName::FlowBeforeContent),
);
State::Retry(StateName::GfmTableStart)
}
-/// At definition.
-///
-/// ```markdown
-/// > | [a]: b
-/// ^
-/// ```
-pub fn before_definition(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt(
- State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeParagraph),
- );
- State::Retry(StateName::DefinitionStart)
-}
-
-/// At paragraph.
+/// At content.
///
/// ```markdown
/// > | a
/// ^
/// ```
-pub fn before_paragraph(tokenizer: &mut Tokenizer) -> State {
+pub fn before_content(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(State::Next(StateName::FlowAfter), State::Nok);
- State::Retry(StateName::ParagraphStart)
+ State::Retry(StateName::ContentChunkStart)
}
/// After blank line.
diff --git a/src/construct/gfm_table.rs b/src/construct/gfm_table.rs
index 27fbadf..63772c4 100644
--- a/src/construct/gfm_table.rs
+++ b/src/construct/gfm_table.rs
@@ -229,9 +229,10 @@ use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}
use crate::event::{Content, Event, Kind, Link, Name};
use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
+use crate::subtokenize::Subresult;
use crate::tokenizer::Tokenizer;
use crate::util::{constant::TAB_SIZE, skip::opt_back as skip_opt_back};
-use alloc::vec;
+use alloc::{string::String, vec};
/// Start of a GFM table.
///
@@ -771,15 +772,13 @@ pub fn body_row_escape(tokenizer: &mut Tokenizer) -> State {
}
/// Resolve GFM table.
-pub fn resolve(tokenizer: &mut Tokenizer) {
+pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
let mut index = 0;
- // let mut tables = vec![];
let mut in_first_cell_awaiting_pipe = true;
let mut in_row = false;
let mut in_delimiter_row = false;
let mut last_cell = (0, 0, 0, 0);
let mut cell = (0, 0, 0, 0);
-
let mut after_head_awaiting_first_body_row = false;
let mut last_table_end = 0;
let mut last_table_has_body = false;
@@ -800,17 +799,14 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
}
// Inject table start.
- tokenizer.map.add(
- index,
- 0,
- vec![Event {
- kind: Kind::Enter,
- name: Name::GfmTable,
- point: tokenizer.events[index].point.clone(),
- link: None,
- }],
- );
- } else if event.name == Name::GfmTableRow || event.name == Name::GfmTableDelimiterRow {
+ let enter = Event {
+ kind: Kind::Enter,
+ name: Name::GfmTable,
+ point: tokenizer.events[index].point.clone(),
+ link: None,
+ };
+ tokenizer.map.add(index, 0, vec![enter]);
+ } else if matches!(event.name, Name::GfmTableRow | Name::GfmTableDelimiterRow) {
in_delimiter_row = event.name == Name::GfmTableDelimiterRow;
in_row = true;
in_first_cell_awaiting_pipe = true;
@@ -821,23 +817,21 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
if after_head_awaiting_first_body_row {
after_head_awaiting_first_body_row = false;
last_table_has_body = true;
- tokenizer.map.add(
- index,
- 0,
- vec![Event {
- kind: Kind::Enter,
- name: Name::GfmTableBody,
- point: tokenizer.events[index].point.clone(),
- link: None,
- }],
- );
+ let enter = Event {
+ kind: Kind::Enter,
+ name: Name::GfmTableBody,
+ point: tokenizer.events[index].point.clone(),
+ link: None,
+ };
+ tokenizer.map.add(index, 0, vec![enter]);
}
}
// Cell data.
else if in_row
- && (event.name == Name::Data
- || event.name == Name::GfmTableDelimiterMarker
- || event.name == Name::GfmTableDelimiterFiller)
+ && matches!(
+ event.name,
+ Name::Data | Name::GfmTableDelimiterMarker | Name::GfmTableDelimiterFiller
+ )
{
in_first_cell_awaiting_pipe = false;
@@ -868,7 +862,7 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
} else if event.name == Name::GfmTableHead {
after_head_awaiting_first_body_row = true;
last_table_end = index;
- } else if event.name == Name::GfmTableRow || event.name == Name::GfmTableDelimiterRow {
+ } else if matches!(event.name, Name::GfmTableRow | Name::GfmTableDelimiterRow) {
in_row = false;
last_table_end = index;
if last_cell.1 != 0 {
@@ -878,9 +872,10 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
flush_cell(tokenizer, cell, in_delimiter_row, Some(index));
}
} else if in_row
- && (event.name == Name::Data
- || event.name == Name::GfmTableDelimiterMarker
- || event.name == Name::GfmTableDelimiterFiller)
+ && (matches!(
+ event.name,
+ Name::Data | Name::GfmTableDelimiterMarker | Name::GfmTableDelimiterFiller
+ ))
{
cell.3 = index;
}
@@ -891,6 +886,8 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
if last_table_end != 0 {
flush_table_end(tokenizer, last_table_end, last_table_has_body);
}
+
+ Ok(None)
}
/// Generate a cell.
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index c1090c4..b76e455 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -66,9 +66,10 @@ use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}
use crate::event::{Content, Event, Kind, Link, Name};
use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
+use crate::subtokenize::Subresult;
use crate::tokenizer::Tokenizer;
use crate::util::constant::{HEADING_ATX_OPENING_FENCE_SIZE_MAX, TAB_SIZE};
-use alloc::vec;
+use alloc::{string::String, vec};
/// Start of a heading (atx).
///
@@ -222,7 +223,7 @@ pub fn data(tokenizer: &mut Tokenizer) -> State {
}
/// Resolve heading (atx).
-pub fn resolve(tokenizer: &mut Tokenizer) {
+pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
let mut index = 0;
let mut heading_inside = false;
let mut data_start: Option<usize> = None;
@@ -281,4 +282,6 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
index += 1;
}
+
+ Ok(None)
}
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index e9cc759..3a484e1 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -54,6 +54,7 @@
//! * [`HeadingSetext`][Name::HeadingSetext]
//! * [`HeadingSetextText`][Name::HeadingSetextText]
//! * [`HeadingSetextUnderline`][Name::HeadingSetextUnderline]
+//! * [`HeadingSetextUnderlineSequence`][Name::HeadingSetextUnderlineSequence]
//!
//! ## References
//!
@@ -70,12 +71,13 @@
//! [atx]: http://www.aaronsw.com/2002/atx/
use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
-use crate::event::{Kind, Name};
+use crate::event::{Content, Event, Kind, Link, Name};
use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
+use crate::subtokenize::Subresult;
use crate::tokenizer::Tokenizer;
-use crate::util::{constant::TAB_SIZE, skip::opt_back as skip_opt_back};
-use alloc::vec;
+use crate::util::{constant::TAB_SIZE, skip};
+use alloc::{string::String, vec};
/// At start of heading (setext) underline.
///
@@ -90,14 +92,16 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
&& !tokenizer.pierce
// Require a paragraph before.
&& (!tokenizer.events.is_empty()
- && tokenizer.events[skip_opt_back(
+ && tokenizer.events[skip::opt_back(
&tokenizer.events,
tokenizer.events.len() - 1,
&[Name::LineEnding, Name::SpaceOrTab],
)]
.name
- == Name::Paragraph)
+ == Name::Content)
{
+ tokenizer.enter(Name::HeadingSetextUnderline);
+
if matches!(tokenizer.current, Some(b'\t' | b' ')) {
tokenizer.attempt(State::Next(StateName::HeadingSetextBefore), State::Nok);
State::Retry(space_or_tab_min_max(
@@ -128,7 +132,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'-' | b'=') => {
tokenizer.tokenize_state.marker = tokenizer.current.unwrap();
- tokenizer.enter(Name::HeadingSetextUnderline);
+ tokenizer.enter(Name::HeadingSetextUnderlineSequence);
State::Retry(StateName::HeadingSetextInside)
}
_ => State::Nok,
@@ -148,7 +152,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {
State::Next(StateName::HeadingSetextInside)
} else {
tokenizer.tokenize_state.marker = 0;
- tokenizer.exit(Name::HeadingSetextUnderline);
+ tokenizer.exit(Name::HeadingSetextUnderlineSequence);
if matches!(tokenizer.current, Some(b'\t' | b' ')) {
tokenizer.attempt(State::Next(StateName::HeadingSetextAfter), State::Nok);
@@ -172,6 +176,7 @@ pub fn after(tokenizer: &mut Tokenizer) -> State {
// Feel free to interrupt.
tokenizer.interrupt = false;
tokenizer.register_resolver(ResolveName::HeadingSetext);
+ tokenizer.exit(Name::HeadingSetextUnderline);
State::Ok
}
_ => State::Nok,
@@ -179,42 +184,102 @@ pub fn after(tokenizer: &mut Tokenizer) -> State {
}
/// Resolve heading (setext).
-pub fn resolve(tokenizer: &mut Tokenizer) {
- let mut index = 0;
- let mut paragraph_enter = None;
- let mut paragraph_exit = None;
-
- while index < tokenizer.events.len() {
- let event = &tokenizer.events[index];
-
- // Find paragraphs.
- if event.kind == Kind::Enter {
- if event.name == Name::Paragraph {
- paragraph_enter = Some(index);
- }
- } else if event.name == Name::Paragraph {
- paragraph_exit = Some(index);
- }
- // We know this is preceded by a paragraph.
- // Otherwise we don’t parse.
- else if event.name == Name::HeadingSetextUnderline {
- let enter = paragraph_enter.take().unwrap();
- let exit = paragraph_exit.take().unwrap();
+pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
+ tokenizer.map.consume(&mut tokenizer.events);
+
+ let mut enter = skip::to(&tokenizer.events, 0, &[Name::HeadingSetextUnderline]);
+
+ while enter < tokenizer.events.len() {
+ let exit = skip::to(
+ &tokenizer.events,
+ enter + 1,
+ &[Name::HeadingSetextUnderline],
+ );
+
+ // Find paragraph before
+ let paragraph_exit_before = skip::opt_back(
+ &tokenizer.events,
+ enter - 1,
+ &[Name::SpaceOrTab, Name::LineEnding, Name::BlockQuotePrefix],
+ );
+
+ // There’s a paragraph before: this is a setext heading.
+ if tokenizer.events[paragraph_exit_before].name == Name::Paragraph {
+ let paragraph_enter = skip::to_back(
+ &tokenizer.events,
+ paragraph_exit_before - 1,
+ &[Name::Paragraph],
+ );
// Change types of Enter:Paragraph, Exit:Paragraph.
- tokenizer.events[enter].name = Name::HeadingSetextText;
- tokenizer.events[exit].name = Name::HeadingSetextText;
+ tokenizer.events[paragraph_enter].name = Name::HeadingSetextText;
+ tokenizer.events[paragraph_exit_before].name = Name::HeadingSetextText;
// Add Enter:HeadingSetext, Exit:HeadingSetext.
- let mut heading_enter = tokenizer.events[enter].clone();
+ let mut heading_enter = tokenizer.events[paragraph_enter].clone();
heading_enter.name = Name::HeadingSetext;
- let mut heading_exit = tokenizer.events[index].clone();
+ tokenizer.map.add(paragraph_enter, 0, vec![heading_enter]);
+ let mut heading_exit = tokenizer.events[exit].clone();
heading_exit.name = Name::HeadingSetext;
-
- tokenizer.map.add(enter, 0, vec![heading_enter]);
- tokenizer.map.add(index + 1, 0, vec![heading_exit]);
+ tokenizer.map.add(exit + 1, 0, vec![heading_exit]);
+ } else {
+ // There’s a following paragraph, move this underline inside it.
+ if exit + 3 < tokenizer.events.len()
+ && tokenizer.events[exit + 1].name == Name::LineEnding
+ && tokenizer.events[exit + 3].name == Name::Paragraph
+ {
+ // Swap type, HeadingSetextUnderline:Enter -> Paragraph:Enter.
+ tokenizer.events[enter].name = Name::Paragraph;
+ // Swap type, LineEnding -> Data.
+ tokenizer.events[exit + 1].name = Name::Data;
+ tokenizer.events[exit + 2].name = Name::Data;
+ // Move new data (was line ending) back to include whole line,
+ // and link data together.
+ tokenizer.events[exit + 1].point = tokenizer.events[enter].point.clone();
+ tokenizer.events[exit + 1].link = Some(Link {
+ previous: None,
+ next: Some(exit + 4),
+ content: Content::Text,
+ });
+ tokenizer.events[exit + 4].link.as_mut().unwrap().previous = Some(exit + 1);
+ // Remove *including* HeadingSetextUnderline:Exit, until the line ending.
+ tokenizer.map.add(enter + 1, exit - enter, vec![]);
+ // Remove old Paragraph:Enter.
+ tokenizer.map.add(exit + 3, 1, vec![]);
+ } else {
+ // Swap type.
+ tokenizer.events[enter].name = Name::Paragraph;
+ tokenizer.events[exit].name = Name::Paragraph;
+ // Replace what’s inside the underline (whitespace, sequence).
+ tokenizer.map.add(
+ enter + 1,
+ exit - enter - 1,
+ vec![
+ Event {
+ name: Name::Data,
+ kind: Kind::Enter,
+ point: tokenizer.events[enter].point.clone(),
+ link: Some(Link {
+ previous: None,
+ next: None,
+ content: Content::Text,
+ }),
+ },
+ Event {
+ name: Name::Data,
+ kind: Kind::Exit,
+ point: tokenizer.events[exit].point.clone(),
+ link: None,
+ },
+ ],
+ );
+ }
}
- index += 1;
+ enter = skip::to(&tokenizer.events, exit + 1, &[Name::HeadingSetextUnderline]);
}
+
+ tokenizer.map.consume(&mut tokenizer.events);
+
+ Ok(None)
}
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index ce1c295..95b9a27 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -183,6 +183,7 @@ use crate::construct::partial_space_or_tab_eol::space_or_tab_eol;
use crate::event::{Event, Kind, Name};
use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
+use crate::subtokenize::Subresult;
use crate::tokenizer::{Label, LabelKind, LabelStart, Tokenizer};
use crate::util::{
constant::RESOURCE_DESTINATION_BALANCE_MAX,
@@ -660,7 +661,7 @@ pub fn reference_collapsed_open(tokenizer: &mut Tokenizer) -> State {
///
/// This turns matching label starts and label ends into links, images, and
/// footnotes, and turns unmatched label starts back into data.
-pub fn resolve(tokenizer: &mut Tokenizer) {
+pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
// Inject labels.
let labels = tokenizer.tokenize_state.labels.split_off(0);
inject_labels(tokenizer, &labels);
@@ -671,6 +672,8 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
mark_as_data(tokenizer, &starts);
tokenizer.map.consume(&mut tokenizer.events);
+
+ Ok(None)
}
/// Inject links/images/footnotes.
diff --git a/src/construct/list_item.rs b/src/construct/list_item.rs
index 658c2c7..13b740b 100644
--- a/src/construct/list_item.rs
+++ b/src/construct/list_item.rs
@@ -62,13 +62,14 @@ use crate::construct::partial_space_or_tab::space_or_tab_min_max;
use crate::event::{Kind, Name};
use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
+use crate::subtokenize::Subresult;
use crate::tokenizer::Tokenizer;
use crate::util::{
constant::{LIST_ITEM_VALUE_SIZE_MAX, TAB_SIZE},
skip,
slice::{Position, Slice},
};
-use alloc::{vec, vec::Vec};
+use alloc::{string::String, vec, vec::Vec};
/// Start of list item.
///
@@ -370,7 +371,7 @@ pub fn cont_filled(tokenizer: &mut Tokenizer) -> State {
}
/// Find adjacent list items with the same marker.
-pub fn resolve(tokenizer: &mut Tokenizer) {
+pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
let mut lists_wip: Vec<(u8, usize, usize, usize)> = vec![];
let mut lists: Vec<(u8, usize, usize, usize)> = vec![];
let mut index = 0;
@@ -472,4 +473,6 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
index += 1;
}
+
+ Ok(None)
}
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index 1afa105..ae6facf 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -16,7 +16,7 @@
//! Content types also have a *rest* thing: after all things are parsed,
//! there’s something left.
//! In document, that is [flow][].
-//! In flow, that is a [paragraph][].
+//! In flow, that is [content][].
//! In string and text, that is [data][partial_data].
//!
//! ## Construct
@@ -37,6 +37,7 @@
//! * [character escape][character_escape]
//! * [character reference][character_reference]
//! * [code (indented)][code_indented]
+//! * [content][]
//! * [definition][]
//! * [hard break (escape)][hard_break_escape]
//! * [heading (atx)][heading_atx]
@@ -149,6 +150,7 @@ pub mod block_quote;
pub mod character_escape;
pub mod character_reference;
pub mod code_indented;
+pub mod content;
pub mod definition;
pub mod document;
pub mod flow;
diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs
index c1e7311..78fbacb 100644
--- a/src/construct/paragraph.rs
+++ b/src/construct/paragraph.rs
@@ -1,4 +1,4 @@
-//! Paragraph occurs in the [flow][] content type.
+//! Paragraph occurs in the [content][] content type.
//!
//! ## Grammar
//!
@@ -11,14 +11,15 @@
//! paragraph ::= 1*line *(eol 1*line)
//! ```
//!
-//! As this construct occurs in flow, like all flow constructs, it must be
-//! followed by an eol (line ending) or eof (end of file).
+//! This construct must be followed by an eol (line ending) or eof (end of
+//! file), like flow constructs.
//!
//! Paragraphs can contain line endings and whitespace, but they are not
//! allowed to contain blank lines, or to be blank themselves.
//!
//! The paragraph is interpreted as the [text][] content type.
-//! That means that [autolinks][autolink], [code (text)][raw_text], etc are allowed.
+//! That means that [autolinks][autolink], [code (text)][raw_text], etc are
+//! allowed.
//!
//! ## HTML
//!
@@ -34,40 +35,57 @@
//! * [`content.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/content.js)
//! * [*§ 4.8 Paragraphs* in `CommonMark`](https://spec.commonmark.org/0.30/#paragraphs)
//!
-//! [flow]: crate::construct::flow
+//! [content]: crate::construct::content
//! [text]: crate::construct::text
//! [autolink]: crate::construct::autolink
//! [raw_text]: crate::construct::raw_text
//! [html]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-p-element
-use crate::event::{Content, Kind, Link, Name};
-use crate::resolve::Name as ResolveName;
+use crate::event::{Content, Link, Name};
use crate::state::{Name as StateName, State};
+use crate::subtokenize::link;
use crate::tokenizer::Tokenizer;
-use alloc::vec;
-/// Before paragraph.
+/// Paragraph start.
///
/// ```markdown
/// > | abc
/// ^
+/// | def
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- None | Some(b'\n') => unreachable!("unexpected eol/eof"),
- _ => {
- tokenizer.enter(Name::Paragraph);
- tokenizer.enter_link(
- Name::Data,
- Link {
- previous: None,
- next: None,
- content: Content::Text,
- },
- );
- State::Retry(StateName::ParagraphInside)
- }
+ debug_assert!(tokenizer.current.is_some());
+ tokenizer.enter(Name::Paragraph);
+ State::Retry(StateName::ParagraphLineStart)
+}
+
+/// Start of a line in a paragraph.
+///
+/// ```markdown
+/// > | abc
+/// ^
+/// > | def
+/// ^
+/// ```
+pub fn line_start(tokenizer: &mut Tokenizer) -> State {
+ debug_assert!(tokenizer.current.is_some());
+ tokenizer.enter_link(
+ Name::Data,
+ Link {
+ previous: None,
+ next: None,
+ content: Content::Text,
+ },
+ );
+
+ if tokenizer.tokenize_state.connect {
+ let index = tokenizer.events.len() - 1;
+ link(&mut tokenizer.events, index);
+ } else {
+ tokenizer.tokenize_state.connect = true;
}
+
+ State::Retry(StateName::ParagraphInside)
}
/// In paragraph.
@@ -78,91 +96,20 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ```
pub fn inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- None | Some(b'\n') => {
+ None => {
+ tokenizer.tokenize_state.connect = false;
tokenizer.exit(Name::Data);
tokenizer.exit(Name::Paragraph);
- tokenizer.register_resolver_before(ResolveName::Paragraph);
- // You’d be interrupting.
- tokenizer.interrupt = true;
State::Ok
}
+ Some(b'\n') => {
+ tokenizer.consume();
+ tokenizer.exit(Name::Data);
+ State::Next(StateName::ParagraphLineStart)
+ }
_ => {
tokenizer.consume();
State::Next(StateName::ParagraphInside)
}
}
}
-
-/// Merge “`Paragraph`”s, which currently span a single line, into actual
-/// `Paragraph`s that span multiple lines.
-pub fn resolve(tokenizer: &mut Tokenizer) {
- let mut index = 0;
-
- while index < tokenizer.events.len() {
- let event = &tokenizer.events[index];
-
- if event.kind == Kind::Enter && event.name == Name::Paragraph {
- // Exit:Paragraph
- let mut exit_index = index + 3;
-
- loop {
- let mut enter_index = exit_index + 1;
-
- if enter_index == tokenizer.events.len()
- || tokenizer.events[enter_index].name != Name::LineEnding
- {
- break;
- }
-
- enter_index += 2;
-
- while enter_index < tokenizer.events.len() {
- let event = &tokenizer.events[enter_index];
-
- if event.name != Name::SpaceOrTab
- && event.name != Name::BlockQuotePrefix
- && event.name != Name::BlockQuoteMarker
- {
- break;
- }
-
- enter_index += 1;
- }
-
- if enter_index == tokenizer.events.len()
- || tokenizer.events[enter_index].name != Name::Paragraph
- {
- break;
- }
-
- // Remove Exit:Paragraph, Enter:LineEnding, Exit:LineEnding.
- tokenizer.map.add(exit_index, 3, vec![]);
-
- // Remove Enter:Paragraph.
- tokenizer.map.add(enter_index, 1, vec![]);
-
- // Add Exit:LineEnding position info to Exit:Data.
- tokenizer.events[exit_index - 1].point =
- tokenizer.events[exit_index + 2].point.clone();
-
- // Link Enter:Data on the previous line to Enter:Data on this line.
- if let Some(link) = &mut tokenizer.events[exit_index - 2].link {
- link.next = Some(enter_index + 1);
- }
- if let Some(link) = &mut tokenizer.events[enter_index + 1].link {
- link.previous = Some(exit_index - 2);
- }
-
- // Potential next start.
- exit_index = enter_index + 3;
- }
-
- // Move to `Exit:Paragraph`.
- index = exit_index;
- }
-
- index += 1;
- }
-
- tokenizer.map.consume(&mut tokenizer.events);
-}
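
With content handling line merging, paragraphs no longer need a resolver at all: each line’s `Data` span is linked to the previous one via `subtokenize::link` as it is entered, and the `connect` flag marks whether there is a previous line to link to. A multi-line paragraph is therefore threaded together during tokenization itself; the rendered output is unchanged, sketched with the same assumed `to_html`:

```rust
fn main() {
    // Two lines, one paragraph: the line ending is consumed as part
    // of the linked `Data` chain, not merged by a resolver later.
    assert_eq!(markdown::to_html("a\nb"), "<p>a\nb</p>");
}
```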
diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs
index b6f1f47..b36d9f0 100644
--- a/src/construct/partial_data.rs
+++ b/src/construct/partial_data.rs
@@ -8,8 +8,9 @@
use crate::event::{Kind, Name};
use crate::state::{Name as StateName, State};
+use crate::subtokenize::Subresult;
use crate::tokenizer::Tokenizer;
-use alloc::vec;
+use alloc::{string::String, vec};
/// At beginning of data.
///
@@ -72,7 +73,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {
}
/// Merge adjacent data events.
-pub fn resolve(tokenizer: &mut Tokenizer) {
+pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
let mut index = 0;
// Loop through events and merge adjacent data events.
@@ -103,4 +104,6 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
index += 1;
}
+
+ Ok(None)
}
diff --git a/src/construct/string.rs b/src/construct/string.rs
index dba1ac1..cf2f222 100644
--- a/src/construct/string.rs
+++ b/src/construct/string.rs
@@ -15,7 +15,9 @@
use crate::construct::partial_whitespace::resolve_whitespace;
use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
+use crate::subtokenize::Subresult;
use crate::tokenizer::Tokenizer;
+use alloc::string::String;
/// Characters that can start something in string.
const MARKERS: [u8; 2] = [b'&', b'\\'];
@@ -74,6 +76,8 @@ pub fn before_data(tokenizer: &mut Tokenizer) -> State {
}
/// Resolve whitespace in string.
-pub fn resolve(tokenizer: &mut Tokenizer) {
+pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
resolve_whitespace(tokenizer, false, false);
+
+ Ok(None)
}
diff --git a/src/construct/text.rs b/src/construct/text.rs
index 34ea071..2648531 100644
--- a/src/construct/text.rs
+++ b/src/construct/text.rs
@@ -28,7 +28,9 @@ use crate::construct::gfm_autolink_literal::resolve as resolve_gfm_autolink_lite
use crate::construct::partial_whitespace::resolve_whitespace;
use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
+use crate::subtokenize::Subresult;
use crate::tokenizer::Tokenizer;
+use alloc::string::String;
/// Characters that can start something in text.
const MARKERS: [u8; 16] = [
@@ -242,7 +244,7 @@ pub fn before_data(tokenizer: &mut Tokenizer) -> State {
}
/// Resolve whitespace.
-pub fn resolve(tokenizer: &mut Tokenizer) {
+pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
resolve_whitespace(
tokenizer,
tokenizer.parse_state.options.constructs.hard_break_trailing,
@@ -257,4 +259,6 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
{
resolve_gfm_autolink_literal(tokenizer);
}
+
+ Ok(None)
}