diff options
Diffstat (limited to '')
28 files changed, 420 insertions, 440 deletions
diff --git a/src/construct/attention.rs b/src/construct/attention.rs index e974fae..3a29d06 100644 --- a/src/construct/attention.rs +++ b/src/construct/attention.rs @@ -60,7 +60,7 @@ use crate::util::slice::Slice; /// Character code kinds. #[derive(Debug, PartialEq)] -enum GroupKind { +enum CharacterKind { /// Whitespace. /// /// ## Example @@ -98,7 +98,7 @@ struct Sequence { /// The depth in events where this sequence resides. balance: usize, /// The index into events where this sequence’s `Enter` currently resides. - event_index: usize, + index: usize, /// The (shifted) point where this sequence starts. start_point: Point, /// The (shifted) point where this sequence end. @@ -111,7 +111,7 @@ struct Sequence { close: bool, } -/// Before a sequence. +/// At start of attention. /// /// ```markdown /// > | ** @@ -128,7 +128,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// In a sequence. +/// In sequence. /// /// ```markdown /// > | ** @@ -136,7 +136,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - Some(b'*' | b'_') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => { + Some(b'*' | b'_') if tokenizer.current == Some(tokenizer.tokenize_state.marker) => { tokenizer.consume(); State::Next(StateName::AttentionInside) } @@ -150,28 +150,28 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State { } /// Resolve attention sequences. -#[allow(clippy::too_many_lines)] pub fn resolve(tokenizer: &mut Tokenizer) { - let mut start = 0; + let mut index = 0; let mut balance = 0; let mut sequences = vec![]; - // Find sequences of sequences and information about them. - while start < tokenizer.events.len() { - let enter = &tokenizer.events[start]; + // Find all sequences, gather info about them. 
+ while index < tokenizer.events.len() { + let enter = &tokenizer.events[index]; if enter.kind == Kind::Enter { balance += 1; if enter.name == Name::AttentionSequence { - let end = start + 1; + let end = index + 1; let exit = &tokenizer.events[end]; let before_end = enter.point.index; let before_start = if before_end < 4 { 0 } else { before_end - 4 }; - let string_before = - String::from_utf8_lossy(&tokenizer.parse_state.bytes[before_start..before_end]); - let char_before = string_before.chars().last(); + let char_before = + String::from_utf8_lossy(&tokenizer.parse_state.bytes[before_start..before_end]) + .chars() + .last(); let after_start = exit.point.index; let after_end = if after_start + 4 > tokenizer.parse_state.bytes.len() { @@ -179,26 +179,27 @@ pub fn resolve(tokenizer: &mut Tokenizer) { } else { after_start + 4 }; - let string_after = - String::from_utf8_lossy(&tokenizer.parse_state.bytes[after_start..after_end]); - let char_after = string_after.chars().next(); + let char_after = + String::from_utf8_lossy(&tokenizer.parse_state.bytes[after_start..after_end]) + .chars() + .next(); let marker = Slice::from_point(tokenizer.parse_state.bytes, &enter.point) .head() .unwrap(); let before = classify_character(char_before); let after = classify_character(char_after); - let open = after == GroupKind::Other - || (after == GroupKind::Punctuation && before != GroupKind::Other); + let open = after == CharacterKind::Other + || (after == CharacterKind::Punctuation && before != CharacterKind::Other); // To do: GFM strikethrough? - // || attentionMarkers.includes(code) - let close = before == GroupKind::Other - || (before == GroupKind::Punctuation && after != GroupKind::Other); + // || char_after == '~' + let close = before == CharacterKind::Other + || (before == CharacterKind::Punctuation && after != CharacterKind::Other); // To do: GFM strikethrough? 
- // || attentionMarkers.includes(previous) + // || char_before == '~' sequences.push(Sequence { - event_index: start, + index, balance, start_point: enter.point.clone(), end_point: exit.point.clone(), @@ -206,12 +207,12 @@ pub fn resolve(tokenizer: &mut Tokenizer) { open: if marker == b'*' { open } else { - open && (before != GroupKind::Other || !close) + open && (before != CharacterKind::Other || !close) }, close: if marker == b'*' { close } else { - close && (after != GroupKind::Other || !open) + close && (after != CharacterKind::Other || !open) }, marker, }); @@ -220,10 +221,10 @@ pub fn resolve(tokenizer: &mut Tokenizer) { balance -= 1; } - start += 1; + index += 1; } - // Walk through sequences and match them. + // Now walk through them and match them. let mut close = 0; while close < sequences.len() { @@ -240,7 +241,7 @@ pub fn resolve(tokenizer: &mut Tokenizer) { let sequence_open = &sequences[open]; - // We found a sequence that can open the closer we found. + // An opener matching our closer: if sequence_open.open && sequence_close.marker == sequence_open.marker && sequence_close.balance == sequence_open.balance @@ -257,175 +258,7 @@ pub fn resolve(tokenizer: &mut Tokenizer) { } // We’ve found a match! - - // Number of markers to use from the sequence. - let take = if sequence_open.size > 1 && sequence_close.size > 1 { - 2 - } else { - 1 - }; - - // We’re *on* a closing sequence, with a matching opening - // sequence. - // Now we make sure that we can’t have misnested attention: - // - // ```html - // <em>a <strong>b</em> c</strong> - // ``` - // - // Do that by marking everything between it as no longer - // possible to open anything. - // Theoretically we could mark non-closing as well, but we - // don’t look for closers backwards. 
- let mut between = open + 1; - - while between < close { - sequences[between].open = false; - between += 1; - } - - let sequence_close = &mut sequences[close]; - let close_event_index = sequence_close.event_index; - let seq_close_enter = sequence_close.start_point.clone(); - // No need to worry about `VS`, because sequences are only actual characters. - sequence_close.size -= take; - sequence_close.start_point.column += take; - sequence_close.start_point.index += take; - let seq_close_exit = sequence_close.start_point.clone(); - - // Stay on this closing sequence for the next iteration: it - // might close more things. - next_index -= 1; - - // Remove closing sequence if fully used. - if sequence_close.size == 0 { - sequences.remove(close); - tokenizer.map.add(close_event_index, 2, vec![]); - } else { - // Shift remaining closing sequence forward. - // Do it here because a sequence can open and close different - // other sequences, and the remainder can be on any side or - // somewhere in the middle. - let mut enter = &mut tokenizer.events[close_event_index]; - enter.point = seq_close_exit.clone(); - } - - let sequence_open = &mut sequences[open]; - let open_event_index = sequence_open.event_index; - let seq_open_exit = sequence_open.end_point.clone(); - // No need to worry about `VS`, because sequences are only actual characters. - sequence_open.size -= take; - sequence_open.end_point.column -= take; - sequence_open.end_point.index -= take; - let seq_open_enter = sequence_open.end_point.clone(); - - // Remove opening sequence if fully used. - if sequence_open.size == 0 { - sequences.remove(open); - tokenizer.map.add(open_event_index, 2, vec![]); - next_index -= 1; - } else { - // Shift remaining opening sequence backwards. - // See note above for why that happens here. - let mut exit = &mut tokenizer.events[open_event_index + 1]; - exit.point = seq_open_enter.clone(); - } - - // Opening. 
- tokenizer.map.add_before( - // Add after the current sequence (it might remain). - open_event_index + 2, - 0, - vec![ - Event { - kind: Kind::Enter, - name: if take == 1 { - Name::Emphasis - } else { - Name::Strong - }, - point: seq_open_enter.clone(), - link: None, - }, - Event { - kind: Kind::Enter, - name: if take == 1 { - Name::EmphasisSequence - } else { - Name::StrongSequence - }, - point: seq_open_enter.clone(), - link: None, - }, - Event { - kind: Kind::Exit, - name: if take == 1 { - Name::EmphasisSequence - } else { - Name::StrongSequence - }, - point: seq_open_exit.clone(), - link: None, - }, - Event { - kind: Kind::Enter, - name: if take == 1 { - Name::EmphasisText - } else { - Name::StrongText - }, - point: seq_open_exit.clone(), - link: None, - }, - ], - ); - // Closing. - tokenizer.map.add( - close_event_index, - 0, - vec![ - Event { - kind: Kind::Exit, - name: if take == 1 { - Name::EmphasisText - } else { - Name::StrongText - }, - point: seq_close_enter.clone(), - link: None, - }, - Event { - kind: Kind::Enter, - name: if take == 1 { - Name::EmphasisSequence - } else { - Name::StrongSequence - }, - point: seq_close_enter.clone(), - link: None, - }, - Event { - kind: Kind::Exit, - name: if take == 1 { - Name::EmphasisSequence - } else { - Name::StrongSequence - }, - point: seq_close_exit.clone(), - link: None, - }, - Event { - kind: Kind::Exit, - name: if take == 1 { - Name::Emphasis - } else { - Name::Strong - }, - point: seq_close_exit.clone(), - link: None, - }, - ], - ); + next_index = match_sequences(tokenizer, &mut sequences, open, close); break; } @@ -439,14 +272,159 @@ pub fn resolve(tokenizer: &mut Tokenizer) { let mut index = 0; while index < sequences.len() { let sequence = &sequences[index]; - tokenizer.events[sequence.event_index].name = Name::Data; - tokenizer.events[sequence.event_index + 1].name = Name::Data; + tokenizer.events[sequence.index].name = Name::Data; + tokenizer.events[sequence.index + 1].name = Name::Data; index += 1; } 
tokenizer.map.consume(&mut tokenizer.events); } +/// Match two sequences. +fn match_sequences( + tokenizer: &mut Tokenizer, + sequences: &mut Vec<Sequence>, + open: usize, + close: usize, +) -> usize { + // Where to move to next. + // Stay on this closing sequence for the next iteration: it + // might close more things. + // It’s changed if sequences are removed. + let mut next = close; + + // Number of markers to use from the sequence. + let take = if sequences[open].size > 1 && sequences[close].size > 1 { + 2 + } else { + 1 + }; + + // We’re *on* a closing sequence, with a matching opening + // sequence. + // Now we make sure that we can’t have misnested attention: + // + // ```html + // <em>a <strong>b</em> c</strong> + // ``` + // + // Do that by marking everything between it as no longer + // possible to open anything. + // Theoretically we should mark as `close: false` too, but + // we don’t look for closers backwards, so it’s not needed. + let mut between = open + 1; + + while between < close { + sequences[between].open = false; + between += 1; + } + + let (group_name, seq_name, text_name) = if take == 1 { + (Name::Emphasis, Name::EmphasisSequence, Name::EmphasisText) + } else { + (Name::Strong, Name::StrongSequence, Name::StrongText) + }; + let open_index = sequences[open].index; + let close_index = sequences[close].index; + let open_exit = sequences[open].end_point.clone(); + let close_enter = sequences[close].start_point.clone(); + + // No need to worry about `VS`, because sequences are only actual characters. + sequences[open].size -= take; + sequences[close].size -= take; + sequences[open].end_point.column -= take; + sequences[open].end_point.index -= take; + sequences[close].start_point.column += take; + sequences[close].start_point.index += take; + + // Opening. + tokenizer.map.add_before( + // Add after the current sequence (it might remain). 
+ open_index + 2, + 0, + vec![ + Event { + kind: Kind::Enter, + name: group_name.clone(), + point: sequences[open].end_point.clone(), + link: None, + }, + Event { + kind: Kind::Enter, + name: seq_name.clone(), + point: sequences[open].end_point.clone(), + link: None, + }, + Event { + kind: Kind::Exit, + name: seq_name.clone(), + point: open_exit.clone(), + link: None, + }, + Event { + kind: Kind::Enter, + name: text_name.clone(), + point: open_exit, + link: None, + }, + ], + ); + // Closing. + tokenizer.map.add( + close_index, + 0, + vec![ + Event { + kind: Kind::Exit, + name: text_name, + point: close_enter.clone(), + link: None, + }, + Event { + kind: Kind::Enter, + name: seq_name.clone(), + point: close_enter, + link: None, + }, + Event { + kind: Kind::Exit, + name: seq_name, + point: sequences[close].start_point.clone(), + link: None, + }, + Event { + kind: Kind::Exit, + name: group_name, + point: sequences[close].start_point.clone(), + link: None, + }, + ], + ); + + // Remove closing sequence if fully used. + if sequences[close].size == 0 { + sequences.remove(close); + tokenizer.map.add(close_index, 2, vec![]); + } else { + // Shift remaining closing sequence forward. + // Do it here because a sequence can open and close different + // other sequences, and the remainder can be on any side or + // somewhere in the middle. + tokenizer.events[close_index].point = sequences[close].start_point.clone(); + } + + if sequences[open].size == 0 { + sequences.remove(open); + tokenizer.map.add(open_index, 2, vec![]); + // Everything shifts one to the left, account for it in next iteration. + next -= 1; + } else { + tokenizer.events[open_index + 1].point = sequences[open].end_point.clone(); + } + + next +} + /// Classify whether a character code represents whitespace, punctuation, or /// something else. 
/// @@ -458,15 +436,15 @@ pub fn resolve(tokenizer: &mut Tokenizer) { /// ## References /// /// * [`micromark-util-classify-character` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-util-classify-character/dev/index.js) -fn classify_character(char: Option<char>) -> GroupKind { +fn classify_character(char: Option<char>) -> CharacterKind { match char { // EOF. - None => GroupKind::Whitespace, + None => CharacterKind::Whitespace, // Unicode whitespace. - Some(char) if char.is_whitespace() => GroupKind::Whitespace, + Some(char) if char.is_whitespace() => CharacterKind::Whitespace, // Unicode punctuation. - Some(char) if PUNCTUATION.contains(&char) => GroupKind::Punctuation, + Some(char) if PUNCTUATION.contains(&char) => CharacterKind::Punctuation, // Everything else. - Some(_) => GroupKind::Other, + Some(_) => CharacterKind::Other, } } diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs index c0d9ae3..1bb8004 100644 --- a/src/construct/autolink.rs +++ b/src/construct/autolink.rs @@ -106,7 +106,7 @@ use crate::event::Name; use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; -/// Start of an autolink. +/// Start of autolink. /// /// ```markdown /// > | a<https://example.com>b @@ -128,7 +128,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// After `<`, before the protocol. +/// After `<`, at protocol or atext. /// /// ```markdown /// > | a<https://example.com>b @@ -147,7 +147,7 @@ pub fn open(tokenizer: &mut Tokenizer) -> State { } } -/// After the first byte of the protocol or email name. +/// At second byte of protocol or atext. /// /// ```markdown /// > | a<https://example.com>b @@ -167,7 +167,7 @@ pub fn scheme_or_email_atext(tokenizer: &mut Tokenizer) -> State { } } -/// Inside an ambiguous protocol or email name. +/// In ambiguous protocol or atext. 
/// /// ```markdown /// > | a<https://example.com>b @@ -197,7 +197,7 @@ pub fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer) -> State { } } -/// Inside a URL, after the protocol. +/// After protocol, in URL. /// /// ```markdown /// > | a<https://example.com>b @@ -222,7 +222,7 @@ pub fn url_inside(tokenizer: &mut Tokenizer) -> State { } } -/// Inside email atext. +/// In email atext. /// /// ```markdown /// > | a<user.name@example.com>b @@ -261,7 +261,7 @@ pub fn email_atext(tokenizer: &mut Tokenizer) -> State { } } -/// After an at-sign or a dot in the label. +/// In label, after at-sign or dot. /// /// ```markdown /// > | a<user.name@example.com>b @@ -277,7 +277,7 @@ pub fn email_at_sign_or_dot(tokenizer: &mut Tokenizer) -> State { } } -/// In the label, where `.` and `>` are allowed. +/// In label, where `.` and `>` are allowed. /// /// ```markdown /// > | a<user.name@example.com>b @@ -307,7 +307,7 @@ pub fn email_label(tokenizer: &mut Tokenizer) -> State { } } -/// In the label, where `.` and `>` are *not* allowed. +/// In label, where `.` and `>` are *not* allowed. /// /// Though, this is also used in `email_label` to parse other values. /// diff --git a/src/construct/blank_line.rs b/src/construct/blank_line.rs index eeef637..81b58fc 100644 --- a/src/construct/blank_line.rs +++ b/src/construct/blank_line.rs @@ -36,7 +36,7 @@ use crate::construct::partial_space_or_tab::space_or_tab; use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; -/// Start of a blank line. +/// Start of blank line. /// /// > 👉 **Note**: `␠` represents a space character. /// @@ -55,7 +55,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { State::Retry(space_or_tab(tokenizer)) } -/// After zero or more spaces or tabs, before a line ending or EOF. +/// At eof/eol, after optional whitespace. 
/// /// ```markdown /// > | ␠␠␊ diff --git a/src/construct/block_quote.rs b/src/construct/block_quote.rs index 1a32f7d..6e660cb 100644 --- a/src/construct/block_quote.rs +++ b/src/construct/block_quote.rs @@ -47,39 +47,17 @@ use crate::tokenizer::Tokenizer; /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { if tokenizer.parse_state.constructs.block_quote { - tokenizer.attempt(State::Next(StateName::BlockQuoteBefore), State::Nok); - State::Retry(space_or_tab_min_max( - tokenizer, - 0, - if tokenizer.parse_state.constructs.code_indented { - TAB_SIZE - 1 - } else { - usize::MAX - }, - )) + tokenizer.enter(Name::BlockQuote); + State::Retry(StateName::BlockQuoteContStart) } else { State::Nok } } -/// Start of block quote, after whitespace, before `>`. -/// -/// ```markdown -/// > | > a -/// ^ -/// ``` -pub fn before(tokenizer: &mut Tokenizer) -> State { - match tokenizer.current { - Some(b'>') => { - tokenizer.enter(Name::BlockQuote); - State::Retry(StateName::BlockQuoteContBefore) - } - _ => State::Retry(StateName::BlockQuoteContBefore), - } -} - /// Start of block quote continuation. /// +/// Also used to parse the first block quote opening. +/// /// ```markdown /// | > a /// > | > b @@ -98,7 +76,9 @@ pub fn cont_start(tokenizer: &mut Tokenizer) -> State { )) } -/// After whitespace, before `>`. +/// At `>`, after optional whitespace. +/// +/// Also used to parse the first block quote opening. /// /// ```markdown /// | > a diff --git a/src/construct/character_escape.rs b/src/construct/character_escape.rs index e0f36c7..494f1d2 100644 --- a/src/construct/character_escape.rs +++ b/src/construct/character_escape.rs @@ -37,7 +37,7 @@ use crate::event::Name; use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; -/// Start of a character escape. +/// Start of character escape. /// /// ```markdown /// > | a\*b @@ -56,7 +56,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// Inside a character escape, after `\`. 
+/// After `\`, at punctuation. /// /// ```markdown /// > | a\*b diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs index 476ea14..6171927 100644 --- a/src/construct/character_reference.rs +++ b/src/construct/character_reference.rs @@ -70,7 +70,7 @@ use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; use crate::util::slice::Slice; -/// Start of a character reference. +/// Start of character reference. /// /// ```markdown /// > | a&b @@ -93,8 +93,8 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// Inside a character reference, after `&`, before `#` for numeric references -/// or an alphanumeric for named references. +/// After `&`, at `#` for numeric references or alphanumeric for named +/// references. /// /// ```markdown /// > | a&b @@ -117,8 +117,7 @@ pub fn open(tokenizer: &mut Tokenizer) -> State { } } -/// Inside a numeric character reference, right before `x` for hexadecimals, -/// or a digit for decimals. +/// After `#`, at `x` for hexadecimals or digit for decimals. /// /// ```markdown /// > | a{b @@ -141,8 +140,7 @@ pub fn numeric(tokenizer: &mut Tokenizer) -> State { } } -/// Inside a character reference value, after the markers (`&#x`, `&#`, or -/// `&`) that define its kind, but before the `;`. +/// After markers (`&#x`, `&#`, or `&`), in value, before `;`. /// /// The character reference kind defines what and how many characters are /// allowed. diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs index ed39917..ac9a63f 100644 --- a/src/construct/code_fenced.rs +++ b/src/construct/code_fenced.rs @@ -139,7 +139,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// Inside the opening fence, after an optional prefix, before a sequence. +/// In opening fence, after prefix, at sequence. /// /// ```markdown /// > | ~~~js @@ -171,7 +171,7 @@ pub fn before_sequence_open(tokenizer: &mut Tokenizer) -> State { } } -/// Inside the opening fence sequence. 
+/// In opening fence sequence. /// /// ```markdown /// > | ~~~js @@ -205,7 +205,7 @@ pub fn sequence_open(tokenizer: &mut Tokenizer) -> State { } } -/// Inside the opening fence, after the sequence (and optional whitespace), before the info. +/// In opening fence, after the sequence (and optional whitespace), before info. /// /// ```markdown /// > | ~~~js @@ -233,7 +233,7 @@ pub fn info_before(tokenizer: &mut Tokenizer) -> State { } } -/// Inside the opening fence info. +/// In info. /// /// ```markdown /// > | ~~~js @@ -271,7 +271,7 @@ pub fn info(tokenizer: &mut Tokenizer) -> State { } } -/// Inside the opening fence, after the info and whitespace, before the meta. +/// In opening fence, after info and whitespace, before meta. /// /// ```markdown /// > | ~~~js eval @@ -290,7 +290,7 @@ pub fn meta_before(tokenizer: &mut Tokenizer) -> State { } } -/// Inside the opening fence meta. +/// In meta. /// /// ```markdown /// > | ~~~js eval @@ -319,7 +319,7 @@ pub fn meta(tokenizer: &mut Tokenizer) -> State { } } -/// At an eol/eof in code, before a non-lazy closing fence or content. +/// At eol/eof in code, before a non-lazy closing fence or content. /// /// ```markdown /// > | ~~~js @@ -336,7 +336,7 @@ pub fn at_non_lazy_break(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::CodeFencedCloseBefore) } -/// Before a closing fence, at the line ending. +/// Before closing fence, at eol. /// /// ```markdown /// | ~~~js @@ -356,7 +356,7 @@ pub fn close_before(tokenizer: &mut Tokenizer) -> State { } } -/// Before a closing fence, before optional whitespace. +/// Before closing fence, at optional whitespace. /// /// ```markdown /// | ~~~js @@ -383,7 +383,7 @@ pub fn close_start(tokenizer: &mut Tokenizer) -> State { )) } -/// In a closing fence, after optional whitespace, before sequence. +/// In closing fence, after optional whitespace, at sequence. 
/// /// ```markdown /// | ~~~js @@ -401,7 +401,7 @@ pub fn before_sequence_close(tokenizer: &mut Tokenizer) -> State { } } -/// In the closing fence sequence. +/// In closing fence sequence. /// /// ```markdown /// | ~~~js @@ -434,7 +434,7 @@ pub fn sequence_close(tokenizer: &mut Tokenizer) -> State { } } -/// After the closing fence sequence after optional whitespace. +/// After closing fence sequence, after optional whitespace. /// /// ```markdown /// | ~~~js @@ -452,7 +452,7 @@ pub fn sequence_close_after(tokenizer: &mut Tokenizer) -> State { } } -/// Before a closing fence, at the line ending. +/// Before closing fence, at eol. /// /// ```markdown /// | ~~~js @@ -466,6 +466,7 @@ pub fn content_before(tokenizer: &mut Tokenizer) -> State { tokenizer.exit(Name::LineEnding); State::Next(StateName::CodeFencedContentStart) } + /// Before code content, definitely not before a closing fence. /// /// ```markdown @@ -486,7 +487,7 @@ pub fn content_start(tokenizer: &mut Tokenizer) -> State { )) } -/// Before code content, after a prefix. +/// Before code content, after optional prefix. /// /// ```markdown /// | ~~~js diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs index 2ab117e..3a82dc4 100644 --- a/src/construct/code_indented.rs +++ b/src/construct/code_indented.rs @@ -95,7 +95,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { } } -/// Inside code content. +/// In code content. /// /// ```markdown /// > | aaa @@ -127,7 +127,7 @@ pub fn after(tokenizer: &mut Tokenizer) -> State { State::Ok } -/// Right at a line ending, trying to parse another indent. +/// At eol, trying to parse another indent. /// /// ```markdown /// > | aaa @@ -153,7 +153,7 @@ pub fn further_start(tokenizer: &mut Tokenizer) -> State { } } -/// At an eol, which is followed by an indented line. +/// At eol, followed by an indented line. 
/// /// ```markdown /// > | aaa diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs index d321f64..d601583 100644 --- a/src/construct/code_text.rs +++ b/src/construct/code_text.rs @@ -112,7 +112,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// In the opening sequence. +/// In opening sequence. /// /// ```markdown /// > | `a` @@ -177,7 +177,7 @@ pub fn data(tokenizer: &mut Tokenizer) -> State { } } -/// In the closing sequence. +/// In closing sequence. /// /// ```markdown /// > | `a` diff --git a/src/construct/definition.rs b/src/construct/definition.rs index 11f1062..2378c48 100644 --- a/src/construct/definition.rs +++ b/src/construct/definition.rs @@ -104,7 +104,7 @@ use crate::util::{ slice::{Position, Slice}, }; -/// At the start of a definition. +/// At start of a definition. /// /// ```markdown /// > | [a]: b "c" @@ -135,7 +135,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// At the start of a definition, after whitespace. +/// After optional whitespace, at `[`. /// /// ```markdown /// > | [a]: b "c" @@ -154,7 +154,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { } } -/// After the label of a definition. +/// After label. /// /// ```markdown /// > | [a]: b "c" @@ -182,7 +182,7 @@ pub fn label_after(tokenizer: &mut Tokenizer) -> State { } } -/// After the marker. +/// After marker. /// /// ```markdown /// > | [a]: b "c" @@ -196,7 +196,7 @@ pub fn marker_after(tokenizer: &mut Tokenizer) -> State { State::Retry(space_or_tab_eol(tokenizer)) } -/// Before a destination. +/// Before destination. /// /// ```markdown /// > | [a]: b "c" @@ -216,7 +216,7 @@ pub fn destination_before(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::DestinationStart) } -/// After a destination. +/// After destination. /// /// ```markdown /// > | [a]: b "c" @@ -248,7 +248,7 @@ pub fn destination_missing(tokenizer: &mut Tokenizer) -> State { State::Nok } -/// After a definition. +/// After definition. 
/// /// ```markdown /// > | [a]: b @@ -264,7 +264,7 @@ pub fn after(tokenizer: &mut Tokenizer) -> State { State::Retry(space_or_tab(tokenizer)) } -/// After a definition, after optional whitespace. +/// After definition, after optional whitespace. /// /// ```markdown /// > | [a]: b @@ -304,7 +304,7 @@ pub fn after_whitespace(tokenizer: &mut Tokenizer) -> State { } } -/// After a destination, presumably before a title. +/// After destination, at whitespace. /// /// ```markdown /// > | [a]: b @@ -320,7 +320,7 @@ pub fn title_before(tokenizer: &mut Tokenizer) -> State { State::Retry(space_or_tab_eol(tokenizer)) } -/// Before a title, after a line ending. +/// At title. /// /// ```markdown /// | [a]: b @@ -335,7 +335,7 @@ pub fn title_before_marker(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::TitleStart) } -/// After a title. +/// After title. /// /// ```markdown /// > | [a]: b "c" @@ -352,7 +352,7 @@ pub fn title_after(tokenizer: &mut Tokenizer) -> State { State::Retry(space_or_tab(tokenizer)) } -/// After a title, after optional whitespace. +/// After title, after optional whitespace. /// /// ```markdown /// > | [a]: b "c" diff --git a/src/construct/hard_break_escape.rs b/src/construct/hard_break_escape.rs index f5030aa..cec34d5 100644 --- a/src/construct/hard_break_escape.rs +++ b/src/construct/hard_break_escape.rs @@ -43,7 +43,7 @@ use crate::event::Name; use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; -/// Start of a hard break (escape). +/// Start of hard break (escape). /// /// ```markdown /// > | a\ @@ -61,7 +61,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// At the end of a hard break (escape), after `\`. +/// After `\`, at eol. 
/// /// ```markdown /// > | a\ diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs index 17cf617..974158f 100644 --- a/src/construct/heading_atx.rs +++ b/src/construct/heading_atx.rs @@ -85,7 +85,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// Start of a heading (atx), after whitespace. +/// After optional whitespace, at `#`. /// /// ```markdown /// > | ## aa @@ -100,7 +100,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { } } -/// In the opening sequence. +/// In opening sequence. /// /// ```markdown /// > | ## aa @@ -131,7 +131,7 @@ pub fn sequence_open(tokenizer: &mut Tokenizer) -> State { } } -/// After something but before something else. +/// After something, before something else. /// /// ```markdown /// > | ## aa @@ -161,7 +161,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { } } -/// In a further sequence (after whitespace). +/// In further sequence (after whitespace). /// /// Could be normal “visible” hashes in the heading or a final sequence. /// diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs index e31ce76..8485f5a 100644 --- a/src/construct/heading_setext.rs +++ b/src/construct/heading_setext.rs @@ -65,7 +65,7 @@ use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; use crate::util::skip::opt_back as skip_opt_back; -/// At a line ending, presumably an underline. +/// At start of heading (setext) underline. /// /// ```markdown /// | aa @@ -100,7 +100,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// After optional whitespace, presumably an underline. +/// After optional whitespace, at `-` or `=`. /// /// ```markdown /// | aa @@ -118,7 +118,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { } } -/// In an underline sequence. +/// In sequence. /// /// ```markdown /// | aa @@ -143,7 +143,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State { } } -/// After an underline sequence, after optional whitespace. 
+/// After sequence, after optional whitespace. /// /// ```markdown /// | aa diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs index b5e1815..2da4f47 100644 --- a/src/construct/html_flow.rs +++ b/src/construct/html_flow.rs @@ -124,7 +124,7 @@ const BASIC: u8 = 6; /// Symbol for `<x>` (condition 7). const COMPLETE: u8 = 7; -/// Start of HTML (flow), before optional whitespace. +/// Start of HTML (flow). /// /// ```markdown /// > | <x /> @@ -153,7 +153,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// After optional whitespace, before `<`. +/// At `<`, after optional whitespace. /// /// ```markdown /// > | <x /> @@ -169,7 +169,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { } } -/// After `<`, before a tag name or other stuff. +/// After `<`, at tag name or other stuff. /// /// ```markdown /// > | <x /> @@ -209,7 +209,7 @@ pub fn open(tokenizer: &mut Tokenizer) -> State { } } -/// After `<!`, so inside a declaration, comment, or CDATA. +/// After `<!`, at declaration, comment, or CDATA. /// /// ```markdown /// > | <!doctype> @@ -242,7 +242,7 @@ pub fn declaration_open(tokenizer: &mut Tokenizer) -> State { } } -/// After `<!-`, inside a comment, before another `-`. +/// After `<!-`, inside a comment, at another `-`. /// /// ```markdown /// > | <!--xxx--> @@ -286,7 +286,7 @@ pub fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State { } } -/// After `</`, in a closing tag, before a tag name. +/// After `</`, in closing tag, at tag name. /// /// ```markdown /// > | </x> @@ -303,7 +303,7 @@ pub fn tag_close_start(tokenizer: &mut Tokenizer) -> State { } } -/// In a tag name. +/// In tag name. /// /// ```markdown /// > | <ab> @@ -372,7 +372,7 @@ pub fn tag_name(tokenizer: &mut Tokenizer) -> State { } } -/// After a closing slash of a basic tag name. +/// After closing slash of a basic tag name. 
/// /// ```markdown /// > | <div/> @@ -390,7 +390,7 @@ pub fn basic_self_closing(tokenizer: &mut Tokenizer) -> State { } } -/// After a closing slash of a complete tag name. +/// After closing slash of a complete tag name. /// /// ```markdown /// > | <x/> @@ -406,7 +406,7 @@ pub fn complete_closing_tag_after(tokenizer: &mut Tokenizer) -> State { } } -/// At a place where an attribute name would be valid. +/// At an attribute name. /// /// At first, this state is used after a complete tag name, after whitespace, /// where it expects optional attributes or the end of the tag. @@ -444,7 +444,7 @@ pub fn complete_attribute_name_before(tokenizer: &mut Tokenizer) -> State { } } -/// In an attribute name. +/// In attribute name. /// /// ```markdown /// > | <a :b> @@ -465,8 +465,8 @@ pub fn complete_attribute_name(tokenizer: &mut Tokenizer) -> State { } } -/// After an attribute name, before an attribute initializer, the end of the -/// tag, or whitespace. +/// After attribute name, at an optional initializer, the end of the tag, or +/// whitespace. /// /// ```markdown /// > | <a b> @@ -488,8 +488,8 @@ pub fn complete_attribute_name_after(tokenizer: &mut Tokenizer) -> State { } } -/// Before an unquoted, double quoted, or single quoted attribute value, -/// allowing whitespace. +/// Before unquoted, double quoted, or single quoted attribute value, allowing +/// whitespace. /// /// ```markdown /// > | <a b=c> @@ -516,7 +516,7 @@ pub fn complete_attribute_value_before(tokenizer: &mut Tokenizer) -> State { } } -/// In a double or single quoted attribute value. +/// In double or single quoted attribute value. /// /// ```markdown /// > | <a b="c"> @@ -543,7 +543,7 @@ pub fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State { } } -/// In an unquoted attribute value. +/// In unquoted attribute value. 
/// /// ```markdown /// > | <a b=c> @@ -561,7 +561,7 @@ pub fn complete_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State { } } -/// After a double or single quoted attribute value, before whitespace or the +/// After double or single quoted attribute value, before whitespace or the /// end of the tag. /// /// ```markdown @@ -617,7 +617,7 @@ pub fn complete_after(tokenizer: &mut Tokenizer) -> State { } } -/// Inside continuation of any HTML kind. +/// In continuation of any HTML kind. /// /// ```markdown /// > | <!--xxx--> @@ -668,7 +668,7 @@ pub fn continuation(tokenizer: &mut Tokenizer) -> State { } } -/// In continuation, at an eol. +/// In continuation, at eol. /// /// ```markdown /// > | <x> @@ -683,7 +683,7 @@ pub fn continuation_start(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::NonLazyContinuationStart) } -/// In continuation, at an eol, before non-lazy content. +/// In continuation, at eol, before non-lazy content. /// /// ```markdown /// > | <x> @@ -702,7 +702,7 @@ pub fn continuation_start_non_lazy(tokenizer: &mut Tokenizer) -> State { } } -/// In continuation, after an eol, before non-lazy content. +/// In continuation, before non-lazy content. /// /// ```markdown /// | <x> @@ -735,7 +735,7 @@ pub fn continuation_comment_inside(tokenizer: &mut Tokenizer) -> State { } } -/// In raw continuation, after `<`, expecting a `/`. +/// In raw continuation, after `<`, at `/`. /// /// ```markdown /// > | <script>console.log(1)</script> @@ -752,7 +752,7 @@ pub fn continuation_raw_tag_open(tokenizer: &mut Tokenizer) -> State { } } -/// In raw continuation, after `</`, expecting or inside a raw tag name. +/// In raw continuation, after `</`, in a raw tag name. /// /// ```markdown /// > | <script>console.log(1)</script> @@ -807,7 +807,7 @@ pub fn continuation_cdata_inside(tokenizer: &mut Tokenizer) -> State { } } -/// In declaration or instruction continuation, waiting for `>` to close it. 
+/// In declaration or instruction continuation, at `>`. /// /// ```markdown /// > | <!--> @@ -870,7 +870,7 @@ pub fn continuation_after(tokenizer: &mut Tokenizer) -> State { State::Ok } -/// Before a line ending, expecting a blank line. +/// Before eol, expecting blank line. /// /// ```markdown /// > | <div> diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs index b92b9fa..fde78de 100644 --- a/src/construct/html_text.rs +++ b/src/construct/html_text.rs @@ -77,7 +77,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// After `<`, before a tag name or other stuff. +/// After `<`, at tag name or other stuff. /// /// ```markdown /// > | a <b> c @@ -110,7 +110,7 @@ pub fn open(tokenizer: &mut Tokenizer) -> State { } } -/// After `<!`, so inside a declaration, comment, or CDATA. +/// After `<!`, at declaration, comment, or CDATA. /// /// ```markdown /// > | a <!doctype> c @@ -139,7 +139,7 @@ pub fn declaration_open(tokenizer: &mut Tokenizer) -> State { } } -/// After `<!-`, inside a comment, before another `-`. +/// In a comment, after `<!-`, at another `-`. /// /// ```markdown /// > | a <!--b--> c @@ -155,7 +155,7 @@ pub fn comment_open_inside(tokenizer: &mut Tokenizer) -> State { } } -/// After `<!--`, inside a comment +/// After `<!--`, in a comment. /// /// > 👉 **Note**: [html (flow)][html_flow] does allow `<!-->` or `<!--->` as /// > empty comments. @@ -179,7 +179,7 @@ pub fn comment_start(tokenizer: &mut Tokenizer) -> State { } } -/// After `<!---`, inside a comment +/// After `<!---`, in a comment. /// /// > 👉 **Note**: [html (flow)][html_flow] does allow `<!-->` or `<!--->` as /// > empty comments. @@ -199,7 +199,7 @@ pub fn comment_start_dash(tokenizer: &mut Tokenizer) -> State { } } -/// In a comment. +/// In comment. /// /// ```markdown /// > | a <!--b--> c @@ -223,7 +223,7 @@ pub fn comment(tokenizer: &mut Tokenizer) -> State { } } -/// In a comment, after `-`. +/// In comment, after `-`. 
/// /// ```markdown /// > | a <!--b--> c @@ -239,7 +239,7 @@ pub fn comment_close(tokenizer: &mut Tokenizer) -> State { } } -/// After `<![`, inside CDATA, expecting `CDATA[`. +/// After `<![`, in CDATA, expecting `CDATA[`. /// /// ```markdown /// > | a <![CDATA[>&<]]> b @@ -285,7 +285,7 @@ pub fn cdata(tokenizer: &mut Tokenizer) -> State { } } -/// In CDATA, after `]`. +/// In CDATA, after `]`, at another `]`. /// /// ```markdown /// > | a <![CDATA[>&<]]> b @@ -301,7 +301,7 @@ pub fn cdata_close(tokenizer: &mut Tokenizer) -> State { } } -/// In CDATA, after `]]`. +/// In CDATA, after `]]`, at `>`. /// /// ```markdown /// > | a <![CDATA[>&<]]> b @@ -315,7 +315,7 @@ pub fn cdata_end(tokenizer: &mut Tokenizer) -> State { } } -/// In a declaration. +/// In declaration. /// /// ```markdown /// > | a <!b> c @@ -335,7 +335,7 @@ pub fn declaration(tokenizer: &mut Tokenizer) -> State { } } -/// In an instruction. +/// In instruction. /// /// ```markdown /// > | a <?b?> c @@ -359,7 +359,7 @@ pub fn instruction(tokenizer: &mut Tokenizer) -> State { } } -/// In an instruction, after `?`. +/// In instruction, after `?`, at `>`. /// /// ```markdown /// > | a <?b?> c @@ -372,7 +372,7 @@ pub fn instruction_close(tokenizer: &mut Tokenizer) -> State { } } -/// After `</`, in a closing tag, before a tag name. +/// After `</`, in closing tag, at tag name. /// /// ```markdown /// > | a </b> c @@ -406,7 +406,7 @@ pub fn tag_close(tokenizer: &mut Tokenizer) -> State { } } -/// In a closing tag, after the tag name. +/// In closing tag, after tag name. /// /// ```markdown /// > | a </b> c @@ -426,7 +426,7 @@ pub fn tag_close_between(tokenizer: &mut Tokenizer) -> State { } } -/// After `<x`, in an opening tag name. +/// After `<x`, in opening tag name. /// /// ```markdown /// > | a <b> c @@ -444,7 +444,7 @@ pub fn tag_open(tokenizer: &mut Tokenizer) -> State { } } -/// In an opening tag, after the tag name. +/// In opening tag, after tag name. 
/// /// ```markdown /// > | a <b> c @@ -473,7 +473,7 @@ pub fn tag_open_between(tokenizer: &mut Tokenizer) -> State { } } -/// In an attribute name. +/// In attribute name. /// /// ```markdown /// > | a <b c> d @@ -490,8 +490,8 @@ pub fn tag_open_attribute_name(tokenizer: &mut Tokenizer) -> State { } } -/// After an attribute name, before an attribute initializer, the end of the -/// tag, or whitespace. +/// After attribute name, before initializer, the end of the tag, or +/// whitespace. /// /// ```markdown /// > | a <b c> d @@ -518,8 +518,8 @@ pub fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer) -> State { } } -/// Before an unquoted, double quoted, or single quoted attribute value, -/// allowing whitespace. +/// Before unquoted, double quoted, or single quoted attribute value, allowing +/// whitespace. /// /// ```markdown /// > | a <b c=d> e @@ -551,7 +551,7 @@ pub fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer) -> State { } } -/// In a double or single quoted attribute value. +/// In double or single quoted attribute value. /// /// ```markdown /// > | a <b c="d"> e @@ -582,7 +582,7 @@ pub fn tag_open_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State { } } -/// In an unquoted attribute value. +/// In unquoted attribute value. /// /// ```markdown /// > | a <b c=d> e @@ -599,8 +599,8 @@ pub fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State { } } -/// After a double or single quoted attribute value, before whitespace or the -/// end of the tag. +/// After double or single quoted attribute value, before whitespace or the end +/// of the tag. /// /// ```markdown /// > | a <b c="d"> e @@ -613,7 +613,7 @@ pub fn tag_open_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State } } -/// In certain circumstances of a complete tag where only an `>` is allowed. +/// In certain circumstances of a tag where only an `>` is allowed. 
/// /// ```markdown /// > | a <b c="d"> e @@ -631,7 +631,7 @@ pub fn end(tokenizer: &mut Tokenizer) -> State { } } -/// At an allowed line ending. +/// At eol. /// /// > 👉 **Note**: we can’t have blank lines in text, so no need to worry about /// > empty tokens. @@ -654,7 +654,7 @@ pub fn line_ending_before(tokenizer: &mut Tokenizer) -> State { } } -/// After a line ending. +/// After eol, at optional whitespace. /// /// > 👉 **Note**: we can’t have blank lines in text, so no need to worry about /// > empty tokens. @@ -672,7 +672,7 @@ pub fn line_ending_after(tokenizer: &mut Tokenizer) -> State { State::Retry(space_or_tab(tokenizer)) } -/// After a line ending, after indent. +/// After eol, after optional whitespace. /// /// > 👉 **Note**: we can’t have blank lines in text, so no need to worry about /// > empty tokens. diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs index da53125..8801ea7 100644 --- a/src/construct/label_end.rs +++ b/src/construct/label_end.rs @@ -291,6 +291,10 @@ pub fn ok(tokenizer: &mut Tokenizer) -> State { let is_link = tokenizer.events[label_start.start.0].name == Name::LabelLink; + // If this is a link, we need to mark earlier link starts as no longer + // viable for use (as they would otherwise contain a link). + // These link starts are still looking for balanced closing brackets, so + // we can’t remove them. if is_link { let mut index = 0; while index < tokenizer.tokenize_state.label_start_stack.len() { @@ -332,7 +336,7 @@ pub fn nok(tokenizer: &mut Tokenizer) -> State { State::Nok } -/// Before a resource, at `(`. +/// At a resource. /// /// ```markdown /// > | [a](b) c @@ -351,7 +355,7 @@ pub fn resource_start(tokenizer: &mut Tokenizer) -> State { } } -/// At the start of a resource, after `(`, before a destination. +/// In resource, after `(`, at optional whitespace. 
/// /// ```markdown /// > | [a](b) c @@ -365,7 +369,7 @@ pub fn resource_before(tokenizer: &mut Tokenizer) -> State { State::Retry(space_or_tab_eol(tokenizer)) } -/// At the start of a resource, after optional whitespace. +/// In resource, after optional whitespace, at `)` or a destination. /// /// ```markdown /// > | [a](b) c @@ -390,7 +394,7 @@ pub fn resource_open(tokenizer: &mut Tokenizer) -> State { } } -/// In a resource, after a destination, before optional whitespace. +/// In resource, after destination, at optional whitespace. /// /// ```markdown /// > | [a](b) c @@ -410,7 +414,12 @@ pub fn resource_destination_after(tokenizer: &mut Tokenizer) -> State { State::Retry(space_or_tab_eol(tokenizer)) } -/// Without destination. +/// At invalid destination. +/// +/// ```markdown +/// > | [a](<<) b +/// ^ +/// ``` pub fn resource_destination_missing(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_1 = Name::Data; tokenizer.tokenize_state.token_2 = Name::Data; @@ -421,7 +430,7 @@ pub fn resource_destination_missing(tokenizer: &mut Tokenizer) -> State { State::Nok } -/// In a resource, after a destination, after whitespace. +/// In resource, after destination and whitespace, at `(` or title. /// /// ```markdown /// > | [a](b ) c @@ -443,7 +452,7 @@ pub fn resource_between(tokenizer: &mut Tokenizer) -> State { } } -/// In a resource, after a title. +/// In resource, after title, at optional whitespace. /// /// ```markdown /// > | [a](b "c") d @@ -460,7 +469,7 @@ pub fn resource_title_after(tokenizer: &mut Tokenizer) -> State { State::Retry(space_or_tab_eol(tokenizer)) } -/// In a resource, at the `)`. +/// In resource, at `)`. /// /// ```markdown /// > | [a](b) d @@ -479,7 +488,7 @@ pub fn resource_end(tokenizer: &mut Tokenizer) -> State { } } -/// In a reference (full), at the `[`. +/// In reference (full), at `[`. 
/// /// ```markdown /// > | [a][b] d @@ -501,7 +510,7 @@ pub fn reference_full(tokenizer: &mut Tokenizer) -> State { } } -/// In a reference (full), after `]`. +/// In reference (full), after `]`. /// /// ```markdown /// > | [a][b] d @@ -537,7 +546,7 @@ pub fn reference_full_after(tokenizer: &mut Tokenizer) -> State { } } -/// In a reference (collapsed), at the `[`. +/// In reference (collapsed), at `[`. /// /// > 👉 **Note**: we only get here if the label is defined. /// @@ -558,7 +567,7 @@ pub fn reference_collapsed(tokenizer: &mut Tokenizer) -> State { } } -/// In a reference (collapsed), at the `]`. +/// In reference (collapsed), at `]`. /// /// > 👉 **Note**: we only get here if the label is defined. /// @@ -581,8 +590,8 @@ pub fn reference_collapsed_open(tokenizer: &mut Tokenizer) -> State { /// Resolve media. /// -/// This turns correct label start (image, link) and label end into links and -/// images, or turns them back into data. +/// This turns matching label start (image, link) and label ends into links and +/// images, and turns unmatched label starts back into data. #[allow(clippy::too_many_lines)] pub fn resolve(tokenizer: &mut Tokenizer) { let mut left = tokenizer.tokenize_state.label_start_list_loose.split_off(0); diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs index b8c8858..ce09f5b 100644 --- a/src/construct/label_start_image.rs +++ b/src/construct/label_start_image.rs @@ -52,7 +52,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// After `!`, before a `[`. +/// After `!`, at `[`. /// /// ```markdown /// > | a ![b] c diff --git a/src/construct/list.rs b/src/construct/list.rs index 20be73c..206f823 100644 --- a/src/construct/list.rs +++ b/src/construct/list.rs @@ -79,7 +79,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// Start of list item, after whitespace. +/// After optional whitespace, at list item prefix. 
/// /// ```markdown /// > | * a @@ -89,10 +89,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // Unordered. Some(b'*' | b'-') => { - tokenizer.check( - State::Next(StateName::ListNok), - State::Next(StateName::ListBeforeUnordered), - ); + tokenizer.check(State::Nok, State::Next(StateName::ListBeforeUnordered)); State::Retry(StateName::ThematicBreakStart) } Some(b'+') => State::Retry(StateName::ListBeforeUnordered), @@ -103,7 +100,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { } } -/// Start of an unordered list item. +/// At unordered list item marker. /// /// The line is not a thematic break. /// @@ -116,7 +113,7 @@ pub fn before_unordered(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::ListMarker) } -/// Start of an ordered list item. +/// At ordered list item value. /// /// ```markdown /// > | * a @@ -128,7 +125,7 @@ pub fn before_ordered(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::ListValue) } -/// In an ordered list item value. +/// In ordered list item value. /// /// ```markdown /// > | 1. a @@ -152,7 +149,7 @@ pub fn value(tokenizer: &mut Tokenizer) -> State { } } -/// At a list item marker. +/// At list item marker. /// /// ```markdown /// > | * a @@ -167,7 +164,7 @@ pub fn marker(tokenizer: &mut Tokenizer) -> State { State::Next(StateName::ListMarkerAfter) } -/// After a list item marker. +/// After list item marker. /// /// ```markdown /// > | * a @@ -184,7 +181,9 @@ pub fn marker_after(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::BlankLineStart) } -/// After a list item marker, not followed by a blank line. +/// After list item marker. +/// +/// The marker is not followed by a blank line. /// /// ```markdown /// > | * a @@ -201,7 +200,7 @@ pub fn marker_after_filled(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::ListWhitespace) } -/// In whitespace after a marker. +/// After marker, at whitespace. 
/// /// ```markdown /// > | * a @@ -226,7 +225,7 @@ pub fn whitespace_after(tokenizer: &mut Tokenizer) -> State { } } -/// After a list item marker, followed by no indent or more indent that needed. +/// After marker, followed by no indent or more indent than needed. /// /// ```markdown /// > | * a @@ -244,7 +243,7 @@ pub fn prefix_other(tokenizer: &mut Tokenizer) -> State { } } -/// After a list item prefix. +/// After list item prefix. /// /// ```markdown /// > | * a @@ -318,7 +317,6 @@ pub fn cont_blank(tokenizer: &mut Tokenizer) -> State { if container.blank_initial { State::Nok } else { - tokenizer.attempt(State::Next(StateName::ListOk), State::Nok); // Consume, optionally, at most `size`. State::Retry(space_or_tab_min_max(tokenizer, 0, size)) } @@ -338,21 +336,10 @@ pub fn cont_filled(tokenizer: &mut Tokenizer) -> State { container.blank_initial = false; - tokenizer.attempt(State::Next(StateName::ListOk), State::Nok); // Consume exactly `size`. State::Retry(space_or_tab_min_max(tokenizer, size, size)) } -/// A state fn to yield [`State::Ok`]. -pub fn ok(_tokenizer: &mut Tokenizer) -> State { - State::Ok -} - -/// A state fn to yield [`State::Nok`]. -pub fn nok(_tokenizer: &mut Tokenizer) -> State { - State::Nok -} - /// Find adjacent list items with the same marker. pub fn resolve(tokenizer: &mut Tokenizer) { let mut lists_wip: Vec<(u8, usize, usize, usize)> = vec![]; diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs index 663b01b..c956a2c 100644 --- a/src/construct/paragraph.rs +++ b/src/construct/paragraph.rs @@ -38,7 +38,7 @@ use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; use crate::util::skip::opt as skip_opt; -/// Before a paragraph. +/// Before paragraph. /// /// ```markdown /// > | abc @@ -55,7 +55,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// In a paragraph. +/// In paragraph.
/// /// ```markdown /// > | abc diff --git a/src/construct/partial_bom.rs b/src/construct/partial_bom.rs index 0175971..1818ef4 100644 --- a/src/construct/partial_bom.rs +++ b/src/construct/partial_bom.rs @@ -16,7 +16,7 @@ use crate::tokenizer::Tokenizer; const BOM: [u8; 3] = [0xEF, 0xBB, 0xBF]; -/// Before a BOM. +/// Before BOM. /// /// ```text /// > | 0xEF 0xBB 0xBF @@ -31,7 +31,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// Inside the BOM. +/// In BOM. /// /// ```text /// > | 0xEF 0xBB 0xBF diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs index f9b7947..b6b0f59 100644 --- a/src/construct/partial_data.rs +++ b/src/construct/partial_data.rs @@ -11,7 +11,7 @@ use crate::resolve::Name as ResolveName; use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; -/// At the beginning of data. +/// At beginning of data. /// /// ```markdown /// > | abc @@ -79,11 +79,10 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State { /// Merge adjacent data events. pub fn resolve(tokenizer: &mut Tokenizer) { - let len = tokenizer.events.len(); let mut index = 0; // Loop through events and merge adjacent data events. - while index < len { + while index < tokenizer.events.len() { let event = &tokenizer.events[index]; if event.kind == Kind::Enter && event.name == Name::Data { @@ -91,7 +90,7 @@ pub fn resolve(tokenizer: &mut Tokenizer) { let mut exit_far_index = exit_index; // Find multiple `data` events. 
- while exit_far_index + 1 < len + while exit_far_index + 1 < tokenizer.events.len() && tokenizer.events[exit_far_index + 1].name == Name::Data { exit_far_index += 2; diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs index c1c1e10..dc5c904 100644 --- a/src/construct/partial_destination.rs +++ b/src/construct/partial_destination.rs @@ -75,7 +75,7 @@ use crate::event::{Content, Name}; use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; -/// Before a destination. +/// Start of destination. /// /// ```markdown /// > | <aa> @@ -105,7 +105,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// After `<`, before an enclosed destination. +/// After `<`, at an enclosed destination. /// /// ```markdown /// > | <aa> @@ -126,7 +126,7 @@ pub fn enclosed_before(tokenizer: &mut Tokenizer) -> State { } } -/// In an enclosed destination. +/// In enclosed destination. /// /// ```markdown /// > | <aa> @@ -151,7 +151,7 @@ pub fn enclosed(tokenizer: &mut Tokenizer) -> State { } } -/// After `\`, in an enclosed destination. +/// After `\`, at a special character. /// /// ```markdown /// > | <a\*a> @@ -167,7 +167,7 @@ pub fn enclosed_escape(tokenizer: &mut Tokenizer) -> State { } } -/// In a raw destination. +/// In raw destination. /// /// ```markdown /// > | aa @@ -209,7 +209,7 @@ pub fn raw(tokenizer: &mut Tokenizer) -> State { } } -/// After `\`, in a raw destination. +/// After `\`, at special character. /// /// ```markdown /// > | a\*a diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs index 0f7aa00..a1667e1 100644 --- a/src/construct/partial_label.rs +++ b/src/construct/partial_label.rs @@ -65,7 +65,7 @@ use crate::state::{Name as StateName, State}; use crate::subtokenize::link; use crate::tokenizer::Tokenizer; -/// Before a label. +/// Start of label. 
/// /// ```markdown /// > | [a] @@ -85,7 +85,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// In a label, at something. +/// In label, at something, before something else. /// /// ```markdown /// > | [a] @@ -142,7 +142,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { } } -/// In a label, after whitespace. +/// In label, after whitespace. /// /// ```markdown /// | [a␊ @@ -154,7 +154,7 @@ pub fn eol_after(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::LabelAtBreak) } -/// In a label, at a blank line. +/// In label, at blank line. /// /// ```markdown /// | [a␊ @@ -168,7 +168,7 @@ pub fn at_blank_line(tokenizer: &mut Tokenizer) -> State { State::Nok } -/// In a label, in text. +/// In label, in text. /// /// ```markdown /// > | [a] @@ -200,7 +200,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State { } } -/// After `\` in a label. +/// After `\`, at a special character. /// /// ```markdown /// > | [a\*a] diff --git a/src/construct/partial_non_lazy_continuation.rs b/src/construct/partial_non_lazy_continuation.rs index 76854c8..3bbf7cc 100644 --- a/src/construct/partial_non_lazy_continuation.rs +++ b/src/construct/partial_non_lazy_continuation.rs @@ -14,7 +14,7 @@ use crate::event::Name; use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; -/// Start of continuation. +/// At eol, before continuation. /// /// ```markdown /// > | * ```js @@ -33,7 +33,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// After line ending. +/// A continuation. /// /// ```markdown /// | * ```js diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs index f2d9a73..5f1b4cf 100644 --- a/src/construct/partial_space_or_tab.rs +++ b/src/construct/partial_space_or_tab.rs @@ -61,7 +61,7 @@ pub fn space_or_tab_with_options(tokenizer: &mut Tokenizer, options: Options) -> StateName::SpaceOrTabStart } -/// Before `space_or_tab`. +/// Start of `space_or_tab`. 
/// /// ```markdown /// > | a␠␠b diff --git a/src/construct/partial_space_or_tab_eol.rs b/src/construct/partial_space_or_tab_eol.rs index 2127fe6..08f4bf2 100644 --- a/src/construct/partial_space_or_tab_eol.rs +++ b/src/construct/partial_space_or_tab_eol.rs @@ -44,7 +44,16 @@ pub fn space_or_tab_eol_with_options(tokenizer: &mut Tokenizer, options: Options StateName::SpaceOrTabEolStart } -pub fn eol_start(tokenizer: &mut Tokenizer) -> State { +/// Start of whitespace with at most one eol. +/// +/// ```markdown +/// > | a␠␠b +/// ^ +/// > | a␠␠␊ +/// ^ +/// | ␠␠b +/// ``` +pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( State::Next(StateName::SpaceOrTabEolAfterFirst), State::Next(StateName::SpaceOrTabEolAtEol), @@ -65,7 +74,16 @@ pub fn eol_start(tokenizer: &mut Tokenizer) -> State { )) } -pub fn eol_after_first(tokenizer: &mut Tokenizer) -> State { +/// After initial whitespace, at optional eol. +/// +/// ```markdown +/// > | a␠␠b +/// ^ +/// > | a␠␠␊ +/// ^ +/// | ␠␠b +/// ``` +pub fn after_first(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.space_or_tab_eol_ok = true; if tokenizer @@ -79,14 +97,19 @@ pub fn eol_after_first(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::SpaceOrTabEolAtEol) } -/// `space_or_tab_eol`: after optionally first `space_or_tab`. +/// After optional whitespace, at eol. /// /// ```markdown -/// > | a +/// > | a␠␠b +/// ^ +/// > | a␠␠␊ +/// ^ +/// | ␠␠b +/// > | a␊ /// ^ -/// | b +/// | ␠␠b /// ``` -pub fn eol_at_eol(tokenizer: &mut Tokenizer) -> State { +pub fn at_eol(tokenizer: &mut Tokenizer) -> State { if let Some(b'\n') = tokenizer.current { tokenizer.enter_with_content( Name::LineEnding, @@ -123,15 +146,17 @@ pub fn eol_at_eol(tokenizer: &mut Tokenizer) -> State { } } -/// `space_or_tab_eol`: after eol. +/// After eol. 
/// /// ```markdown -/// | a -/// > | b +/// | a␠␠␊ +/// > | ␠␠b +/// ^ +/// | a␊ +/// > | ␠␠b /// ^ /// ``` -#[allow(clippy::needless_pass_by_value)] -pub fn eol_after_eol(tokenizer: &mut Tokenizer) -> State { +pub fn after_eol(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( State::Next(StateName::SpaceOrTabEolAfterMore), State::Next(StateName::SpaceOrTabEolAfterMore), @@ -151,14 +176,17 @@ pub fn eol_after_eol(tokenizer: &mut Tokenizer) -> State { )) } -/// `space_or_tab_eol`: after more (optional) `space_or_tab`. +/// After optional final whitespace. /// /// ```markdown -/// | a -/// > | b -/// ^ +/// | a␠␠␊ +/// > | ␠␠b +/// ^ +/// | a␊ +/// > | ␠␠b +/// ^ /// ``` -pub fn eol_after_more(tokenizer: &mut Tokenizer) -> State { +pub fn after_more(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.space_or_tab_eol_content_type = None; tokenizer.tokenize_state.space_or_tab_eol_connect = false; tokenizer.tokenize_state.space_or_tab_eol_ok = false; diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs index 6f7a037..b97243e 100644 --- a/src/construct/partial_title.rs +++ b/src/construct/partial_title.rs @@ -36,7 +36,7 @@ use crate::state::{Name as StateName, State}; use crate::subtokenize::link; use crate::tokenizer::Tokenizer; -/// Before a title. +/// Start of title. /// /// ```markdown /// > | "a" @@ -57,9 +57,9 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// After the opening marker. +/// After opening marker. /// -/// This is also used when at the closing marker. +/// This is also used at the closing marker. /// /// ```markdown /// > | "a" @@ -132,7 +132,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { } } -/// In a title, after whitespace. +/// In title, after whitespace. /// /// ```markdown /// | "a␊ @@ -144,7 +144,7 @@ pub fn after_eol(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::TitleAtBreak) } -/// In a title, at a blank line. +/// In title, at blank line. 
/// /// ```markdown /// | "a␊ @@ -158,7 +158,7 @@ pub fn at_blank_line(tokenizer: &mut Tokenizer) -> State { State::Nok } -/// In title text. +/// In text. /// /// ```markdown /// > | "a" @@ -187,7 +187,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State { } } -/// After `\`, in title text. +/// After `\`, at a special character. /// /// ```markdown /// > | "a\*b" diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs index f4d008d..b2989cb 100644 --- a/src/construct/thematic_break.rs +++ b/src/construct/thematic_break.rs @@ -54,7 +54,7 @@ use crate::event::Name; use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; -/// Start of a thematic break. +/// Start of thematic break. /// /// ```markdown /// > | *** @@ -78,7 +78,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// Start of a thematic break, after whitespace. +/// After optional whitespace, at marker. /// /// ```markdown /// > | *** @@ -94,7 +94,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { } } -/// After something but before something else. +/// After something, before something else. /// /// ```markdown /// > | *** @@ -124,7 +124,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { } } -/// In a sequence of markers. +/// In sequence. /// /// ```markdown /// > | *** |