diff options
Diffstat (limited to '')
43 files changed, 611 insertions, 553 deletions
diff --git a/src/compiler.rs b/src/compiler.rs index 8f3036a..a935367 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -12,7 +12,7 @@ use crate::util::{ use crate::{LineEnding, Options}; use std::str; -/// Representation of a link or image, resource or reference. +/// Link or image, resource or reference. /// Reused for temporary definitions as well, in the first pass. #[derive(Debug)] struct Media { diff --git a/src/construct/attention.rs b/src/construct/attention.rs index e974fae..3a29d06 100644 --- a/src/construct/attention.rs +++ b/src/construct/attention.rs @@ -60,7 +60,7 @@ use crate::util::slice::Slice; /// Character code kinds. #[derive(Debug, PartialEq)] -enum GroupKind { +enum CharacterKind { /// Whitespace. /// /// ## Example @@ -98,7 +98,7 @@ struct Sequence { /// The depth in events where this sequence resides. balance: usize, /// The index into events where this sequence’s `Enter` currently resides. - event_index: usize, + index: usize, /// The (shifted) point where this sequence starts. start_point: Point, /// The (shifted) point where this sequence end. @@ -111,7 +111,7 @@ struct Sequence { close: bool, } -/// Before a sequence. +/// At start of attention. /// /// ```markdown /// > | ** @@ -128,7 +128,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// In a sequence. +/// In sequence. /// /// ```markdown /// > | ** @@ -136,7 +136,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - Some(b'*' | b'_') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => { + Some(b'*' | b'_') if tokenizer.current == Some(tokenizer.tokenize_state.marker) => { tokenizer.consume(); State::Next(StateName::AttentionInside) } @@ -150,28 +150,28 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State { } /// Resolve attention sequences. -#[allow(clippy::too_many_lines)] pub fn resolve(tokenizer: &mut Tokenizer) { - let mut start = 0; + let mut index = 0; let mut balance = 0; let mut sequences = vec![]; - // Find sequences of sequences and information about them. - while start < tokenizer.events.len() { - let enter = &tokenizer.events[start]; + // Find all sequences, gather info about them. + while index < tokenizer.events.len() { + let enter = &tokenizer.events[index]; if enter.kind == Kind::Enter { balance += 1; if enter.name == Name::AttentionSequence { - let end = start + 1; + let end = index + 1; let exit = &tokenizer.events[end]; let before_end = enter.point.index; let before_start = if before_end < 4 { 0 } else { before_end - 4 }; - let string_before = - String::from_utf8_lossy(&tokenizer.parse_state.bytes[before_start..before_end]); - let char_before = string_before.chars().last(); + let char_before = + String::from_utf8_lossy(&tokenizer.parse_state.bytes[before_start..before_end]) + .chars() + .last(); let after_start = exit.point.index; let after_end = if after_start + 4 > tokenizer.parse_state.bytes.len() { @@ -179,26 +179,27 @@ pub fn resolve(tokenizer: &mut Tokenizer) { } else { after_start + 4 }; - let string_after = - String::from_utf8_lossy(&tokenizer.parse_state.bytes[after_start..after_end]); - let char_after = string_after.chars().next(); + let char_after = + String::from_utf8_lossy(&tokenizer.parse_state.bytes[after_start..after_end]) + .chars() + .next(); let marker = Slice::from_point(tokenizer.parse_state.bytes, &enter.point) .head() .unwrap(); let before = classify_character(char_before); let after = classify_character(char_after); - let open = after == GroupKind::Other - || (after == GroupKind::Punctuation && before != GroupKind::Other); + let open = after == CharacterKind::Other + || (after == CharacterKind::Punctuation && before != CharacterKind::Other); // To do: GFM strikethrough? - // || attentionMarkers.includes(code) - let close = before == GroupKind::Other - || (before == GroupKind::Punctuation && after != GroupKind::Other); + // || char_after == '~' + let close = before == CharacterKind::Other + || (before == CharacterKind::Punctuation && after != CharacterKind::Other); // To do: GFM strikethrough? - // || attentionMarkers.includes(previous) + // || char_before == '~' sequences.push(Sequence { - event_index: start, + index, balance, start_point: enter.point.clone(), end_point: exit.point.clone(), @@ -206,12 +207,12 @@ pub fn resolve(tokenizer: &mut Tokenizer) { open: if marker == b'*' { open } else { - open && (before != GroupKind::Other || !close) + open && (before != CharacterKind::Other || !close) }, close: if marker == b'*' { close } else { - close && (after != GroupKind::Other || !open) + close && (after != CharacterKind::Other || !open) }, marker, }); @@ -220,10 +221,10 @@ pub fn resolve(tokenizer: &mut Tokenizer) { balance -= 1; } - start += 1; + index += 1; } - // Walk through sequences and match them. + // Now walk through them and match them. let mut close = 0; while close < sequences.len() { @@ -240,7 +241,7 @@ pub fn resolve(tokenizer: &mut Tokenizer) { let sequence_open = &sequences[open]; - // We found a sequence that can open the closer we found. + // An opener matching our closer: if sequence_open.open && sequence_close.marker == sequence_open.marker && sequence_close.balance == sequence_open.balance @@ -257,175 +258,7 @@ pub fn resolve(tokenizer: &mut Tokenizer) { } // We’ve found a match! - - // Number of markers to use from the sequence. - let take = if sequence_open.size > 1 && sequence_close.size > 1 { - 2 - } else { - 1 - }; - - // We’re *on* a closing sequence, with a matching opening - // sequence. - // Now we make sure that we can’t have misnested attention: - // - // ```html - // <em>a <strong>b</em> c</strong> - // ``` - // - // Do that by marking everything between it as no longer - // possible to open anything. - // Theoretically we could mark non-closing as well, but we - // don’t look for closers backwards. - let mut between = open + 1; - - while between < close { - sequences[between].open = false; - between += 1; - } - - let sequence_close = &mut sequences[close]; - let close_event_index = sequence_close.event_index; - let seq_close_enter = sequence_close.start_point.clone(); - // No need to worry about `VS`, because sequences are only actual characters. - sequence_close.size -= take; - sequence_close.start_point.column += take; - sequence_close.start_point.index += take; - let seq_close_exit = sequence_close.start_point.clone(); - - // Stay on this closing sequence for the next iteration: it - // might close more things. - next_index -= 1; - - // Remove closing sequence if fully used. - if sequence_close.size == 0 { - sequences.remove(close); - tokenizer.map.add(close_event_index, 2, vec![]); - } else { - // Shift remaining closing sequence forward. - // Do it here because a sequence can open and close different - // other sequences, and the remainder can be on any side or - // somewhere in the middle. - let mut enter = &mut tokenizer.events[close_event_index]; - enter.point = seq_close_exit.clone(); - } - - let sequence_open = &mut sequences[open]; - let open_event_index = sequence_open.event_index; - let seq_open_exit = sequence_open.end_point.clone(); - // No need to worry about `VS`, because sequences are only actual characters. - sequence_open.size -= take; - sequence_open.end_point.column -= take; - sequence_open.end_point.index -= take; - let seq_open_enter = sequence_open.end_point.clone(); - - // Remove opening sequence if fully used. - if sequence_open.size == 0 { - sequences.remove(open); - tokenizer.map.add(open_event_index, 2, vec![]); - next_index -= 1; - } else { - // Shift remaining opening sequence backwards. - // See note above for why that happens here. - let mut exit = &mut tokenizer.events[open_event_index + 1]; - exit.point = seq_open_enter.clone(); - } - - // Opening. - tokenizer.map.add_before( - // Add after the current sequence (it might remain). - open_event_index + 2, - 0, - vec![ - Event { - kind: Kind::Enter, - name: if take == 1 { - Name::Emphasis - } else { - Name::Strong - }, - point: seq_open_enter.clone(), - link: None, - }, - Event { - kind: Kind::Enter, - name: if take == 1 { - Name::EmphasisSequence - } else { - Name::StrongSequence - }, - point: seq_open_enter.clone(), - link: None, - }, - Event { - kind: Kind::Exit, - name: if take == 1 { - Name::EmphasisSequence - } else { - Name::StrongSequence - }, - point: seq_open_exit.clone(), - link: None, - }, - Event { - kind: Kind::Enter, - name: if take == 1 { - Name::EmphasisText - } else { - Name::StrongText - }, - point: seq_open_exit.clone(), - link: None, - }, - ], - ); - // Closing. - tokenizer.map.add( - close_event_index, - 0, - vec![ - Event { - kind: Kind::Exit, - name: if take == 1 { - Name::EmphasisText - } else { - Name::StrongText - }, - point: seq_close_enter.clone(), - link: None, - }, - Event { - kind: Kind::Enter, - name: if take == 1 { - Name::EmphasisSequence - } else { - Name::StrongSequence - }, - point: seq_close_enter.clone(), - link: None, - }, - Event { - kind: Kind::Exit, - name: if take == 1 { - Name::EmphasisSequence - } else { - Name::StrongSequence - }, - point: seq_close_exit.clone(), - link: None, - }, - Event { - kind: Kind::Exit, - name: if take == 1 { - Name::Emphasis - } else { - Name::Strong - }, - point: seq_close_exit.clone(), - link: None, - }, - ], - ); + next_index = match_sequences(tokenizer, &mut sequences, open, close); break; } @@ -439,14 +272,159 @@ pub fn resolve(tokenizer: &mut Tokenizer) { let mut index = 0; while index < sequences.len() { let sequence = &sequences[index]; - tokenizer.events[sequence.event_index].name = Name::Data; - tokenizer.events[sequence.event_index + 1].name = Name::Data; + tokenizer.events[sequence.index].name = Name::Data; + tokenizer.events[sequence.index + 1].name = Name::Data; index += 1; } tokenizer.map.consume(&mut tokenizer.events); } +/// Match two sequences. +fn match_sequences( + tokenizer: &mut Tokenizer, + sequences: &mut Vec<Sequence>, + open: usize, + close: usize, +) -> usize { + // Where to move to next. + // Stay on this closing sequence for the next iteration: it + // might close more things. + // It’s changed if sequences are removed. + let mut next = close; + + // Number of markers to use from the sequence. + let take = if sequences[open].size > 1 && sequences[close].size > 1 { + 2 + } else { + 1 + }; + + // We’re *on* a closing sequence, with a matching opening + // sequence. + // Now we make sure that we can’t have misnested attention: + // + // ```html + // <em>a <strong>b</em> c</strong> + // ``` + // + // Do that by marking everything between it as no longer + // possible to open anything. + // Theoretically we should mark as `close: false` too, but + // we don’t look for closers backwards, so it’s not needed. + let mut between = open + 1; + + while between < close { + sequences[between].open = false; + between += 1; + } + + let (group_name, seq_name, text_name) = if take == 1 { + (Name::Emphasis, Name::EmphasisSequence, Name::EmphasisText) + } else { + (Name::Strong, Name::StrongSequence, Name::StrongText) + }; + let open_index = sequences[open].index; + let close_index = sequences[close].index; + let open_exit = sequences[open].end_point.clone(); + let close_enter = sequences[close].start_point.clone(); + + // No need to worry about `VS`, because sequences are only actual characters. + sequences[open].size -= take; + sequences[close].size -= take; + sequences[open].end_point.column -= take; + sequences[open].end_point.index -= take; + sequences[close].start_point.column += take; + sequences[close].start_point.index += take; + + // Opening. + tokenizer.map.add_before( + // Add after the current sequence (it might remain). + open_index + 2, + 0, + vec![ + Event { + kind: Kind::Enter, + name: group_name.clone(), + point: sequences[open].end_point.clone(), + link: None, + }, + Event { + kind: Kind::Enter, + name: seq_name.clone(), + point: sequences[open].end_point.clone(), + link: None, + }, + Event { + kind: Kind::Exit, + name: seq_name.clone(), + point: open_exit.clone(), + link: None, + }, + Event { + kind: Kind::Enter, + name: text_name.clone(), + point: open_exit, + link: None, + }, + ], + ); + // Closing. + tokenizer.map.add( + close_index, + 0, + vec![ + Event { + kind: Kind::Exit, + name: text_name, + point: close_enter.clone(), + link: None, + }, + Event { + kind: Kind::Enter, + name: seq_name.clone(), + point: close_enter, + link: None, + }, + Event { + kind: Kind::Exit, + name: seq_name, + point: sequences[close].start_point.clone(), + link: None, + }, + Event { + kind: Kind::Exit, + name: group_name, + point: sequences[close].start_point.clone(), + link: None, + }, + ], + ); + + // Remove closing sequence if fully used. + if sequences[close].size == 0 { + sequences.remove(close); + tokenizer.map.add(close_index, 2, vec![]); + } else { + // Shift remaining closing sequence forward. + // Do it here because a sequence can open and close different + // other sequences, and the remainder can be on any side or + // somewhere in the middle. + tokenizer.events[close_index].point = sequences[close].start_point.clone(); + } + + if sequences[open].size == 0 { + sequences.remove(open); + tokenizer.map.add(open_index, 2, vec![]); + // Everything shifts one to the left, account for it in next iteration. + next -= 1; + } else { + tokenizer.events[open_index + 1].point = sequences[open].end_point.clone(); + } + + next +} + /// Classify whether a character code represents whitespace, punctuation, or /// something else. /// @@ -458,15 +436,15 @@ pub fn resolve(tokenizer: &mut Tokenizer) { /// ## References /// /// * [`micromark-util-classify-character` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-util-classify-character/dev/index.js) -fn classify_character(char: Option<char>) -> GroupKind { +fn classify_character(char: Option<char>) -> CharacterKind { match char { // EOF. - None => GroupKind::Whitespace, + None => CharacterKind::Whitespace, // Unicode whitespace. - Some(char) if char.is_whitespace() => GroupKind::Whitespace, + Some(char) if char.is_whitespace() => CharacterKind::Whitespace, // Unicode punctuation. - Some(char) if PUNCTUATION.contains(&char) => GroupKind::Punctuation, + Some(char) if PUNCTUATION.contains(&char) => CharacterKind::Punctuation, // Everything else. - Some(_) => GroupKind::Other, + Some(_) => CharacterKind::Other, } } diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs index c0d9ae3..1bb8004 100644 --- a/src/construct/autolink.rs +++ b/src/construct/autolink.rs @@ -106,7 +106,7 @@ use crate::event::Name; use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; -/// Start of an autolink. +/// Start of autolink. /// /// ```markdown /// > | a<https://example.com>b @@ -128,7 +128,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// After `<`, before the protocol. +/// After `<`, at protocol or atext. /// /// ```markdown /// > | a<https://example.com>b @@ -147,7 +147,7 @@ pub fn open(tokenizer: &mut Tokenizer) -> State { } } -/// After the first byte of the protocol or email name. +/// At second byte of protocol or atext. /// /// ```markdown /// > | a<https://example.com>b @@ -167,7 +167,7 @@ pub fn scheme_or_email_atext(tokenizer: &mut Tokenizer) -> State { } } -/// Inside an ambiguous protocol or email name. +/// In ambiguous protocol or atext. /// /// ```markdown /// > | a<https://example.com>b @@ -197,7 +197,7 @@ pub fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer) -> State { } } -/// Inside a URL, after the protocol. +/// After protocol, in URL. /// /// ```markdown /// > | a<https://example.com>b @@ -222,7 +222,7 @@ pub fn url_inside(tokenizer: &mut Tokenizer) -> State { } } -/// Inside email atext. +/// In email atext. /// /// ```markdown /// > | a<user.name@example.com>b @@ -261,7 +261,7 @@ pub fn email_atext(tokenizer: &mut Tokenizer) -> State { } } -/// After an at-sign or a dot in the label. +/// In label, after at-sign or dot. /// /// ```markdown /// > | a<user.name@example.com>b @@ -277,7 +277,7 @@ pub fn email_at_sign_or_dot(tokenizer: &mut Tokenizer) -> State { } } -/// In the label, where `.` and `>` are allowed. +/// In label, where `.` and `>` are allowed. /// /// ```markdown /// > | a<user.name@example.com>b @@ -307,7 +307,7 @@ pub fn email_label(tokenizer: &mut Tokenizer) -> State { } } -/// In the label, where `.` and `>` are *not* allowed. +/// In label, where `.` and `>` are *not* allowed. /// /// Though, this is also used in `email_label` to parse other values. /// diff --git a/src/construct/blank_line.rs b/src/construct/blank_line.rs index eeef637..81b58fc 100644 --- a/src/construct/blank_line.rs +++ b/src/construct/blank_line.rs @@ -36,7 +36,7 @@ use crate::construct::partial_space_or_tab::space_or_tab; use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; -/// Start of a blank line. +/// Start of blank line. /// /// > 👉 **Note**: `␠` represents a space character. /// @@ -55,7 +55,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { State::Retry(space_or_tab(tokenizer)) } -/// After zero or more spaces or tabs, before a line ending or EOF. +/// At eof/eol, after optional whitespace. /// /// ```markdown /// > | ␠␠␊ diff --git a/src/construct/block_quote.rs b/src/construct/block_quote.rs index 1a32f7d..6e660cb 100644 --- a/src/construct/block_quote.rs +++ b/src/construct/block_quote.rs @@ -47,39 +47,17 @@ use crate::tokenizer::Tokenizer; /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { if tokenizer.parse_state.constructs.block_quote { - tokenizer.attempt(State::Next(StateName::BlockQuoteBefore), State::Nok); - State::Retry(space_or_tab_min_max( - tokenizer, - 0, - if tokenizer.parse_state.constructs.code_indented { - TAB_SIZE - 1 - } else { - usize::MAX - }, - )) + tokenizer.enter(Name::BlockQuote); + State::Retry(StateName::BlockQuoteContStart) } else { State::Nok } } -/// Start of block quote, after whitespace, before `>`. -/// -/// ```markdown -/// > | > a -/// ^ -/// ``` -pub fn before(tokenizer: &mut Tokenizer) -> State { - match tokenizer.current { - Some(b'>') => { - tokenizer.enter(Name::BlockQuote); - State::Retry(StateName::BlockQuoteContBefore) - } - _ => State::Retry(StateName::BlockQuoteContBefore), - } -} - /// Start of block quote continuation. /// +/// Also used to parse the first block quote opening. +/// /// ```markdown /// | > a /// > | > b @@ -98,7 +76,9 @@ pub fn cont_start(tokenizer: &mut Tokenizer) -> State { )) } -/// After whitespace, before `>`. +/// At `>`, after optional whitespace. +/// +/// Also used to parse the first block quote opening. /// /// ```markdown /// | > a diff --git a/src/construct/character_escape.rs b/src/construct/character_escape.rs index e0f36c7..494f1d2 100644 --- a/src/construct/character_escape.rs +++ b/src/construct/character_escape.rs @@ -37,7 +37,7 @@ use crate::event::Name; use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; -/// Start of a character escape. +/// Start of character escape. /// /// ```markdown /// > | a\*b @@ -56,7 +56,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// Inside a character escape, after `\`. +/// After `\`, at punctuation. /// /// ```markdown /// > | a\*b diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs index 476ea14..6171927 100644 --- a/src/construct/character_reference.rs +++ b/src/construct/character_reference.rs @@ -70,7 +70,7 @@ use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; use crate::util::slice::Slice; -/// Start of a character reference. +/// Start of character reference. /// /// ```markdown /// > | a&b @@ -93,8 +93,8 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// Inside a character reference, after `&`, before `#` for numeric references -/// or an alphanumeric for named references. +/// After `&`, at `#` for numeric references or alphanumeric for named +/// references. /// /// ```markdown /// > | a&b @@ -117,8 +117,7 @@ pub fn open(tokenizer: &mut Tokenizer) -> State { } } -/// Inside a numeric character reference, right before `x` for hexadecimals, -/// or a digit for decimals. +/// After `#`, at `x` for hexadecimals or digit for decimals. /// /// ```markdown /// > | a{b @@ -141,8 +140,7 @@ pub fn numeric(tokenizer: &mut Tokenizer) -> State { } } -/// Inside a character reference value, after the markers (`&#x`, `&#`, or -/// `&`) that define its kind, but before the `;`. +/// After markers (`&#x`, `&#`, or `&`), in value, before `;`. /// /// The character reference kind defines what and how many characters are /// allowed. diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs index ed39917..ac9a63f 100644 --- a/src/construct/code_fenced.rs +++ b/src/construct/code_fenced.rs @@ -139,7 +139,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// Inside the opening fence, after an optional prefix, before a sequence. +/// In opening fence, after prefix, at sequence. /// /// ```markdown /// > | ~~~js @@ -171,7 +171,7 @@ pub fn before_sequence_open(tokenizer: &mut Tokenizer) -> State { } } -/// Inside the opening fence sequence. +/// In opening fence sequence. /// /// ```markdown /// > | ~~~js @@ -205,7 +205,7 @@ pub fn sequence_open(tokenizer: &mut Tokenizer) -> State { } } -/// Inside the opening fence, after the sequence (and optional whitespace), before the info. +/// In opening fence, after the sequence (and optional whitespace), before info. /// /// ```markdown /// > | ~~~js @@ -233,7 +233,7 @@ pub fn info_before(tokenizer: &mut Tokenizer) -> State { } } -/// Inside the opening fence info. +/// In info. /// /// ```markdown /// > | ~~~js @@ -271,7 +271,7 @@ pub fn info(tokenizer: &mut Tokenizer) -> State { } } -/// Inside the opening fence, after the info and whitespace, before the meta. +/// In opening fence, after info and whitespace, before meta. /// /// ```markdown /// > | ~~~js eval @@ -290,7 +290,7 @@ pub fn meta_before(tokenizer: &mut Tokenizer) -> State { } } -/// Inside the opening fence meta. +/// In meta. /// /// ```markdown /// > | ~~~js eval @@ -319,7 +319,7 @@ pub fn meta(tokenizer: &mut Tokenizer) -> State { } } -/// At an eol/eof in code, before a non-lazy closing fence or content. +/// At eol/eof in code, before a non-lazy closing fence or content. /// /// ```markdown /// > | ~~~js @@ -336,7 +336,7 @@ pub fn at_non_lazy_break(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::CodeFencedCloseBefore) } -/// Before a closing fence, at the line ending. +/// Before closing fence, at eol. /// /// ```markdown /// | ~~~js @@ -356,7 +356,7 @@ pub fn close_before(tokenizer: &mut Tokenizer) -> State { } } -/// Before a closing fence, before optional whitespace. +/// Before closing fence, at optional whitespace. /// /// ```markdown /// | ~~~js @@ -383,7 +383,7 @@ pub fn close_start(tokenizer: &mut Tokenizer) -> State { )) } -/// In a closing fence, after optional whitespace, before sequence. +/// In closing fence, after optional whitespace, at sequence. /// /// ```markdown /// | ~~~js @@ -401,7 +401,7 @@ pub fn before_sequence_close(tokenizer: &mut Tokenizer) -> State { } } -/// In the closing fence sequence. +/// In closing fence sequence. /// /// ```markdown /// | ~~~js @@ -434,7 +434,7 @@ pub fn sequence_close(tokenizer: &mut Tokenizer) -> State { } } -/// After the closing fence sequence after optional whitespace. +/// After closing fence sequence, after optional whitespace. /// /// ```markdown /// | ~~~js @@ -452,7 +452,7 @@ pub fn sequence_close_after(tokenizer: &mut Tokenizer) -> State { } } -/// Before a closing fence, at the line ending. +/// Before closing fence, at eol. /// /// ```markdown /// | ~~~js @@ -466,6 +466,7 @@ pub fn content_before(tokenizer: &mut Tokenizer) -> State { tokenizer.exit(Name::LineEnding); State::Next(StateName::CodeFencedContentStart) } + /// Before code content, definitely not before a closing fence. /// /// ```markdown @@ -486,7 +487,7 @@ pub fn content_start(tokenizer: &mut Tokenizer) -> State { )) } -/// Before code content, after a prefix. +/// Before code content, after optional prefix. /// /// ```markdown /// | ~~~js diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs index 2ab117e..3a82dc4 100644 --- a/src/construct/code_indented.rs +++ b/src/construct/code_indented.rs @@ -95,7 +95,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { } } -/// Inside code content. +/// In code content. /// /// ```markdown /// > | aaa @@ -127,7 +127,7 @@ pub fn after(tokenizer: &mut Tokenizer) -> State { State::Ok } -/// Right at a line ending, trying to parse another indent. +/// At eol, trying to parse another indent. /// /// ```markdown /// > | aaa @@ -153,7 +153,7 @@ pub fn further_start(tokenizer: &mut Tokenizer) -> State { } } -/// At an eol, which is followed by an indented line. +/// At eol, followed by an indented line. /// /// ```markdown /// > | aaa diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs index d321f64..d601583 100644 --- a/src/construct/code_text.rs +++ b/src/construct/code_text.rs @@ -112,7 +112,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// In the opening sequence. +/// In opening sequence. /// /// ```markdown /// > | `a` @@ -177,7 +177,7 @@ pub fn data(tokenizer: &mut Tokenizer) -> State { } } -/// In the closing sequence. +/// In closing sequence. /// /// ```markdown /// > | `a` diff --git a/src/construct/definition.rs b/src/construct/definition.rs index 11f1062..2378c48 100644 --- a/src/construct/definition.rs +++ b/src/construct/definition.rs @@ -104,7 +104,7 @@ use crate::util::{ slice::{Position, Slice}, }; -/// At the start of a definition. +/// At start of a definition. /// /// ```markdown /// > | [a]: b "c" @@ -135,7 +135,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// At the start of a definition, after whitespace. +/// After optional whitespace, at `[`. /// /// ```markdown /// > | [a]: b "c" @@ -154,7 +154,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { } } -/// After the label of a definition. +/// After label. /// /// ```markdown /// > | [a]: b "c" @@ -182,7 +182,7 @@ pub fn label_after(tokenizer: &mut Tokenizer) -> State { } } -/// After the marker. +/// After marker. /// /// ```markdown /// > | [a]: b "c" @@ -196,7 +196,7 @@ pub fn marker_after(tokenizer: &mut Tokenizer) -> State { State::Retry(space_or_tab_eol(tokenizer)) } -/// Before a destination. +/// Before destination. /// /// ```markdown /// > | [a]: b "c" @@ -216,7 +216,7 @@ pub fn destination_before(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::DestinationStart) } -/// After a destination. +/// After destination. /// /// ```markdown /// > | [a]: b "c" @@ -248,7 +248,7 @@ pub fn destination_missing(tokenizer: &mut Tokenizer) -> State { State::Nok } -/// After a definition. +/// After definition. /// /// ```markdown /// > | [a]: b @@ -264,7 +264,7 @@ pub fn after(tokenizer: &mut Tokenizer) -> State { State::Retry(space_or_tab(tokenizer)) } -/// After a definition, after optional whitespace. +/// After definition, after optional whitespace. /// /// ```markdown /// > | [a]: b @@ -304,7 +304,7 @@ pub fn after_whitespace(tokenizer: &mut Tokenizer) -> State { } } -/// After a destination, presumably before a title. +/// After destination, at whitespace. /// /// ```markdown /// > | [a]: b @@ -320,7 +320,7 @@ pub fn title_before(tokenizer: &mut Tokenizer) -> State { State::Retry(space_or_tab_eol(tokenizer)) } -/// Before a title, after a line ending. +/// At title. /// /// ```markdown /// | [a]: b @@ -335,7 +335,7 @@ pub fn title_before_marker(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::TitleStart) } -/// After a title. +/// After title. /// /// ```markdown /// > | [a]: b "c" @@ -352,7 +352,7 @@ pub fn title_after(tokenizer: &mut Tokenizer) -> State { State::Retry(space_or_tab(tokenizer)) } -/// After a title, after optional whitespace. +/// After title, after optional whitespace. /// /// ```markdown /// > | [a]: b "c" diff --git a/src/construct/hard_break_escape.rs b/src/construct/hard_break_escape.rs index f5030aa..cec34d5 100644 --- a/src/construct/hard_break_escape.rs +++ b/src/construct/hard_break_escape.rs @@ -43,7 +43,7 @@ use crate::event::Name; use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; -/// Start of a hard break (escape). +/// Start of hard break (escape). /// /// ```markdown /// > | a\ @@ -61,7 +61,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// At the end of a hard break (escape), after `\`. +/// After `\`, at eol. /// /// ```markdown /// > | a\ diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs index 17cf617..974158f 100644 --- a/src/construct/heading_atx.rs +++ b/src/construct/heading_atx.rs @@ -85,7 +85,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// Start of a heading (atx), after whitespace. +/// After optional whitespace, at `#`. /// /// ```markdown /// > | ## aa @@ -100,7 +100,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { } } -/// In the opening sequence. +/// In opening sequence. /// /// ```markdown /// > | ## aa @@ -131,7 +131,7 @@ pub fn sequence_open(tokenizer: &mut Tokenizer) -> State { } } -/// After something but before something else. +/// After something, before something else. /// /// ```markdown /// > | ## aa @@ -161,7 +161,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { } } -/// In a further sequence (after whitespace). +/// In further sequence (after whitespace). /// /// Could be normal “visible” hashes in the heading or a final sequence. /// diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs index e31ce76..8485f5a 100644 --- a/src/construct/heading_setext.rs +++ b/src/construct/heading_setext.rs @@ -65,7 +65,7 @@ use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; use crate::util::skip::opt_back as skip_opt_back; -/// At a line ending, presumably an underline. +/// At start of heading (setext) underline. /// /// ```markdown /// | aa @@ -100,7 +100,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// After optional whitespace, presumably an underline. +/// After optional whitespace, at `-` or `=`. /// /// ```markdown /// | aa @@ -118,7 +118,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { } } -/// In an underline sequence. +/// In sequence. /// /// ```markdown /// | aa @@ -143,7 +143,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State { } } -/// After an underline sequence, after optional whitespace. +/// After sequence, after optional whitespace. /// /// ```markdown /// | aa diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs index b5e1815..2da4f47 100644 --- a/src/construct/html_flow.rs +++ b/src/construct/html_flow.rs @@ -124,7 +124,7 @@ const BASIC: u8 = 6; /// Symbol for `<x>` (condition 7). const COMPLETE: u8 = 7; -/// Start of HTML (flow), before optional whitespace. +/// Start of HTML (flow). /// /// ```markdown /// > | <x /> @@ -153,7 +153,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// After optional whitespace, before `<`. +/// At `<`, after optional whitespace. /// /// ```markdown /// > | <x /> @@ -169,7 +169,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { } } -/// After `<`, before a tag name or other stuff. +/// After `<`, at tag name or other stuff. /// /// ```markdown /// > | <x /> @@ -209,7 +209,7 @@ pub fn open(tokenizer: &mut Tokenizer) -> State { } } -/// After `<!`, so inside a declaration, comment, or CDATA. +/// After `<!`, at declaration, comment, or CDATA. /// /// ```markdown /// > | <!doctype> @@ -242,7 +242,7 @@ pub fn declaration_open(tokenizer: &mut Tokenizer) -> State { } } -/// After `<!-`, inside a comment, before another `-`. +/// After `<!-`, inside a comment, at another `-`. /// /// ```markdown /// > | <!--xxx--> @@ -286,7 +286,7 @@ pub fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State { } } -/// After `</`, in a closing tag, before a tag name. +/// After `</`, in closing tag, at tag name. /// /// ```markdown /// > | </x> @@ -303,7 +303,7 @@ pub fn tag_close_start(tokenizer: &mut Tokenizer) -> State { } } -/// In a tag name. +/// In tag name. /// /// ```markdown /// > | <ab> @@ -372,7 +372,7 @@ pub fn tag_name(tokenizer: &mut Tokenizer) -> State { } } -/// After a closing slash of a basic tag name. +/// After closing slash of a basic tag name. /// /// ```markdown /// > | <div/> @@ -390,7 +390,7 @@ pub fn basic_self_closing(tokenizer: &mut Tokenizer) -> State { } } -/// After a closing slash of a complete tag name. +/// After closing slash of a complete tag name. /// /// ```markdown /// > | <x/> @@ -406,7 +406,7 @@ pub fn complete_closing_tag_after(tokenizer: &mut Tokenizer) -> State { } } -/// At a place where an attribute name would be valid. +/// At an attribute name. /// /// At first, this state is used after a complete tag name, after whitespace, /// where it expects optional attributes or the end of the tag. @@ -444,7 +444,7 @@ pub fn complete_attribute_name_before(tokenizer: &mut Tokenizer) -> State { } } -/// In an attribute name. +/// In attribute name. /// /// ```markdown /// > | <a :b> @@ -465,8 +465,8 @@ pub fn complete_attribute_name(tokenizer: &mut Tokenizer) -> State { } } -/// After an attribute name, before an attribute initializer, the end of the -/// tag, or whitespace. +/// After attribute name, at an optional initializer, the end of the tag, or +/// whitespace. /// /// ```markdown /// > | <a b> @@ -488,8 +488,8 @@ pub fn complete_attribute_name_after(tokenizer: &mut Tokenizer) -> State { } } -/// Before an unquoted, double quoted, or single quoted attribute value, -/// allowing whitespace. +/// Before unquoted, double quoted, or single quoted attribute value, allowing +/// whitespace. /// /// ```markdown /// > | <a b=c> @@ -516,7 +516,7 @@ pub fn complete_attribute_value_before(tokenizer: &mut Tokenizer) -> State { } } -/// In a double or single quoted attribute value. +/// In double or single quoted attribute value. /// /// ```markdown /// > | <a b="c"> @@ -543,7 +543,7 @@ pub fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State { } } -/// In an unquoted attribute value. +/// In unquoted attribute value. /// /// ```markdown /// > | <a b=c> @@ -561,7 +561,7 @@ pub fn complete_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State { } } -/// After a double or single quoted attribute value, before whitespace or the +/// After double or single quoted attribute value, before whitespace or the /// end of the tag. /// /// ```markdown @@ -617,7 +617,7 @@ pub fn complete_after(tokenizer: &mut Tokenizer) -> State { } } -/// Inside continuation of any HTML kind. +/// In continuation of any HTML kind. /// /// ```markdown /// > | <!--xxx--> @@ -668,7 +668,7 @@ pub fn continuation(tokenizer: &mut Tokenizer) -> State { } } -/// In continuation, at an eol. +/// In continuation, at eol. /// /// ```markdown /// > | <x> @@ -683,7 +683,7 @@ pub fn continuation_start(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::NonLazyContinuationStart) } -/// In continuation, at an eol, before non-lazy content. +/// In continuation, at eol, before non-lazy content. /// /// ```markdown /// > | <x> @@ -702,7 +702,7 @@ pub fn continuation_start_non_lazy(tokenizer: &mut Tokenizer) -> State { } } -/// In continuation, after an eol, before non-lazy content. +/// In continuation, before non-lazy content. /// /// ```markdown /// | <x> @@ -735,7 +735,7 @@ pub fn continuation_comment_inside(tokenizer: &mut Tokenizer) -> State { } } -/// In raw continuation, after `<`, expecting a `/`. +/// In raw continuation, after `<`, at `/`. /// /// ```markdown /// > | <script>console.log(1)</script> @@ -752,7 +752,7 @@ pub fn continuation_raw_tag_open(tokenizer: &mut Tokenizer) -> State { } } -/// In raw continuation, after `</`, expecting or inside a raw tag name. +/// In raw continuation, after `</`, in a raw tag name. /// /// ```markdown /// > | <script>console.log(1)</script> @@ -807,7 +807,7 @@ pub fn continuation_cdata_inside(tokenizer: &mut Tokenizer) -> State { } } -/// In declaration or instruction continuation, waiting for `>` to close it. +/// In declaration or instruction continuation, at `>`. /// /// ```markdown /// > | <!--> @@ -870,7 +870,7 @@ pub fn continuation_after(tokenizer: &mut Tokenizer) -> State { State::Ok } -/// Before a line ending, expecting a blank line. +/// Before eol, expecting blank line. /// /// ```markdown /// > | <div> diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs index b92b9fa..fde78de 100644 --- a/src/construct/html_text.rs +++ b/src/construct/html_text.rs @@ -77,7 +77,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// After `<`, before a tag name or other stuff. +/// After `<`, at tag name or other stuff. /// /// ```markdown /// > | a <b> c @@ -110,7 +110,7 @@ pub fn open(tokenizer: &mut Tokenizer) -> State { } } -/// After `<!`, so inside a declaration, comment, or CDATA. +/// After `<!`, at declaration, comment, or CDATA. /// /// ```markdown /// > | a <!doctype> c @@ -139,7 +139,7 @@ pub fn declaration_open(tokenizer: &mut Tokenizer) -> State { } } -/// After `<!-`, inside a comment, before another `-`. +/// In a comment, after `<!-`, at another `-`. /// /// ```markdown /// > | a <!--b--> c @@ -155,7 +155,7 @@ pub fn comment_open_inside(tokenizer: &mut Tokenizer) -> State { } } -/// After `<!--`, inside a comment +/// After `<!--`, in a comment. /// /// > 👉 **Note**: [html (flow)][html_flow] does allow `<!-->` or `<!--->` as /// > empty comments. @@ -179,7 +179,7 @@ pub fn comment_start(tokenizer: &mut Tokenizer) -> State { } } -/// After `<!---`, inside a comment +/// After `<!---`, in a comment. /// /// > 👉 **Note**: [html (flow)][html_flow] does allow `<!-->` or `<!--->` as /// > empty comments. @@ -199,7 +199,7 @@ pub fn comment_start_dash(tokenizer: &mut Tokenizer) -> State { } } -/// In a comment. +/// In comment. /// /// ```markdown /// > | a <!--b--> c @@ -223,7 +223,7 @@ pub fn comment(tokenizer: &mut Tokenizer) -> State { } } -/// In a comment, after `-`. +/// In comment, after `-`. /// /// ```markdown /// > | a <!--b--> c @@ -239,7 +239,7 @@ pub fn comment_close(tokenizer: &mut Tokenizer) -> State { } } -/// After `<![`, inside CDATA, expecting `CDATA[`. +/// After `<![`, in CDATA, expecting `CDATA[`. /// /// ```markdown /// > | a <![CDATA[>&<]]> b @@ -285,7 +285,7 @@ pub fn cdata(tokenizer: &mut Tokenizer) -> State { } } -/// In CDATA, after `]`. +/// In CDATA, after `]`, at another `]`. /// /// ```markdown /// > | a <![CDATA[>&<]]> b @@ -301,7 +301,7 @@ pub fn cdata_close(tokenizer: &mut Tokenizer) -> State { } } -/// In CDATA, after `]]`. +/// In CDATA, after `]]`, at `>`. /// /// ```markdown /// > | a <![CDATA[>&<]]> b @@ -315,7 +315,7 @@ pub fn cdata_end(tokenizer: &mut Tokenizer) -> State { } } -/// In a declaration. +/// In declaration. /// /// ```markdown /// > | a <!b> c @@ -335,7 +335,7 @@ pub fn declaration(tokenizer: &mut Tokenizer) -> State { } } -/// In an instruction. +/// In instruction. /// /// ```markdown /// > | a <?b?> c @@ -359,7 +359,7 @@ pub fn instruction(tokenizer: &mut Tokenizer) -> State { } } -/// In an instruction, after `?`. +/// In instruction, after `?`, at `>`. /// /// ```markdown /// > | a <?b?> c @@ -372,7 +372,7 @@ pub fn instruction_close(tokenizer: &mut Tokenizer) -> State { } } -/// After `</`, in a closing tag, before a tag name. +/// After `</`, in closing tag, at tag name. /// /// ```markdown /// > | a </b> c @@ -406,7 +406,7 @@ pub fn tag_close(tokenizer: &mut Tokenizer) -> State { } } -/// In a closing tag, after the tag name. +/// In closing tag, after tag name. /// /// ```markdown /// > | a </b> c @@ -426,7 +426,7 @@ pub fn tag_close_between(tokenizer: &mut Tokenizer) -> State { } } -/// After `<x`, in an opening tag name. +/// After `<x`, in opening tag name. /// /// ```markdown /// > | a <b> c @@ -444,7 +444,7 @@ pub fn tag_open(tokenizer: &mut Tokenizer) -> State { } } -/// In an opening tag, after the tag name. +/// In opening tag, after tag name. /// /// ```markdown /// > | a <b> c @@ -473,7 +473,7 @@ pub fn tag_open_between(tokenizer: &mut Tokenizer) -> State { } } -/// In an attribute name. +/// In attribute name. /// /// ```markdown /// > | a <b c> d @@ -490,8 +490,8 @@ pub fn tag_open_attribute_name(tokenizer: &mut Tokenizer) -> State { } } -/// After an attribute name, before an attribute initializer, the end of the -/// tag, or whitespace. +/// After attribute name, before initializer, the end of the tag, or +/// whitespace. /// /// ```markdown /// > | a <b c> d @@ -518,8 +518,8 @@ pub fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer) -> State { } } -/// Before an unquoted, double quoted, or single quoted attribute value, -/// allowing whitespace. +/// Before unquoted, double quoted, or single quoted attribute value, allowing +/// whitespace. /// /// ```markdown /// > | a <b c=d> e @@ -551,7 +551,7 @@ pub fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer) -> State { } } -/// In a double or single quoted attribute value. +/// In double or single quoted attribute value. /// /// ```markdown /// > | a <b c="d"> e @@ -582,7 +582,7 @@ pub fn tag_open_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State { } } -/// In an unquoted attribute value. +/// In unquoted attribute value. /// /// ```markdown /// > | a <b c=d> e @@ -599,8 +599,8 @@ pub fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State { } } -/// After a double or single quoted attribute value, before whitespace or the -/// end of the tag. +/// After double or single quoted attribute value, before whitespace or the end +/// of the tag. /// /// ```markdown /// > | a <b c="d"> e @@ -613,7 +613,7 @@ pub fn tag_open_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State } } -/// In certain circumstances of a complete tag where only an `>` is allowed. +/// In certain circumstances of a tag where only an `>` is allowed. /// /// ```markdown /// > | a <b c="d"> e @@ -631,7 +631,7 @@ pub fn end(tokenizer: &mut Tokenizer) -> State { } } -/// At an allowed line ending. +/// At eol. /// /// > 👉 **Note**: we can’t have blank lines in text, so no need to worry about /// > empty tokens. @@ -654,7 +654,7 @@ pub fn line_ending_before(tokenizer: &mut Tokenizer) -> State { } } -/// After a line ending. +/// After eol, at optional whitespace. /// /// > 👉 **Note**: we can’t have blank lines in text, so no need to worry about /// > empty tokens. @@ -672,7 +672,7 @@ pub fn line_ending_after(tokenizer: &mut Tokenizer) -> State { State::Retry(space_or_tab(tokenizer)) } -/// After a line ending, after indent. +/// After eol, after optional whitespace. /// /// > 👉 **Note**: we can’t have blank lines in text, so no need to worry about /// > empty tokens. diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs index da53125..8801ea7 100644 --- a/src/construct/label_end.rs +++ b/src/construct/label_end.rs @@ -291,6 +291,10 @@ pub fn ok(tokenizer: &mut Tokenizer) -> State { let is_link = tokenizer.events[label_start.start.0].name == Name::LabelLink; + // If this is a link, we need to mark earlier link starts as no longer + // viable for use (as they would otherwise contain a link). + // These link starts are still looking for balanced closing brackets, so + // we can’t remove them. if is_link { let mut index = 0; while index < tokenizer.tokenize_state.label_start_stack.len() { @@ -332,7 +336,7 @@ pub fn nok(tokenizer: &mut Tokenizer) -> State { State::Nok } -/// Before a resource, at `(`. +/// At a resource. /// /// ```markdown /// > | [a](b) c @@ -351,7 +355,7 @@ pub fn resource_start(tokenizer: &mut Tokenizer) -> State { } } -/// At the start of a resource, after `(`, before a destination. +/// In resource, after `(`, at optional whitespace. /// /// ```markdown /// > | [a](b) c @@ -365,7 +369,7 @@ pub fn resource_before(tokenizer: &mut Tokenizer) -> State { State::Retry(space_or_tab_eol(tokenizer)) } -/// At the start of a resource, after optional whitespace. +/// In resource, after optional whitespace, at `)` or a destination. /// /// ```markdown /// > | [a](b) c @@ -390,7 +394,7 @@ pub fn resource_open(tokenizer: &mut Tokenizer) -> State { } } -/// In a resource, after a destination, before optional whitespace. +/// In resource, after destination, at optional whitespace. /// /// ```markdown /// > | [a](b) c @@ -410,7 +414,12 @@ pub fn resource_destination_after(tokenizer: &mut Tokenizer) -> State { State::Retry(space_or_tab_eol(tokenizer)) } -/// Without destination. +/// At invalid destination. +/// +/// ```markdown +/// > | [a](<<) b +/// ^ +/// ``` pub fn resource_destination_missing(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_1 = Name::Data; tokenizer.tokenize_state.token_2 = Name::Data; @@ -421,7 +430,7 @@ pub fn resource_destination_missing(tokenizer: &mut Tokenizer) -> State { State::Nok } -/// In a resource, after a destination, after whitespace. +/// In resource, after destination and whitespace, at `(` or title. /// /// ```markdown /// > | [a](b ) c @@ -443,7 +452,7 @@ pub fn resource_between(tokenizer: &mut Tokenizer) -> State { } } -/// In a resource, after a title. +/// In resource, after title, at optional whitespace. /// /// ```markdown /// > | [a](b "c") d @@ -460,7 +469,7 @@ pub fn resource_title_after(tokenizer: &mut Tokenizer) -> State { State::Retry(space_or_tab_eol(tokenizer)) } -/// In a resource, at the `)`. +/// In resource, at `)`. /// /// ```markdown /// > | [a](b) d @@ -479,7 +488,7 @@ pub fn resource_end(tokenizer: &mut Tokenizer) -> State { } } -/// In a reference (full), at the `[`. +/// In reference (full), at `[`. /// /// ```markdown /// > | [a][b] d @@ -501,7 +510,7 @@ pub fn reference_full(tokenizer: &mut Tokenizer) -> State { } } -/// In a reference (full), after `]`. +/// In reference (full), after `]`. /// /// ```markdown /// > | [a][b] d @@ -537,7 +546,7 @@ pub fn reference_full_after(tokenizer: &mut Tokenizer) -> State { } } -/// In a reference (collapsed), at the `[`. +/// In reference (collapsed), at `[`. /// /// > 👉 **Note**: we only get here if the label is defined. /// @@ -558,7 +567,7 @@ pub fn reference_collapsed(tokenizer: &mut Tokenizer) -> State { } } -/// In a reference (collapsed), at the `]`. +/// In reference (collapsed), at `]`. /// /// > 👉 **Note**: we only get here if the label is defined. /// @@ -581,8 +590,8 @@ pub fn reference_collapsed_open(tokenizer: &mut Tokenizer) -> State { /// Resolve media. /// -/// This turns correct label start (image, link) and label end into links and -/// images, or turns them back into data. +/// This turns matching label start (image, link) and label ends into links and +/// images, and turns unmatched label starts back into data. #[allow(clippy::too_many_lines)] pub fn resolve(tokenizer: &mut Tokenizer) { let mut left = tokenizer.tokenize_state.label_start_list_loose.split_off(0); diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs index b8c8858..ce09f5b 100644 --- a/src/construct/label_start_image.rs +++ b/src/construct/label_start_image.rs @@ -52,7 +52,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// After `!`, before a `[`. +/// After `!`, at `[`. /// /// ```markdown /// > | a ![b] c diff --git a/src/construct/list.rs b/src/construct/list.rs index 20be73c..206f823 100644 --- a/src/construct/list.rs +++ b/src/construct/list.rs @@ -79,7 +79,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// Start of list item, after whitespace. +/// After optional whitespace, at list item prefix. /// /// ```markdown /// > | * a @@ -89,10 +89,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // Unordered. Some(b'*' | b'-') => { - tokenizer.check( - State::Next(StateName::ListNok), - State::Next(StateName::ListBeforeUnordered), - ); + tokenizer.check(State::Nok, State::Next(StateName::ListBeforeUnordered)); State::Retry(StateName::ThematicBreakStart) } Some(b'+') => State::Retry(StateName::ListBeforeUnordered), @@ -103,7 +100,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { } } -/// Start of an unordered list item. +/// At unordered list item marker. /// /// The line is not a thematic break. /// @@ -116,7 +113,7 @@ pub fn before_unordered(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::ListMarker) } -/// Start of an ordered list item. +/// At ordered list item value. /// /// ```markdown /// > | * a @@ -128,7 +125,7 @@ pub fn before_ordered(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::ListValue) } -/// In an ordered list item value. +/// In ordered list item value. /// /// ```markdown /// > | 1. a @@ -152,7 +149,7 @@ pub fn value(tokenizer: &mut Tokenizer) -> State { } } -/// At a list item marker. +/// At list item marker. /// /// ```markdown /// > | * a @@ -167,7 +164,7 @@ pub fn marker(tokenizer: &mut Tokenizer) -> State { State::Next(StateName::ListMarkerAfter) } -/// After a list item marker. +/// After list item marker. /// /// ```markdown /// > | * a @@ -184,7 +181,9 @@ pub fn marker_after(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::BlankLineStart) } -/// After a list item marker, not followed by a blank line. +/// After list item marker. +/// +/// The marker is not followed by a blank line. /// /// ```markdown /// > | * a @@ -201,7 +200,7 @@ pub fn marker_after_filled(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::ListWhitespace) } -/// In whitespace after a marker. +/// After marker, at whitespace. /// /// ```markdown /// > | * a @@ -226,7 +225,7 @@ pub fn whitespace_after(tokenizer: &mut Tokenizer) -> State { } } -/// After a list item marker, followed by no indent or more indent that needed. +/// After marker, followed by no indent or more indent that needed. /// /// ```markdown /// > | * a @@ -244,7 +243,7 @@ pub fn prefix_other(tokenizer: &mut Tokenizer) -> State { } } -/// After a list item prefix. +/// After list item prefix. /// /// ```markdown /// > | * a @@ -318,7 +317,6 @@ pub fn cont_blank(tokenizer: &mut Tokenizer) -> State { if container.blank_initial { State::Nok } else { - tokenizer.attempt(State::Next(StateName::ListOk), State::Nok); // Consume, optionally, at most `size`. State::Retry(space_or_tab_min_max(tokenizer, 0, size)) } @@ -338,21 +336,10 @@ pub fn cont_filled(tokenizer: &mut Tokenizer) -> State { container.blank_initial = false; - tokenizer.attempt(State::Next(StateName::ListOk), State::Nok); // Consume exactly `size`. State::Retry(space_or_tab_min_max(tokenizer, size, size)) } -/// A state fn to yield [`State::Ok`]. -pub fn ok(_tokenizer: &mut Tokenizer) -> State { - State::Ok -} - -/// A state fn to yield [`State::Nok`]. -pub fn nok(_tokenizer: &mut Tokenizer) -> State { - State::Nok -} - /// Find adjacent list items with the same marker. pub fn resolve(tokenizer: &mut Tokenizer) { let mut lists_wip: Vec<(u8, usize, usize, usize)> = vec![]; diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs index 663b01b..c956a2c 100644 --- a/src/construct/paragraph.rs +++ b/src/construct/paragraph.rs @@ -38,7 +38,7 @@ use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; use crate::util::skip::opt as skip_opt; -/// Before a paragraph. +/// Before paragraph. /// /// ```markdown /// > | abc @@ -55,7 +55,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// In a paragraph. +/// In paragraph. /// /// ```markdown /// > | abc diff --git a/src/construct/partial_bom.rs b/src/construct/partial_bom.rs index 0175971..1818ef4 100644 --- a/src/construct/partial_bom.rs +++ b/src/construct/partial_bom.rs @@ -16,7 +16,7 @@ use crate::tokenizer::Tokenizer; const BOM: [u8; 3] = [0xEF, 0xBB, 0xBF]; -/// Before a BOM. +/// Before BOM. /// /// ```text /// > | 0xEF 0xBB 0xBF @@ -31,7 +31,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// Inside the BOM. +/// In BOM. /// /// ```text /// > | 0xEF 0xBB 0xBF diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs index f9b7947..b6b0f59 100644 --- a/src/construct/partial_data.rs +++ b/src/construct/partial_data.rs @@ -11,7 +11,7 @@ use crate::resolve::Name as ResolveName; use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; -/// At the beginning of data. +/// At beginning of data. /// /// ```markdown /// > | abc @@ -79,11 +79,10 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State { /// Merge adjacent data events. pub fn resolve(tokenizer: &mut Tokenizer) { - let len = tokenizer.events.len(); let mut index = 0; // Loop through events and merge adjacent data events. - while index < len { + while index < tokenizer.events.len() { let event = &tokenizer.events[index]; if event.kind == Kind::Enter && event.name == Name::Data { @@ -91,7 +90,7 @@ pub fn resolve(tokenizer: &mut Tokenizer) { let mut exit_far_index = exit_index; // Find multiple `data` events. - while exit_far_index + 1 < len + while exit_far_index + 1 < tokenizer.events.len() && tokenizer.events[exit_far_index + 1].name == Name::Data { exit_far_index += 2; diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs index c1c1e10..dc5c904 100644 --- a/src/construct/partial_destination.rs +++ b/src/construct/partial_destination.rs @@ -75,7 +75,7 @@ use crate::event::{Content, Name}; use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; -/// Before a destination. +/// Start of destination. /// /// ```markdown /// > | <aa> @@ -105,7 +105,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// After `<`, before an enclosed destination. +/// After `<`, at an enclosed destination. /// /// ```markdown /// > | <aa> @@ -126,7 +126,7 @@ pub fn enclosed_before(tokenizer: &mut Tokenizer) -> State { } } -/// In an enclosed destination. +/// In enclosed destination. /// /// ```markdown /// > | <aa> @@ -151,7 +151,7 @@ pub fn enclosed(tokenizer: &mut Tokenizer) -> State { } } -/// After `\`, in an enclosed destination. +/// After `\`, at a special character. /// /// ```markdown /// > | <a\*a> @@ -167,7 +167,7 @@ pub fn enclosed_escape(tokenizer: &mut Tokenizer) -> State { } } -/// In a raw destination. +/// In raw destination. /// /// ```markdown /// > | aa @@ -209,7 +209,7 @@ pub fn raw(tokenizer: &mut Tokenizer) -> State { } } -/// After `\`, in a raw destination. +/// After `\`, at special character. /// /// ```markdown /// > | a\*a diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs index 0f7aa00..a1667e1 100644 --- a/src/construct/partial_label.rs +++ b/src/construct/partial_label.rs @@ -65,7 +65,7 @@ use crate::state::{Name as StateName, State}; use crate::subtokenize::link; use crate::tokenizer::Tokenizer; -/// Before a label. +/// Start of label. /// /// ```markdown /// > | [a] @@ -85,7 +85,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// In a label, at something. +/// In label, at something, before something else. /// /// ```markdown /// > | [a] @@ -142,7 +142,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { } } -/// In a label, after whitespace. +/// In label, after whitespace. /// /// ```markdown /// | [a␊ @@ -154,7 +154,7 @@ pub fn eol_after(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::LabelAtBreak) } -/// In a label, at a blank line. +/// In label, at blank line. /// /// ```markdown /// | [a␊ @@ -168,7 +168,7 @@ pub fn at_blank_line(tokenizer: &mut Tokenizer) -> State { State::Nok } -/// In a label, in text. +/// In label, in text. /// /// ```markdown /// > | [a] @@ -200,7 +200,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State { } } -/// After `\` in a label. +/// After `\`, at a special character. /// /// ```markdown /// > | [a\*a] diff --git a/src/construct/partial_non_lazy_continuation.rs b/src/construct/partial_non_lazy_continuation.rs index 76854c8..3bbf7cc 100644 --- a/src/construct/partial_non_lazy_continuation.rs +++ b/src/construct/partial_non_lazy_continuation.rs @@ -14,7 +14,7 @@ use crate::event::Name; use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; -/// Start of continuation. +/// At eol, before continuation. /// /// ```markdown /// > | * ```js @@ -33,7 +33,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// After line ending. +/// A continuation. /// /// ```markdown /// | * ```js diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs index f2d9a73..5f1b4cf 100644 --- a/src/construct/partial_space_or_tab.rs +++ b/src/construct/partial_space_or_tab.rs @@ -61,7 +61,7 @@ pub fn space_or_tab_with_options(tokenizer: &mut Tokenizer, options: Options) -> StateName::SpaceOrTabStart } -/// Before `space_or_tab`. +/// Start of `space_or_tab`. /// /// ```markdown /// > | a␠␠b diff --git a/src/construct/partial_space_or_tab_eol.rs b/src/construct/partial_space_or_tab_eol.rs index 2127fe6..08f4bf2 100644 --- a/src/construct/partial_space_or_tab_eol.rs +++ b/src/construct/partial_space_or_tab_eol.rs @@ -44,7 +44,16 @@ pub fn space_or_tab_eol_with_options(tokenizer: &mut Tokenizer, options: Options StateName::SpaceOrTabEolStart } -pub fn eol_start(tokenizer: &mut Tokenizer) -> State { +/// Start of whitespace with at most one eol. +/// +/// ```markdown +/// > | a␠␠b +/// ^ +/// > | a␠␠␊ +/// ^ +/// | ␠␠b +/// ``` +pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( State::Next(StateName::SpaceOrTabEolAfterFirst), State::Next(StateName::SpaceOrTabEolAtEol), @@ -65,7 +74,16 @@ pub fn eol_start(tokenizer: &mut Tokenizer) -> State { )) } -pub fn eol_after_first(tokenizer: &mut Tokenizer) -> State { +/// After initial whitespace, at optional eol. +/// +/// ```markdown +/// > | a␠␠b +/// ^ +/// > | a␠␠␊ +/// ^ +/// | ␠␠b +/// ``` +pub fn after_first(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.space_or_tab_eol_ok = true; if tokenizer @@ -79,14 +97,19 @@ pub fn eol_after_first(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::SpaceOrTabEolAtEol) } -/// `space_or_tab_eol`: after optionally first `space_or_tab`. +/// After optional whitespace, at eol. /// /// ```markdown -/// > | a +/// > | a␠␠b +/// ^ +/// > | a␠␠␊ +/// ^ +/// | ␠␠b +/// > | a␊ /// ^ -/// | b +/// | ␠␠b /// ``` -pub fn eol_at_eol(tokenizer: &mut Tokenizer) -> State { +pub fn at_eol(tokenizer: &mut Tokenizer) -> State { if let Some(b'\n') = tokenizer.current { tokenizer.enter_with_content( Name::LineEnding, @@ -123,15 +146,17 @@ pub fn eol_at_eol(tokenizer: &mut Tokenizer) -> State { } } -/// `space_or_tab_eol`: after eol. +/// After eol. /// /// ```markdown -/// | a -/// > | b +/// | a␠␠␊ +/// > | ␠␠b +/// ^ +/// | a␊ +/// > | ␠␠b /// ^ /// ``` -#[allow(clippy::needless_pass_by_value)] -pub fn eol_after_eol(tokenizer: &mut Tokenizer) -> State { +pub fn after_eol(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( State::Next(StateName::SpaceOrTabEolAfterMore), State::Next(StateName::SpaceOrTabEolAfterMore), @@ -151,14 +176,17 @@ pub fn eol_after_eol(tokenizer: &mut Tokenizer) -> State { )) } -/// `space_or_tab_eol`: after more (optional) `space_or_tab`. +/// After optional final whitespace. /// /// ```markdown -/// | a -/// > | b -/// ^ +/// | a␠␠␊ +/// > | ␠␠b +/// ^ +/// | a␊ +/// > | ␠␠b +/// ^ /// ``` -pub fn eol_after_more(tokenizer: &mut Tokenizer) -> State { +pub fn after_more(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.space_or_tab_eol_content_type = None; tokenizer.tokenize_state.space_or_tab_eol_connect = false; tokenizer.tokenize_state.space_or_tab_eol_ok = false; diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs index 6f7a037..b97243e 100644 --- a/src/construct/partial_title.rs +++ b/src/construct/partial_title.rs @@ -36,7 +36,7 @@ use crate::state::{Name as StateName, State}; use crate::subtokenize::link; use crate::tokenizer::Tokenizer; -/// Before a title. +/// Start of title. /// /// ```markdown /// > | "a" @@ -57,9 +57,9 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// After the opening marker. +/// After opening marker. /// -/// This is also used when at the closing marker. +/// This is also used at the closing marker. /// /// ```markdown /// > | "a" @@ -132,7 +132,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { } } -/// In a title, after whitespace. +/// In title, after whitespace. /// /// ```markdown /// | "a␊ @@ -144,7 +144,7 @@ pub fn after_eol(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::TitleAtBreak) } -/// In a title, at a blank line. +/// In title, at blank line. /// /// ```markdown /// | "a␊ @@ -158,7 +158,7 @@ pub fn at_blank_line(tokenizer: &mut Tokenizer) -> State { State::Nok } -/// In title text. +/// In text. /// /// ```markdown /// > | "a" @@ -187,7 +187,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State { } } -/// After `\`, in title text. +/// After `\`, at a special character. /// /// ```markdown /// > | "a\*b" diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs index f4d008d..b2989cb 100644 --- a/src/construct/thematic_break.rs +++ b/src/construct/thematic_break.rs @@ -54,7 +54,7 @@ use crate::event::Name; use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; -/// Start of a thematic break. +/// Start of thematic break. /// /// ```markdown /// > | *** @@ -78,7 +78,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// Start of a thematic break, after whitespace. +/// After optional whitespace, at marker. /// /// ```markdown /// > | *** @@ -94,7 +94,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { } } -/// After something but before something else. +/// After something, before something else. /// /// ```markdown /// > | *** @@ -124,7 +124,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { } } -/// In a sequence of markers. +/// In sequence. /// /// ```markdown /// > | *** diff --git a/src/content/document.rs b/src/content/document.rs index 04f9dc6..59e6e7c 100644 --- a/src/content/document.rs +++ b/src/content/document.rs @@ -46,7 +46,7 @@ enum Phase { Eof, } -/// Turn `codes` as the document content type into events. +/// Parse a document. pub fn document(parse_state: &mut ParseState, point: Point) -> Vec<Event> { let mut tokenizer = Tokenizer::new(point, parse_state); @@ -66,9 +66,7 @@ pub fn document(parse_state: &mut ParseState, point: Point) -> Vec<Event> { events } -/// At the beginning. -/// -/// Perhaps a BOM? +/// Start of document, at an optional BOM. /// /// ```markdown /// > | a @@ -88,7 +86,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::BomStart) } -/// Before existing containers. +/// At optional existing containers. // /// ```markdown /// | * a @@ -121,7 +119,7 @@ pub fn container_existing_before(tokenizer: &mut Tokenizer) -> State { } } -/// After an existing container. +/// After continued existing container. // /// ```markdown /// | * a @@ -133,7 +131,7 @@ pub fn container_existing_after(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::DocumentContainerExistingBefore) } -/// Before a new container. +/// At new containers. // /// ```markdown /// > | * a @@ -183,7 +181,7 @@ pub fn container_new_before(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::BlockQuoteStart) } -/// Maybe before a new container, but not a block quote. +/// At new container, but not a block quote. // /// ```markdown /// > | * a @@ -206,7 +204,7 @@ pub fn container_new_before_not_block_quote(tokenizer: &mut Tokenizer) -> State State::Retry(StateName::ListStart) } -/// Maybe before a new container, but not a list. +/// At new container, but not a list (or block quote). // /// ```markdown /// > | a @@ -224,7 +222,7 @@ pub fn container_new_before_not_list(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::DocumentContainersAfter) } -/// After a new container. +/// After new container. /// /// ```markdown /// > | * a @@ -258,7 +256,7 @@ pub fn container_new_after(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::DocumentContainerNewBefore) } -/// After containers, before flow. +/// After containers, at flow. // /// ```markdown /// > | * a diff --git a/src/content/flow.rs b/src/content/flow.rs index c6bd398..08c7891 100644 --- a/src/content/flow.rs +++ b/src/content/flow.rs @@ -23,15 +23,15 @@ use crate::event::Name; use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; -/// Before flow. -/// -/// First we assume a blank line. +/// Start of flow. // /// ```markdown -/// | -/// |## alpha -/// | bravo -/// |*** +/// > | ## alpha +/// ^ +/// > | bravo +/// ^ +/// > | *** +/// ^ /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { @@ -81,6 +81,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { // Actual parsing: blank line? Indented code? Indented anything? // Also includes `-` which can be a setext heading underline or a thematic break. None | Some(b'\t' | b'\n' | b' ' | b'-') => State::Retry(StateName::FlowBlankLineBefore), + // Must be a paragraph. Some(_) => { tokenizer.attempt(State::Next(StateName::FlowAfter), State::Nok); State::Retry(StateName::ParagraphStart) @@ -88,6 +89,12 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } +/// At blank line. +/// +/// ```markdown +/// > | ␠␠␊ +/// ^ +/// ``` pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( State::Next(StateName::FlowBlankLineAfter), @@ -96,17 +103,11 @@ pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::BlankLineStart) } -/// Before flow (initial). -/// -/// “Initial” flow means unprefixed flow, so right at the start of a line. -/// Interestingly, the only flow (initial) construct is indented code. -/// Move to `before` afterwards. +/// At code (indented). /// /// ```markdown -/// |qwe -/// | asd -/// |~~~js -/// |<div> +/// > | ␠␠␠␠a +/// ^ /// ``` pub fn before_code_indented(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( @@ -116,6 +117,12 @@ pub fn before_code_indented(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::CodeIndentedStart) } +/// At code (fenced). +/// +/// ````markdown +/// > | ``` +/// ^ +/// ```` pub fn before_code_fenced(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( State::Next(StateName::FlowAfter), @@ -124,6 +131,12 @@ pub fn before_code_fenced(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::CodeFencedStart) } +/// At html (flow). +/// +/// ```markdown +/// > | <a> +/// ^ +/// ``` pub fn before_html(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( State::Next(StateName::FlowAfter), @@ -132,6 +145,12 @@ pub fn before_html(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::HtmlFlowStart) } +/// At heading (atx). +/// +/// ```markdown +/// > | # a +/// ^ +/// ``` pub fn before_heading_atx(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( State::Next(StateName::FlowAfter), @@ -140,6 +159,13 @@ pub fn before_heading_atx(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::HeadingAtxStart) } +/// At heading (setext). +/// +/// ```markdown +/// | a +/// > | = +/// ^ +/// ``` pub fn before_heading_setext(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( State::Next(StateName::FlowAfter), @@ -148,6 +174,12 @@ pub fn before_heading_setext(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::HeadingSetextStart) } +/// At thematic break. +/// +/// ```markdown +/// > | *** +/// ^ +/// ``` pub fn before_thematic_break(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( State::Next(StateName::FlowAfter), @@ -156,6 +188,12 @@ pub fn before_thematic_break(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::ThematicBreakStart) } +/// At definition. +/// +/// ```markdown +/// > | [a]: b +/// ^ +/// ``` pub fn before_definition(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( State::Next(StateName::FlowAfter), @@ -164,12 +202,22 @@ pub fn before_definition(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::DefinitionStart) } -/// After a blank line. +/// At paragraph. /// -/// Move to `start` afterwards. +/// ```markdown +/// > | a +/// ^ +/// ``` +pub fn before_paragraph(tokenizer: &mut Tokenizer) -> State { + tokenizer.attempt(State::Next(StateName::FlowAfter), State::Nok); + State::Retry(StateName::ParagraphStart) +} + +/// After blank line. /// /// ```markdown -/// ␠␠| +/// > | ␠␠␊ +/// ^ /// ``` pub fn blank_line_after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { @@ -186,14 +234,11 @@ pub fn blank_line_after(tokenizer: &mut Tokenizer) -> State { } } -/// After something. +/// After flow. /// /// ```markdown -/// ## alpha| -/// | -/// ~~~js -/// asd -/// ~~~| +/// > | # a␊ +/// ^ /// ``` pub fn after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { @@ -207,13 +252,3 @@ pub fn after(tokenizer: &mut Tokenizer) -> State { _ => unreachable!("expected eol/eof"), } } - -/// Before a paragraph. -/// -/// ```markdown -/// |asd -/// ``` -pub fn before_paragraph(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt(State::Next(StateName::FlowAfter), State::Nok); - State::Retry(StateName::ParagraphStart) -} diff --git a/src/content/string.rs b/src/content/string.rs index 1eefd30..ec4fce2 100644 --- a/src/content/string.rs +++ b/src/content/string.rs @@ -17,9 +17,15 @@ use crate::resolve::Name as ResolveName; use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; +/// Characters that can start something in string. const MARKERS: [u8; 2] = [b'&', b'\\']; /// Start of string. +/// +/// ````markdown +/// > | ```js +/// ^ +/// ```` pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.register_resolver(ResolveName::String); tokenizer.tokenize_state.markers = &MARKERS; @@ -27,6 +33,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } /// Before string. +/// +/// ````markdown +/// > | ```js +/// ^ +/// ```` pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => State::Ok, @@ -49,12 +60,17 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { } /// At data. +/// +/// ````markdown +/// > | ```js +/// ^ +/// ```` pub fn before_data(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt(State::Next(StateName::StringBefore), State::Nok); State::Retry(StateName::DataStart) } -/// Resolve whitespace. +/// Resolve whitespace in string. pub fn resolve(tokenizer: &mut Tokenizer) { resolve_whitespace(tokenizer, false, false); } diff --git a/src/content/text.rs b/src/content/text.rs index 6509d30..5c13dba 100644 --- a/src/content/text.rs +++ b/src/content/text.rs @@ -25,6 +25,7 @@ use crate::resolve::Name as ResolveName; use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; +/// Characters that can start something in text. const MARKERS: [u8; 9] = [ b'!', // `label_start_image` b'&', // `character_reference` @@ -38,6 +39,11 @@ const MARKERS: [u8; 9] = [ ]; /// Start of text. +/// +/// ```markdown +/// > | abc +/// ^ +/// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.register_resolver(ResolveName::Text); tokenizer.tokenize_state.markers = &MARKERS; @@ -45,6 +51,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } /// Before text. +/// +/// ```markdown +/// > | abc +/// ^ +/// ``` pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => State::Ok, @@ -109,7 +120,14 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { } } -/// At `<`, which wasn’t an autolink: before HTML? +/// Before html (text). +/// +/// At `<`, which wasn’t an autolink. +/// +/// ```markdown +/// > | a <b> +/// ^ +/// ``` pub fn before_html(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( State::Next(StateName::TextBefore), @@ -118,7 +136,14 @@ pub fn before_html(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::HtmlTextStart) } -/// At `\`, which wasn’t a character escape: before a hard break? +/// Before hard break escape. +/// +/// At `\`, which wasn’t a character escape. +/// +/// ```markdown +/// > | a \␊ +/// ^ +/// ``` pub fn before_hard_break_escape(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( State::Next(StateName::TextBefore), @@ -127,7 +152,12 @@ pub fn before_hard_break_escape(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::HardBreakEscapeStart) } -/// At data. +/// Before data. +/// +/// ```markdown +/// > | a +/// ^ +/// ``` pub fn before_data(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt(State::Next(StateName::TextBefore), State::Nok); State::Retry(StateName::DataStart) diff --git a/src/event.rs b/src/event.rs index 664a609..51ecd86 100644 --- a/src/event.rs +++ b/src/event.rs @@ -1887,7 +1887,7 @@ pub enum Content { Text, } -/// A link to another event. +/// Link to another event. #[derive(Debug, Clone)] pub struct Link { pub previous: Option<usize>, @@ -1895,7 +1895,7 @@ pub struct Link { pub content_type: Content, } -/// A location in the document (`line`/`column`/`offset`). +/// Place in the document. /// /// The interface for the location in the document comes from unist `Point`: /// <https://github.com/syntax-tree/unist#point>. @@ -1916,7 +1916,7 @@ pub struct Point { pub vs: usize, } -/// Possible event kinds. +/// Event kinds. #[derive(Debug, PartialEq, Clone)] pub enum Kind { /// The start of something. @@ -1928,8 +1928,12 @@ pub enum Kind { /// Something semantic happening somewhere. #[derive(Debug, Clone)] pub struct Event { + /// Kind of event. pub kind: Kind, + /// Name of event. pub name: Name, + /// Place where this happens. pub point: Point, + /// Link to another event. pub link: Option<Link>, } @@ -72,7 +72,7 @@ impl LineEnding { "\r\n" => LineEnding::CarriageReturnLineFeed, "\r" => LineEnding::CarriageReturn, "\n" => LineEnding::LineFeed, - _ => unreachable!("invalid code"), + _ => unreachable!("invalid str"), } } } diff --git a/src/parser.rs b/src/parser.rs index dc2c07a..a8416ed 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -4,10 +4,10 @@ use crate::content::document::document; use crate::event::{Event, Point}; use crate::{Constructs, Options}; -/// Information needed, in all content types, when parsing markdown. +/// Info needed, in all content types, when parsing markdown. /// /// Importantly, this contains a set of known definitions. -/// It also references the input value as a `Vec<char>`. +/// It also references the input value as bytes (`u8`). #[derive(Debug)] pub struct ParseState<'a> { pub constructs: &'a Constructs, @@ -19,7 +19,7 @@ pub struct ParseState<'a> { /// Turn a string of markdown into events. /// -/// Passes the codes back so the compiler can access the source. +/// Passes the bytes back so the compiler can access the source. pub fn parse<'a>(value: &'a str, options: &'a Options) -> (Vec<Event>, &'a [u8]) { let mut parse_state = ParseState { constructs: &options.constructs, diff --git a/src/resolve.rs b/src/resolve.rs index e72b2a2..e7d63f9 100644 --- a/src/resolve.rs +++ b/src/resolve.rs @@ -2,7 +2,7 @@ use crate::construct; use crate::content; use crate::tokenizer::Tokenizer; -/// Names of functions to move to. +/// Names of functions that resolve. #[derive(Debug, Clone, Copy, PartialEq)] pub enum Name { Label, @@ -16,7 +16,7 @@ pub enum Name { Text, } -/// Call the corresponding function for a state name. +/// Call the corresponding resolver. pub fn call(tokenizer: &mut Tokenizer, name: Name) { let func = match name { Name::Label => construct::label_end::resolve, diff --git a/src/state.rs b/src/state.rs index d5055e2..190683c 100644 --- a/src/state.rs +++ b/src/state.rs @@ -36,7 +36,6 @@ pub enum Name { BlankLineAfter, BlockQuoteStart, - BlockQuoteBefore, BlockQuoteContStart, BlockQuoteContBefore, BlockQuoteContAfter, @@ -245,7 +244,6 @@ pub enum Name { ListStart, ListBefore, - ListNok, ListBeforeOrdered, ListBeforeUnordered, ListValue, @@ -259,7 +257,6 @@ pub enum Name { ListContStart, ListContBlank, ListContFilled, - ListOk, NonLazyContinuationStart, NonLazyContinuationAfter, @@ -322,7 +319,6 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { Name::BlankLineAfter => construct::blank_line::after, Name::BlockQuoteStart => construct::block_quote::start, - Name::BlockQuoteBefore => construct::block_quote::before, Name::BlockQuoteContStart => construct::block_quote::cont_start, Name::BlockQuoteContBefore => construct::block_quote::cont_before, Name::BlockQuoteContAfter => construct::block_quote::cont_after, @@ -562,7 +558,6 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { Name::ListStart => construct::list::start, Name::ListBefore => construct::list::before, - Name::ListNok => construct::list::nok, Name::ListBeforeOrdered => construct::list::before_ordered, Name::ListBeforeUnordered => construct::list::before_unordered, Name::ListValue => construct::list::value, @@ -576,7 +571,6 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { Name::ListContStart => construct::list::cont_start, Name::ListContBlank => construct::list::cont_blank, Name::ListContFilled => construct::list::cont_filled, - Name::ListOk => construct::list::ok, Name::NonLazyContinuationStart => construct::partial_non_lazy_continuation::start, Name::NonLazyContinuationAfter => construct::partial_non_lazy_continuation::after, @@ -588,11 +582,11 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { Name::SpaceOrTabInside => construct::partial_space_or_tab::inside, Name::SpaceOrTabAfter => construct::partial_space_or_tab::after, - Name::SpaceOrTabEolStart => construct::partial_space_or_tab_eol::eol_start, - Name::SpaceOrTabEolAfterFirst => construct::partial_space_or_tab_eol::eol_after_first, - Name::SpaceOrTabEolAfterEol => construct::partial_space_or_tab_eol::eol_after_eol, - Name::SpaceOrTabEolAtEol => construct::partial_space_or_tab_eol::eol_at_eol, - Name::SpaceOrTabEolAfterMore => construct::partial_space_or_tab_eol::eol_after_more, + Name::SpaceOrTabEolStart => construct::partial_space_or_tab_eol::start, + Name::SpaceOrTabEolAfterFirst => construct::partial_space_or_tab_eol::after_first, + Name::SpaceOrTabEolAfterEol => construct::partial_space_or_tab_eol::after_eol, + Name::SpaceOrTabEolAtEol => construct::partial_space_or_tab_eol::at_eol, + Name::SpaceOrTabEolAfterMore => construct::partial_space_or_tab_eol::after_more, Name::StringStart => content::string::start, Name::StringBefore => content::string::before, diff --git a/src/subtokenize.rs b/src/subtokenize.rs index 588d2a3..432c198 100644 --- a/src/subtokenize.rs +++ b/src/subtokenize.rs @@ -27,7 +27,7 @@ use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; use crate::util::edit_map::EditMap; -/// Create a link between two [`Event`][]s. +/// Link two [`Event`][]s. /// /// Arbitrary (void) events can be linked together. /// This optimizes for the common case where the token at `index` is connected @@ -60,7 +60,7 @@ pub fn link_to(events: &mut [Event], pevious: usize, next: usize) { /// Parse linked events. /// -/// Supposed to be called repeatedly, returns `1: true` when done. +/// Supposed to be called repeatedly, returns `true` when done. pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool { let mut map = EditMap::new(); let mut done = true; @@ -123,36 +123,35 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool { done } -/// Parse linked events. -/// -/// Supposed to be called repeatedly, returns `1: true` when done. +/// Divide `child_events` over links in `events`, the first of which is at +/// `link_index`. pub fn divide_events( map: &mut EditMap, events: &[Event], mut link_index: usize, child_events: &mut Vec<Event>, ) { - // Now, loop through all subevents to figure out which parts - // belong where and fix deep links. - let mut subindex = 0; + // Loop through `child_events` to figure out which parts belong where and + // fix deep links. + let mut child_index = 0; let mut slices = vec![]; let mut slice_start = 0; let mut old_prev: Option<usize> = None; - while subindex < child_events.len() { - let current = &child_events[subindex].point; + while child_index < child_events.len() { + let current = &child_events[child_index].point; let end = &events[link_index + 1].point; // Find the first event that starts after the end we’re looking // for. if current.index > end.index || (current.index == end.index && current.vs > end.vs) { slices.push((link_index, slice_start)); - slice_start = subindex; + slice_start = child_index; link_index = events[link_index].link.as_ref().unwrap().next.unwrap(); } // Fix sublinks. - if let Some(sublink_curr) = &child_events[subindex].link { + if let Some(sublink_curr) = &child_events[child_index].link { if sublink_curr.previous.is_some() { let old_prev = old_prev.unwrap(); let prev_event = &mut child_events[old_prev]; @@ -173,7 +172,7 @@ pub fn divide_events( // its `previous` index to account for the shifted events. // If it points to a next event, we also change the next event’s // reference back to *this* event. - if let Some(sublink_curr) = &child_events[subindex].link { + if let Some(sublink_curr) = &child_events[child_index].link { if let Some(next) = sublink_curr.next { let sublink_next = child_events[next].link.as_mut().unwrap(); @@ -188,7 +187,7 @@ pub fn divide_events( } } - subindex += 1; + child_index += 1; } if !child_events.is_empty() { @@ -200,10 +199,13 @@ pub fn divide_events( while index > 0 { index -= 1; - let start = slices[index].0; map.add( - start, - if start == events.len() { 0 } else { 2 }, + slices[index].0, + if slices[index].0 == events.len() { + 0 + } else { + 2 + }, child_events.split_off(slices[index].1), ); } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 0b51c48..3f60b86 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -76,7 +76,7 @@ pub struct Media { /// Different kinds of attempts. #[derive(Debug, PartialEq)] enum AttemptKind { - /// Discard what was tokenizer when unsuccessful. + /// Discard what was tokenized when unsuccessful. Attempt, /// Discard always. Check, @@ -356,7 +356,7 @@ impl<'a> Tokenizer<'a> { } } - /// Prepare for a next code to get consumed. + /// Prepare for a next byte to get consumed. fn expect(&mut self, byte: Option<u8>) { debug_assert!(self.consumed, "expected previous byte to be consumed"); self.consumed = false; @@ -368,11 +368,10 @@ impl<'a> Tokenizer<'a> { /// used, or call a next function. pub fn consume(&mut self) { debug_assert!(!self.consumed, "expected code to not have been consumed: this might be because `x(code)` instead of `x` was returned"); - self.move_one(); self.previous = self.current; - // While we’re not at the eof, it is at least better to not have the + // While we’re not at eof, it is at least better to not have the // same current code as `previous` *and* `current`. self.current = None; // Mark as consumed. @@ -427,6 +426,7 @@ impl<'a> Tokenizer<'a> { self.enter_with_link(name, None); } + /// Enter with a content type. pub fn enter_with_content(&mut self, name: Name, content_type_opt: Option<Content>) { self.enter_with_link( name, @@ -438,6 +438,7 @@ impl<'a> Tokenizer<'a> { ); } + /// Enter with a link. pub fn enter_with_link(&mut self, name: Name, link: Option<Link>) { let mut point = self.point.clone(); move_point_back(self, &mut point); @@ -663,7 +664,7 @@ fn push_impl( }; } State::Retry(name) => { - log::debug!("retry: {:?}", name); + log::debug!("retry: `{:?}`", name); state = call(tokenizer, name); } } diff --git a/src/util/edit_map.rs b/src/util/edit_map.rs index 59adfca..11ac486 100644 --- a/src/util/edit_map.rs +++ b/src/util/edit_map.rs @@ -53,8 +53,7 @@ fn shift_links(events: &mut [Event], jumps: &[(usize, usize, usize)]) { } } -/// Make it easy to insert and remove things while being performant and keeping -/// links in check. +/// Tracks a bunch of edits. #[derive(Debug)] pub struct EditMap { /// Record of changes. diff --git a/src/util/skip.rs b/src/util/skip.rs index 371418f..46cbb4a 100644 --- a/src/util/skip.rs +++ b/src/util/skip.rs @@ -12,15 +12,18 @@ pub fn opt_back(events: &[Event], index: usize, names: &[Name]) -> usize { skip_opt_impl(events, index, names, false) } -pub fn to_back(events: &[Event], index: usize, names: &[Name]) -> usize { - to_impl(events, index, names, false) -} - +/// Skip from `index` forwards to `names`. pub fn to(events: &[Event], index: usize, names: &[Name]) -> usize { to_impl(events, index, names, true) } -pub fn to_impl(events: &[Event], mut index: usize, names: &[Name], forward: bool) -> usize { +/// Skip from `index` backwards to `names`. +pub fn to_back(events: &[Event], index: usize, names: &[Name]) -> usize { + to_impl(events, index, names, false) +} + +/// Skip to something. +fn to_impl(events: &[Event], mut index: usize, names: &[Name], forward: bool) -> usize { while index < events.len() { let current = &events[index].name; @@ -34,7 +37,7 @@ pub fn to_impl(events: &[Event], mut index: usize, names: &[Name], forward: bool index } -/// Skip internals. +/// Skip past things. fn skip_opt_impl(events: &[Event], mut index: usize, names: &[Name], forward: bool) -> usize { let mut balance = 0; let open = if forward { Kind::Enter } else { Kind::Exit }; diff --git a/src/util/slice.rs b/src/util/slice.rs index 34adf32..e70078a 100644 --- a/src/util/slice.rs +++ b/src/util/slice.rs @@ -4,7 +4,7 @@ use crate::constant::TAB_SIZE; use crate::event::{Event, Kind, Point}; use std::str; -/// A range between two places. +/// A range between two points. #[derive(Debug)] pub struct Position<'a> { pub start: &'a Point, @@ -53,9 +53,9 @@ impl<'a> Position<'a> { } } -/// Chars belonging to a range. +/// Bytes belonging to a range. /// -/// Includes information on virtual spaces before and after the chars. +/// Includes information on virtual spaces before and after the bytes. #[derive(Debug)] pub struct Slice<'a> { pub bytes: &'a [u8], @@ -64,7 +64,7 @@ pub struct Slice<'a> { } impl<'a> Slice<'a> { - /// Get the slice belonging to a point. + /// Get a slice for a single point. pub fn from_point(bytes: &'a [u8], point: &Point) -> Slice<'a> { let mut before = point.vs; let mut start = point.index; @@ -88,16 +88,14 @@ impl<'a> Slice<'a> { } } - /// Create a slice from one index. - /// - /// Indices are places in `bytes`. + /// Get a slice for a single index. /// /// > 👉 **Note**: indices cannot represent virtual spaces. pub fn from_index(bytes: &'a [u8], index: usize) -> Slice<'a> { Slice::from_indices(bytes, index, index + 1) } - /// Get the slice belonging to a position. + /// Get a slice for a position. pub fn from_position(bytes: &'a [u8], position: &Position) -> Slice<'a> { let mut before = position.start.vs; let mut after = position.end.vs; @@ -125,9 +123,7 @@ impl<'a> Slice<'a> { } } - /// Create a slice from two indices. - /// - /// Indices are places in `bytes`. + /// Get a slice for two indices. /// /// > 👉 **Note**: indices cannot represent virtual spaces. pub fn from_indices(bytes: &'a [u8], start: usize, end: usize) -> Slice<'a> { @@ -157,7 +153,7 @@ impl<'a> Slice<'a> { /// Turn the slice into a `&str`. /// - /// Does not support virtual spaces. + /// > 👉 **Note**: cannot represent virtual spaces. pub fn as_str(&self) -> &str { str::from_utf8(self.bytes).unwrap() } |