about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/compiler.rs 2
-rw-r--r-- src/construct/attention.rs 388
-rw-r--r-- src/construct/autolink.rs 18
-rw-r--r-- src/construct/blank_line.rs 4
-rw-r--r-- src/construct/block_quote.rs 34
-rw-r--r-- src/construct/character_escape.rs 4
-rw-r--r-- src/construct/character_reference.rs 12
-rw-r--r-- src/construct/code_fenced.rs 29
-rw-r--r-- src/construct/code_indented.rs 6
-rw-r--r-- src/construct/code_text.rs 4
-rw-r--r-- src/construct/definition.rs 24
-rw-r--r-- src/construct/hard_break_escape.rs 4
-rw-r--r-- src/construct/heading_atx.rs 8
-rw-r--r-- src/construct/heading_setext.rs 8
-rw-r--r-- src/construct/html_flow.rs 52
-rw-r--r-- src/construct/html_text.rs 60
-rw-r--r-- src/construct/label_end.rs 37
-rw-r--r-- src/construct/label_start_image.rs 2
-rw-r--r-- src/construct/list.rs 39
-rw-r--r-- src/construct/paragraph.rs 4
-rw-r--r-- src/construct/partial_bom.rs 4
-rw-r--r-- src/construct/partial_data.rs 7
-rw-r--r-- src/construct/partial_destination.rs 12
-rw-r--r-- src/construct/partial_label.rs 12
-rw-r--r-- src/construct/partial_non_lazy_continuation.rs 4
-rw-r--r-- src/construct/partial_space_or_tab.rs 2
-rw-r--r-- src/construct/partial_space_or_tab_eol.rs 60
-rw-r--r-- src/construct/partial_title.rs 14
-rw-r--r-- src/construct/thematic_break.rs 8
-rw-r--r-- src/content/document.rs 20
-rw-r--r-- src/content/flow.rs 105
-rw-r--r-- src/content/string.rs 18
-rw-r--r-- src/content/text.rs 36
-rw-r--r-- src/event.rs 10
-rw-r--r-- src/lib.rs 2
-rw-r--r-- src/parser.rs 6
-rw-r--r-- src/resolve.rs 4
-rw-r--r-- src/state.rs 16
-rw-r--r-- src/subtokenize.rs 36
-rw-r--r-- src/tokenizer.rs 11
-rw-r--r-- src/util/edit_map.rs 3
-rw-r--r-- src/util/skip.rs 15
-rw-r--r-- src/util/slice.rs 20
43 files changed, 611 insertions, 553 deletions
diff --git a/src/compiler.rs b/src/compiler.rs
index 8f3036a..a935367 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -12,7 +12,7 @@ use crate::util::{
use crate::{LineEnding, Options};
use std::str;
-/// Representation of a link or image, resource or reference.
+/// Link or image, resource or reference.
/// Reused for temporary definitions as well, in the first pass.
#[derive(Debug)]
struct Media {
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
index e974fae..3a29d06 100644
--- a/src/construct/attention.rs
+++ b/src/construct/attention.rs
@@ -60,7 +60,7 @@ use crate::util::slice::Slice;
/// Character code kinds.
#[derive(Debug, PartialEq)]
-enum GroupKind {
+enum CharacterKind {
/// Whitespace.
///
/// ## Example
@@ -98,7 +98,7 @@ struct Sequence {
/// The depth in events where this sequence resides.
balance: usize,
/// The index into events where this sequence’s `Enter` currently resides.
- event_index: usize,
+ index: usize,
/// The (shifted) point where this sequence starts.
start_point: Point,
/// The (shifted) point where this sequence ends.
@@ -111,7 +111,7 @@ struct Sequence {
close: bool,
}
-/// Before a sequence.
+/// At start of attention.
///
/// ```markdown
/// > | **
@@ -128,7 +128,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In a sequence.
+/// In sequence.
///
/// ```markdown
/// > | **
@@ -136,7 +136,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ```
pub fn inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some(b'*' | b'_') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
+ Some(b'*' | b'_') if tokenizer.current == Some(tokenizer.tokenize_state.marker) => {
tokenizer.consume();
State::Next(StateName::AttentionInside)
}
@@ -150,28 +150,28 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {
}
/// Resolve attention sequences.
-#[allow(clippy::too_many_lines)]
pub fn resolve(tokenizer: &mut Tokenizer) {
- let mut start = 0;
+ let mut index = 0;
let mut balance = 0;
let mut sequences = vec![];
- // Find sequences of sequences and information about them.
- while start < tokenizer.events.len() {
- let enter = &tokenizer.events[start];
+ // Find all sequences, gather info about them.
+ while index < tokenizer.events.len() {
+ let enter = &tokenizer.events[index];
if enter.kind == Kind::Enter {
balance += 1;
if enter.name == Name::AttentionSequence {
- let end = start + 1;
+ let end = index + 1;
let exit = &tokenizer.events[end];
let before_end = enter.point.index;
let before_start = if before_end < 4 { 0 } else { before_end - 4 };
- let string_before =
- String::from_utf8_lossy(&tokenizer.parse_state.bytes[before_start..before_end]);
- let char_before = string_before.chars().last();
+ let char_before =
+ String::from_utf8_lossy(&tokenizer.parse_state.bytes[before_start..before_end])
+ .chars()
+ .last();
let after_start = exit.point.index;
let after_end = if after_start + 4 > tokenizer.parse_state.bytes.len() {
@@ -179,26 +179,27 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
} else {
after_start + 4
};
- let string_after =
- String::from_utf8_lossy(&tokenizer.parse_state.bytes[after_start..after_end]);
- let char_after = string_after.chars().next();
+ let char_after =
+ String::from_utf8_lossy(&tokenizer.parse_state.bytes[after_start..after_end])
+ .chars()
+ .next();
let marker = Slice::from_point(tokenizer.parse_state.bytes, &enter.point)
.head()
.unwrap();
let before = classify_character(char_before);
let after = classify_character(char_after);
- let open = after == GroupKind::Other
- || (after == GroupKind::Punctuation && before != GroupKind::Other);
+ let open = after == CharacterKind::Other
+ || (after == CharacterKind::Punctuation && before != CharacterKind::Other);
// To do: GFM strikethrough?
- // || attentionMarkers.includes(code)
- let close = before == GroupKind::Other
- || (before == GroupKind::Punctuation && after != GroupKind::Other);
+ // || char_after == '~'
+ let close = before == CharacterKind::Other
+ || (before == CharacterKind::Punctuation && after != CharacterKind::Other);
// To do: GFM strikethrough?
- // || attentionMarkers.includes(previous)
+ // || char_before == '~'
sequences.push(Sequence {
- event_index: start,
+ index,
balance,
start_point: enter.point.clone(),
end_point: exit.point.clone(),
@@ -206,12 +207,12 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
open: if marker == b'*' {
open
} else {
- open && (before != GroupKind::Other || !close)
+ open && (before != CharacterKind::Other || !close)
},
close: if marker == b'*' {
close
} else {
- close && (after != GroupKind::Other || !open)
+ close && (after != CharacterKind::Other || !open)
},
marker,
});
@@ -220,10 +221,10 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
balance -= 1;
}
- start += 1;
+ index += 1;
}
- // Walk through sequences and match them.
+ // Now walk through them and match them.
let mut close = 0;
while close < sequences.len() {
@@ -240,7 +241,7 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
let sequence_open = &sequences[open];
- // We found a sequence that can open the closer we found.
+ // An opener matching our closer:
if sequence_open.open
&& sequence_close.marker == sequence_open.marker
&& sequence_close.balance == sequence_open.balance
@@ -257,175 +258,7 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
}
// We’ve found a match!
-
- // Number of markers to use from the sequence.
- let take = if sequence_open.size > 1 && sequence_close.size > 1 {
- 2
- } else {
- 1
- };
-
- // We’re *on* a closing sequence, with a matching opening
- // sequence.
- // Now we make sure that we can’t have misnested attention:
- //
- // ```html
- // <em>a <strong>b</em> c</strong>
- // ```
- //
- // Do that by marking everything between it as no longer
- // possible to open anything.
- // Theoretically we could mark non-closing as well, but we
- // don’t look for closers backwards.
- let mut between = open + 1;
-
- while between < close {
- sequences[between].open = false;
- between += 1;
- }
-
- let sequence_close = &mut sequences[close];
- let close_event_index = sequence_close.event_index;
- let seq_close_enter = sequence_close.start_point.clone();
- // No need to worry about `VS`, because sequences are only actual characters.
- sequence_close.size -= take;
- sequence_close.start_point.column += take;
- sequence_close.start_point.index += take;
- let seq_close_exit = sequence_close.start_point.clone();
-
- // Stay on this closing sequence for the next iteration: it
- // might close more things.
- next_index -= 1;
-
- // Remove closing sequence if fully used.
- if sequence_close.size == 0 {
- sequences.remove(close);
- tokenizer.map.add(close_event_index, 2, vec![]);
- } else {
- // Shift remaining closing sequence forward.
- // Do it here because a sequence can open and close different
- // other sequences, and the remainder can be on any side or
- // somewhere in the middle.
- let mut enter = &mut tokenizer.events[close_event_index];
- enter.point = seq_close_exit.clone();
- }
-
- let sequence_open = &mut sequences[open];
- let open_event_index = sequence_open.event_index;
- let seq_open_exit = sequence_open.end_point.clone();
- // No need to worry about `VS`, because sequences are only actual characters.
- sequence_open.size -= take;
- sequence_open.end_point.column -= take;
- sequence_open.end_point.index -= take;
- let seq_open_enter = sequence_open.end_point.clone();
-
- // Remove opening sequence if fully used.
- if sequence_open.size == 0 {
- sequences.remove(open);
- tokenizer.map.add(open_event_index, 2, vec![]);
- next_index -= 1;
- } else {
- // Shift remaining opening sequence backwards.
- // See note above for why that happens here.
- let mut exit = &mut tokenizer.events[open_event_index + 1];
- exit.point = seq_open_enter.clone();
- }
-
- // Opening.
- tokenizer.map.add_before(
- // Add after the current sequence (it might remain).
- open_event_index + 2,
- 0,
- vec![
- Event {
- kind: Kind::Enter,
- name: if take == 1 {
- Name::Emphasis
- } else {
- Name::Strong
- },
- point: seq_open_enter.clone(),
- link: None,
- },
- Event {
- kind: Kind::Enter,
- name: if take == 1 {
- Name::EmphasisSequence
- } else {
- Name::StrongSequence
- },
- point: seq_open_enter.clone(),
- link: None,
- },
- Event {
- kind: Kind::Exit,
- name: if take == 1 {
- Name::EmphasisSequence
- } else {
- Name::StrongSequence
- },
- point: seq_open_exit.clone(),
- link: None,
- },
- Event {
- kind: Kind::Enter,
- name: if take == 1 {
- Name::EmphasisText
- } else {
- Name::StrongText
- },
- point: seq_open_exit.clone(),
- link: None,
- },
- ],
- );
- // Closing.
- tokenizer.map.add(
- close_event_index,
- 0,
- vec![
- Event {
- kind: Kind::Exit,
- name: if take == 1 {
- Name::EmphasisText
- } else {
- Name::StrongText
- },
- point: seq_close_enter.clone(),
- link: None,
- },
- Event {
- kind: Kind::Enter,
- name: if take == 1 {
- Name::EmphasisSequence
- } else {
- Name::StrongSequence
- },
- point: seq_close_enter.clone(),
- link: None,
- },
- Event {
- kind: Kind::Exit,
- name: if take == 1 {
- Name::EmphasisSequence
- } else {
- Name::StrongSequence
- },
- point: seq_close_exit.clone(),
- link: None,
- },
- Event {
- kind: Kind::Exit,
- name: if take == 1 {
- Name::Emphasis
- } else {
- Name::Strong
- },
- point: seq_close_exit.clone(),
- link: None,
- },
- ],
- );
+ next_index = match_sequences(tokenizer, &mut sequences, open, close);
break;
}
@@ -439,14 +272,159 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
let mut index = 0;
while index < sequences.len() {
let sequence = &sequences[index];
- tokenizer.events[sequence.event_index].name = Name::Data;
- tokenizer.events[sequence.event_index + 1].name = Name::Data;
+ tokenizer.events[sequence.index].name = Name::Data;
+ tokenizer.events[sequence.index + 1].name = Name::Data;
index += 1;
}
tokenizer.map.consume(&mut tokenizer.events);
}
+/// Match an opening and a closing sequence, returning the index of the sequence to handle next.
+fn match_sequences(
+ tokenizer: &mut Tokenizer,
+ sequences: &mut Vec<Sequence>,
+ open: usize,
+ close: usize,
+) -> usize {
+ // Where to move to next.
+ // Stay on this closing sequence for the next iteration: it
+ // might close more things.
+ // It’s changed if sequences are removed.
+ let mut next = close;
+
+ // Number of markers to use from the sequence.
+ let take = if sequences[open].size > 1 && sequences[close].size > 1 {
+ 2
+ } else {
+ 1
+ };
+
+ // We’re *on* a closing sequence, with a matching opening
+ // sequence.
+ // Now we make sure that we can’t have misnested attention:
+ //
+ // ```html
+ // <em>a <strong>b</em> c</strong>
+ // ```
+ //
+ // Do that by marking everything between it as no longer
+ // possible to open anything.
+ // Theoretically we should mark as `close: false` too, but
+ // we don’t look for closers backwards, so it’s not needed.
+ let mut between = open + 1;
+
+ while between < close {
+ sequences[between].open = false;
+ between += 1;
+ }
+
+ let (group_name, seq_name, text_name) = if take == 1 {
+ (Name::Emphasis, Name::EmphasisSequence, Name::EmphasisText)
+ } else {
+ (Name::Strong, Name::StrongSequence, Name::StrongText)
+ };
+ let open_index = sequences[open].index;
+ let close_index = sequences[close].index;
+ let open_exit = sequences[open].end_point.clone();
+ let close_enter = sequences[close].start_point.clone();
+
+ // No need to worry about `VS`, because sequences are only actual characters.
+ sequences[open].size -= take;
+ sequences[close].size -= take;
+ sequences[open].end_point.column -= take;
+ sequences[open].end_point.index -= take;
+ sequences[close].start_point.column += take;
+ sequences[close].start_point.index += take;
+
+ // Opening.
+ tokenizer.map.add_before(
+ // Add after the current sequence (it might remain).
+ open_index + 2,
+ 0,
+ vec![
+ Event {
+ kind: Kind::Enter,
+ name: group_name.clone(),
+ point: sequences[open].end_point.clone(),
+ link: None,
+ },
+ Event {
+ kind: Kind::Enter,
+ name: seq_name.clone(),
+ point: sequences[open].end_point.clone(),
+ link: None,
+ },
+ Event {
+ kind: Kind::Exit,
+ name: seq_name.clone(),
+ point: open_exit.clone(),
+ link: None,
+ },
+ Event {
+ kind: Kind::Enter,
+ name: text_name.clone(),
+ point: open_exit,
+ link: None,
+ },
+ ],
+ );
+ // Closing.
+ tokenizer.map.add(
+ close_index,
+ 0,
+ vec![
+ Event {
+ kind: Kind::Exit,
+ name: text_name,
+ point: close_enter.clone(),
+ link: None,
+ },
+ Event {
+ kind: Kind::Enter,
+ name: seq_name.clone(),
+ point: close_enter,
+ link: None,
+ },
+ Event {
+ kind: Kind::Exit,
+ name: seq_name,
+ point: sequences[close].start_point.clone(),
+ link: None,
+ },
+ Event {
+ kind: Kind::Exit,
+ name: group_name,
+ point: sequences[close].start_point.clone(),
+ link: None,
+ },
+ ],
+ );
+
+ // Remove closing sequence if fully used.
+ if sequences[close].size == 0 {
+ sequences.remove(close);
+ tokenizer.map.add(close_index, 2, vec![]);
+ } else {
+ // Shift remaining closing sequence forward.
+ // Do it here because a sequence can open and close different
+ // other sequences, and the remainder can be on any side or
+ // somewhere in the middle.
+ tokenizer.events[close_index].point = sequences[close].start_point.clone();
+ }
+
+ if sequences[open].size == 0 {
+ sequences.remove(open);
+ tokenizer.map.add(open_index, 2, vec![]);
+ // Everything shifts one to the left, account for it in next iteration.
+ next -= 1;
+ } else {
+ tokenizer.events[open_index + 1].point = sequences[open].end_point.clone();
+ }
+
+ next
+}
+
/// Classify whether a character code represents whitespace, punctuation, or
/// something else.
///
@@ -458,15 +436,15 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
/// ## References
///
/// * [`micromark-util-classify-character` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-util-classify-character/dev/index.js)
-fn classify_character(char: Option<char>) -> GroupKind {
+fn classify_character(char: Option<char>) -> CharacterKind {
match char {
// EOF.
- None => GroupKind::Whitespace,
+ None => CharacterKind::Whitespace,
// Unicode whitespace.
- Some(char) if char.is_whitespace() => GroupKind::Whitespace,
+ Some(char) if char.is_whitespace() => CharacterKind::Whitespace,
// Unicode punctuation.
- Some(char) if PUNCTUATION.contains(&char) => GroupKind::Punctuation,
+ Some(char) if PUNCTUATION.contains(&char) => CharacterKind::Punctuation,
// Everything else.
- Some(_) => GroupKind::Other,
+ Some(_) => CharacterKind::Other,
}
}
diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs
index c0d9ae3..1bb8004 100644
--- a/src/construct/autolink.rs
+++ b/src/construct/autolink.rs
@@ -106,7 +106,7 @@ use crate::event::Name;
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
-/// Start of an autolink.
+/// Start of autolink.
///
/// ```markdown
/// > | a<https://example.com>b
@@ -128,7 +128,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After `<`, before the protocol.
+/// After `<`, at protocol or atext.
///
/// ```markdown
/// > | a<https://example.com>b
@@ -147,7 +147,7 @@ pub fn open(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After the first byte of the protocol or email name.
+/// At second byte of protocol or atext.
///
/// ```markdown
/// > | a<https://example.com>b
@@ -167,7 +167,7 @@ pub fn scheme_or_email_atext(tokenizer: &mut Tokenizer) -> State {
}
}
-/// Inside an ambiguous protocol or email name.
+/// In ambiguous protocol or atext.
///
/// ```markdown
/// > | a<https://example.com>b
@@ -197,7 +197,7 @@ pub fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer) -> State {
}
}
-/// Inside a URL, after the protocol.
+/// After protocol, in URL.
///
/// ```markdown
/// > | a<https://example.com>b
@@ -222,7 +222,7 @@ pub fn url_inside(tokenizer: &mut Tokenizer) -> State {
}
}
-/// Inside email atext.
+/// In email atext.
///
/// ```markdown
/// > | a<user.name@example.com>b
@@ -261,7 +261,7 @@ pub fn email_atext(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After an at-sign or a dot in the label.
+/// In label, after at-sign or dot.
///
/// ```markdown
/// > | a<user.name@example.com>b
@@ -277,7 +277,7 @@ pub fn email_at_sign_or_dot(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In the label, where `.` and `>` are allowed.
+/// In label, where `.` and `>` are allowed.
///
/// ```markdown
/// > | a<user.name@example.com>b
@@ -307,7 +307,7 @@ pub fn email_label(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In the label, where `.` and `>` are *not* allowed.
+/// In label, where `.` and `>` are *not* allowed.
///
/// Though, this is also used in `email_label` to parse other values.
///
diff --git a/src/construct/blank_line.rs b/src/construct/blank_line.rs
index eeef637..81b58fc 100644
--- a/src/construct/blank_line.rs
+++ b/src/construct/blank_line.rs
@@ -36,7 +36,7 @@ use crate::construct::partial_space_or_tab::space_or_tab;
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
-/// Start of a blank line.
+/// Start of blank line.
///
/// > 👉 **Note**: `␠` represents a space character.
///
@@ -55,7 +55,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
State::Retry(space_or_tab(tokenizer))
}
-/// After zero or more spaces or tabs, before a line ending or EOF.
+/// At eof/eol, after optional whitespace.
///
/// ```markdown
/// > | ␠␠␊
diff --git a/src/construct/block_quote.rs b/src/construct/block_quote.rs
index 1a32f7d..6e660cb 100644
--- a/src/construct/block_quote.rs
+++ b/src/construct/block_quote.rs
@@ -47,39 +47,17 @@ use crate::tokenizer::Tokenizer;
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
if tokenizer.parse_state.constructs.block_quote {
- tokenizer.attempt(State::Next(StateName::BlockQuoteBefore), State::Nok);
- State::Retry(space_or_tab_min_max(
- tokenizer,
- 0,
- if tokenizer.parse_state.constructs.code_indented {
- TAB_SIZE - 1
- } else {
- usize::MAX
- },
- ))
+ tokenizer.enter(Name::BlockQuote);
+ State::Retry(StateName::BlockQuoteContStart)
} else {
State::Nok
}
}
-/// Start of block quote, after whitespace, before `>`.
-///
-/// ```markdown
-/// > | > a
-/// ^
-/// ```
-pub fn before(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- Some(b'>') => {
- tokenizer.enter(Name::BlockQuote);
- State::Retry(StateName::BlockQuoteContBefore)
- }
- _ => State::Retry(StateName::BlockQuoteContBefore),
- }
-}
-
/// Start of block quote continuation.
///
+/// Also used to parse the first block quote opening.
+///
/// ```markdown
/// | > a
/// > | > b
@@ -98,7 +76,9 @@ pub fn cont_start(tokenizer: &mut Tokenizer) -> State {
))
}
-/// After whitespace, before `>`.
+/// At `>`, after optional whitespace.
+///
+/// Also used to parse the first block quote opening.
///
/// ```markdown
/// | > a
diff --git a/src/construct/character_escape.rs b/src/construct/character_escape.rs
index e0f36c7..494f1d2 100644
--- a/src/construct/character_escape.rs
+++ b/src/construct/character_escape.rs
@@ -37,7 +37,7 @@ use crate::event::Name;
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
-/// Start of a character escape.
+/// Start of character escape.
///
/// ```markdown
/// > | a\*b
@@ -56,7 +56,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
}
}
-/// Inside a character escape, after `\`.
+/// After `\`, at punctuation.
///
/// ```markdown
/// > | a\*b
diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs
index 476ea14..6171927 100644
--- a/src/construct/character_reference.rs
+++ b/src/construct/character_reference.rs
@@ -70,7 +70,7 @@ use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
use crate::util::slice::Slice;
-/// Start of a character reference.
+/// Start of character reference.
///
/// ```markdown
/// > | a&amp;b
@@ -93,8 +93,8 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
}
}
-/// Inside a character reference, after `&`, before `#` for numeric references
-/// or an alphanumeric for named references.
+/// After `&`, at `#` for numeric references or alphanumeric for named
+/// references.
///
/// ```markdown
/// > | a&amp;b
@@ -117,8 +117,7 @@ pub fn open(tokenizer: &mut Tokenizer) -> State {
}
}
-/// Inside a numeric character reference, right before `x` for hexadecimals,
-/// or a digit for decimals.
+/// After `#`, at `x` for hexadecimals or digit for decimals.
///
/// ```markdown
/// > | a&#123;b
@@ -141,8 +140,7 @@ pub fn numeric(tokenizer: &mut Tokenizer) -> State {
}
}
-/// Inside a character reference value, after the markers (`&#x`, `&#`, or
-/// `&`) that define its kind, but before the `;`.
+/// After markers (`&#x`, `&#`, or `&`), in value, before `;`.
///
/// The character reference kind defines what and how many characters are
/// allowed.
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index ed39917..ac9a63f 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -139,7 +139,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
}
}
-/// Inside the opening fence, after an optional prefix, before a sequence.
+/// In opening fence, after prefix, at sequence.
///
/// ```markdown
/// > | ~~~js
@@ -171,7 +171,7 @@ pub fn before_sequence_open(tokenizer: &mut Tokenizer) -> State {
}
}
-/// Inside the opening fence sequence.
+/// In opening fence sequence.
///
/// ```markdown
/// > | ~~~js
@@ -205,7 +205,7 @@ pub fn sequence_open(tokenizer: &mut Tokenizer) -> State {
}
}
-/// Inside the opening fence, after the sequence (and optional whitespace), before the info.
+/// In opening fence, after sequence (and optional whitespace), before info.
///
/// ```markdown
/// > | ~~~js
@@ -233,7 +233,7 @@ pub fn info_before(tokenizer: &mut Tokenizer) -> State {
}
}
-/// Inside the opening fence info.
+/// In info.
///
/// ```markdown
/// > | ~~~js
@@ -271,7 +271,7 @@ pub fn info(tokenizer: &mut Tokenizer) -> State {
}
}
-/// Inside the opening fence, after the info and whitespace, before the meta.
+/// In opening fence, after info and whitespace, before meta.
///
/// ```markdown
/// > | ~~~js eval
@@ -290,7 +290,7 @@ pub fn meta_before(tokenizer: &mut Tokenizer) -> State {
}
}
-/// Inside the opening fence meta.
+/// In meta.
///
/// ```markdown
/// > | ~~~js eval
@@ -319,7 +319,7 @@ pub fn meta(tokenizer: &mut Tokenizer) -> State {
}
}
-/// At an eol/eof in code, before a non-lazy closing fence or content.
+/// At eol/eof in code, before a non-lazy closing fence or content.
///
/// ```markdown
/// > | ~~~js
@@ -336,7 +336,7 @@ pub fn at_non_lazy_break(tokenizer: &mut Tokenizer) -> State {
State::Retry(StateName::CodeFencedCloseBefore)
}
-/// Before a closing fence, at the line ending.
+/// Before closing fence, at eol.
///
/// ```markdown
/// | ~~~js
@@ -356,7 +356,7 @@ pub fn close_before(tokenizer: &mut Tokenizer) -> State {
}
}
-/// Before a closing fence, before optional whitespace.
+/// Before closing fence, at optional whitespace.
///
/// ```markdown
/// | ~~~js
@@ -383,7 +383,7 @@ pub fn close_start(tokenizer: &mut Tokenizer) -> State {
))
}
-/// In a closing fence, after optional whitespace, before sequence.
+/// In closing fence, after optional whitespace, at sequence.
///
/// ```markdown
/// | ~~~js
@@ -401,7 +401,7 @@ pub fn before_sequence_close(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In the closing fence sequence.
+/// In closing fence sequence.
///
/// ```markdown
/// | ~~~js
@@ -434,7 +434,7 @@ pub fn sequence_close(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After the closing fence sequence after optional whitespace.
+/// After closing fence sequence, after optional whitespace.
///
/// ```markdown
/// | ~~~js
@@ -452,7 +452,7 @@ pub fn sequence_close_after(tokenizer: &mut Tokenizer) -> State {
}
}
-/// Before a closing fence, at the line ending.
+/// Before code content, not a closing fence, at eol.
///
/// ```markdown
/// | ~~~js
@@ -466,6 +466,7 @@ pub fn content_before(tokenizer: &mut Tokenizer) -> State {
tokenizer.exit(Name::LineEnding);
State::Next(StateName::CodeFencedContentStart)
}
+
/// Before code content, definitely not before a closing fence.
///
/// ```markdown
@@ -486,7 +487,7 @@ pub fn content_start(tokenizer: &mut Tokenizer) -> State {
))
}
-/// Before code content, after a prefix.
+/// Before code content, after optional prefix.
///
/// ```markdown
/// | ~~~js
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs
index 2ab117e..3a82dc4 100644
--- a/src/construct/code_indented.rs
+++ b/src/construct/code_indented.rs
@@ -95,7 +95,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State {
}
}
-/// Inside code content.
+/// In code content.
///
/// ```markdown
/// > | aaa
@@ -127,7 +127,7 @@ pub fn after(tokenizer: &mut Tokenizer) -> State {
State::Ok
}
-/// Right at a line ending, trying to parse another indent.
+/// At eol, trying to parse another indent.
///
/// ```markdown
/// > | aaa
@@ -153,7 +153,7 @@ pub fn further_start(tokenizer: &mut Tokenizer) -> State {
}
}
-/// At an eol, which is followed by an indented line.
+/// At eol, followed by an indented line.
///
/// ```markdown
/// > | aaa
diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs
index d321f64..d601583 100644
--- a/src/construct/code_text.rs
+++ b/src/construct/code_text.rs
@@ -112,7 +112,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In the opening sequence.
+/// In opening sequence.
///
/// ```markdown
/// > | `a`
@@ -177,7 +177,7 @@ pub fn data(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In the closing sequence.
+/// In closing sequence.
///
/// ```markdown
/// > | `a`
diff --git a/src/construct/definition.rs b/src/construct/definition.rs
index 11f1062..2378c48 100644
--- a/src/construct/definition.rs
+++ b/src/construct/definition.rs
@@ -104,7 +104,7 @@ use crate::util::{
slice::{Position, Slice},
};
-/// At the start of a definition.
+/// At start of a definition.
///
/// ```markdown
/// > | [a]: b "c"
@@ -135,7 +135,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
}
}
-/// At the start of a definition, after whitespace.
+/// After optional whitespace, at `[`.
///
/// ```markdown
/// > | [a]: b "c"
@@ -154,7 +154,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After the label of a definition.
+/// After label.
///
/// ```markdown
/// > | [a]: b "c"
@@ -182,7 +182,7 @@ pub fn label_after(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After the marker.
+/// After marker.
///
/// ```markdown
/// > | [a]: b "c"
@@ -196,7 +196,7 @@ pub fn marker_after(tokenizer: &mut Tokenizer) -> State {
State::Retry(space_or_tab_eol(tokenizer))
}
-/// Before a destination.
+/// Before destination.
///
/// ```markdown
/// > | [a]: b "c"
@@ -216,7 +216,7 @@ pub fn destination_before(tokenizer: &mut Tokenizer) -> State {
State::Retry(StateName::DestinationStart)
}
-/// After a destination.
+/// After destination.
///
/// ```markdown
/// > | [a]: b "c"
@@ -248,7 +248,7 @@ pub fn destination_missing(tokenizer: &mut Tokenizer) -> State {
State::Nok
}
-/// After a definition.
+/// After definition.
///
/// ```markdown
/// > | [a]: b
@@ -264,7 +264,7 @@ pub fn after(tokenizer: &mut Tokenizer) -> State {
State::Retry(space_or_tab(tokenizer))
}
-/// After a definition, after optional whitespace.
+/// After definition, after optional whitespace.
///
/// ```markdown
/// > | [a]: b
@@ -304,7 +304,7 @@ pub fn after_whitespace(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After a destination, presumably before a title.
+/// After destination, at whitespace.
///
/// ```markdown
/// > | [a]: b
@@ -320,7 +320,7 @@ pub fn title_before(tokenizer: &mut Tokenizer) -> State {
State::Retry(space_or_tab_eol(tokenizer))
}
-/// Before a title, after a line ending.
+/// At title.
///
/// ```markdown
/// | [a]: b
@@ -335,7 +335,7 @@ pub fn title_before_marker(tokenizer: &mut Tokenizer) -> State {
State::Retry(StateName::TitleStart)
}
-/// After a title.
+/// After title.
///
/// ```markdown
/// > | [a]: b "c"
@@ -352,7 +352,7 @@ pub fn title_after(tokenizer: &mut Tokenizer) -> State {
State::Retry(space_or_tab(tokenizer))
}
-/// After a title, after optional whitespace.
+/// After title, after optional whitespace.
///
/// ```markdown
/// > | [a]: b "c"
diff --git a/src/construct/hard_break_escape.rs b/src/construct/hard_break_escape.rs
index f5030aa..cec34d5 100644
--- a/src/construct/hard_break_escape.rs
+++ b/src/construct/hard_break_escape.rs
@@ -43,7 +43,7 @@ use crate::event::Name;
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
-/// Start of a hard break (escape).
+/// Start of hard break (escape).
///
/// ```markdown
/// > | a\
@@ -61,7 +61,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
}
}
-/// At the end of a hard break (escape), after `\`.
+/// After `\`, at eol.
///
/// ```markdown
/// > | a\
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index 17cf617..974158f 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -85,7 +85,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
}
}
-/// Start of a heading (atx), after whitespace.
+/// After optional whitespace, at `#`.
///
/// ```markdown
/// > | ## aa
@@ -100,7 +100,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In the opening sequence.
+/// In opening sequence.
///
/// ```markdown
/// > | ## aa
@@ -131,7 +131,7 @@ pub fn sequence_open(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After something but before something else.
+/// After something, before something else.
///
/// ```markdown
/// > | ## aa
@@ -161,7 +161,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In a further sequence (after whitespace).
+/// In further sequence (after whitespace).
///
/// Could be normal “visible” hashes in the heading or a final sequence.
///
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index e31ce76..8485f5a 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -65,7 +65,7 @@ use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
use crate::util::skip::opt_back as skip_opt_back;
-/// At a line ending, presumably an underline.
+/// At start of heading (setext) underline.
///
/// ```markdown
/// | aa
@@ -100,7 +100,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After optional whitespace, presumably an underline.
+/// After optional whitespace, at `-` or `=`.
///
/// ```markdown
/// | aa
@@ -118,7 +118,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In an underline sequence.
+/// In sequence.
///
/// ```markdown
/// | aa
@@ -143,7 +143,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After an underline sequence, after optional whitespace.
+/// After sequence, after optional whitespace.
///
/// ```markdown
/// | aa
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index b5e1815..2da4f47 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -124,7 +124,7 @@ const BASIC: u8 = 6;
/// Symbol for `<x>` (condition 7).
const COMPLETE: u8 = 7;
-/// Start of HTML (flow), before optional whitespace.
+/// Start of HTML (flow).
///
/// ```markdown
/// > | <x />
@@ -153,7 +153,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After optional whitespace, before `<`.
+/// At `<`, after optional whitespace.
///
/// ```markdown
/// > | <x />
@@ -169,7 +169,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After `<`, before a tag name or other stuff.
+/// After `<`, at tag name or other stuff.
///
/// ```markdown
/// > | <x />
@@ -209,7 +209,7 @@ pub fn open(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After `<!`, so inside a declaration, comment, or CDATA.
+/// After `<!`, at declaration, comment, or CDATA.
///
/// ```markdown
/// > | <!doctype>
@@ -242,7 +242,7 @@ pub fn declaration_open(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After `<!-`, inside a comment, before another `-`.
+/// After `<!-`, inside a comment, at another `-`.
///
/// ```markdown
/// > | <!--xxx-->
@@ -286,7 +286,7 @@ pub fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After `</`, in a closing tag, before a tag name.
+/// After `</`, in closing tag, at tag name.
///
/// ```markdown
/// > | </x>
@@ -303,7 +303,7 @@ pub fn tag_close_start(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In a tag name.
+/// In tag name.
///
/// ```markdown
/// > | <ab>
@@ -372,7 +372,7 @@ pub fn tag_name(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After a closing slash of a basic tag name.
+/// After closing slash of a basic tag name.
///
/// ```markdown
/// > | <div/>
@@ -390,7 +390,7 @@ pub fn basic_self_closing(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After a closing slash of a complete tag name.
+/// After closing slash of a complete tag name.
///
/// ```markdown
/// > | <x/>
@@ -406,7 +406,7 @@ pub fn complete_closing_tag_after(tokenizer: &mut Tokenizer) -> State {
}
}
-/// At a place where an attribute name would be valid.
+/// At an attribute name.
///
/// At first, this state is used after a complete tag name, after whitespace,
/// where it expects optional attributes or the end of the tag.
@@ -444,7 +444,7 @@ pub fn complete_attribute_name_before(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In an attribute name.
+/// In attribute name.
///
/// ```markdown
/// > | <a :b>
@@ -465,8 +465,8 @@ pub fn complete_attribute_name(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After an attribute name, before an attribute initializer, the end of the
-/// tag, or whitespace.
+/// After attribute name, at an optional initializer, the end of the tag, or
+/// whitespace.
///
/// ```markdown
/// > | <a b>
@@ -488,8 +488,8 @@ pub fn complete_attribute_name_after(tokenizer: &mut Tokenizer) -> State {
}
}
-/// Before an unquoted, double quoted, or single quoted attribute value,
-/// allowing whitespace.
+/// Before unquoted, double quoted, or single quoted attribute value, allowing
+/// whitespace.
///
/// ```markdown
/// > | <a b=c>
@@ -516,7 +516,7 @@ pub fn complete_attribute_value_before(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In a double or single quoted attribute value.
+/// In double or single quoted attribute value.
///
/// ```markdown
/// > | <a b="c">
@@ -543,7 +543,7 @@ pub fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In an unquoted attribute value.
+/// In unquoted attribute value.
///
/// ```markdown
/// > | <a b=c>
@@ -561,7 +561,7 @@ pub fn complete_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After a double or single quoted attribute value, before whitespace or the
+/// After double or single quoted attribute value, before whitespace or the
/// end of the tag.
///
/// ```markdown
@@ -617,7 +617,7 @@ pub fn complete_after(tokenizer: &mut Tokenizer) -> State {
}
}
-/// Inside continuation of any HTML kind.
+/// In continuation of any HTML kind.
///
/// ```markdown
/// > | <!--xxx-->
@@ -668,7 +668,7 @@ pub fn continuation(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In continuation, at an eol.
+/// In continuation, at eol.
///
/// ```markdown
/// > | <x>
@@ -683,7 +683,7 @@ pub fn continuation_start(tokenizer: &mut Tokenizer) -> State {
State::Retry(StateName::NonLazyContinuationStart)
}
-/// In continuation, at an eol, before non-lazy content.
+/// In continuation, at eol, before non-lazy content.
///
/// ```markdown
/// > | <x>
@@ -702,7 +702,7 @@ pub fn continuation_start_non_lazy(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In continuation, after an eol, before non-lazy content.
+/// In continuation, before non-lazy content.
///
/// ```markdown
/// | <x>
@@ -735,7 +735,7 @@ pub fn continuation_comment_inside(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In raw continuation, after `<`, expecting a `/`.
+/// In raw continuation, after `<`, at `/`.
///
/// ```markdown
/// > | <script>console.log(1)</script>
@@ -752,7 +752,7 @@ pub fn continuation_raw_tag_open(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In raw continuation, after `</`, expecting or inside a raw tag name.
+/// In raw continuation, after `</`, in a raw tag name.
///
/// ```markdown
/// > | <script>console.log(1)</script>
@@ -807,7 +807,7 @@ pub fn continuation_cdata_inside(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In declaration or instruction continuation, waiting for `>` to close it.
+/// In declaration or instruction continuation, at `>`.
///
/// ```markdown
/// > | <!-->
@@ -870,7 +870,7 @@ pub fn continuation_after(tokenizer: &mut Tokenizer) -> State {
State::Ok
}
-/// Before a line ending, expecting a blank line.
+/// Before eol, expecting blank line.
///
/// ```markdown
/// > | <div>
diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs
index b92b9fa..fde78de 100644
--- a/src/construct/html_text.rs
+++ b/src/construct/html_text.rs
@@ -77,7 +77,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After `<`, before a tag name or other stuff.
+/// After `<`, at tag name or other stuff.
///
/// ```markdown
/// > | a <b> c
@@ -110,7 +110,7 @@ pub fn open(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After `<!`, so inside a declaration, comment, or CDATA.
+/// After `<!`, at declaration, comment, or CDATA.
///
/// ```markdown
/// > | a <!doctype> c
@@ -139,7 +139,7 @@ pub fn declaration_open(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After `<!-`, inside a comment, before another `-`.
+/// In a comment, after `<!-`, at another `-`.
///
/// ```markdown
/// > | a <!--b--> c
@@ -155,7 +155,7 @@ pub fn comment_open_inside(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After `<!--`, inside a comment
+/// After `<!--`, in a comment.
///
/// > 👉 **Note**: [html (flow)][html_flow] does allow `<!-->` or `<!--->` as
/// > empty comments.
@@ -179,7 +179,7 @@ pub fn comment_start(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After `<!---`, inside a comment
+/// After `<!---`, in a comment.
///
/// > 👉 **Note**: [html (flow)][html_flow] does allow `<!-->` or `<!--->` as
/// > empty comments.
@@ -199,7 +199,7 @@ pub fn comment_start_dash(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In a comment.
+/// In comment.
///
/// ```markdown
/// > | a <!--b--> c
@@ -223,7 +223,7 @@ pub fn comment(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In a comment, after `-`.
+/// In comment, after `-`.
///
/// ```markdown
/// > | a <!--b--> c
@@ -239,7 +239,7 @@ pub fn comment_close(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After `<![`, inside CDATA, expecting `CDATA[`.
+/// After `<![`, in CDATA, expecting `CDATA[`.
///
/// ```markdown
/// > | a <![CDATA[>&<]]> b
@@ -285,7 +285,7 @@ pub fn cdata(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In CDATA, after `]`.
+/// In CDATA, after `]`, at another `]`.
///
/// ```markdown
/// > | a <![CDATA[>&<]]> b
@@ -301,7 +301,7 @@ pub fn cdata_close(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In CDATA, after `]]`.
+/// In CDATA, after `]]`, at `>`.
///
/// ```markdown
/// > | a <![CDATA[>&<]]> b
@@ -315,7 +315,7 @@ pub fn cdata_end(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In a declaration.
+/// In declaration.
///
/// ```markdown
/// > | a <!b> c
@@ -335,7 +335,7 @@ pub fn declaration(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In an instruction.
+/// In instruction.
///
/// ```markdown
/// > | a <?b?> c
@@ -359,7 +359,7 @@ pub fn instruction(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In an instruction, after `?`.
+/// In instruction, after `?`, at `>`.
///
/// ```markdown
/// > | a <?b?> c
@@ -372,7 +372,7 @@ pub fn instruction_close(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After `</`, in a closing tag, before a tag name.
+/// After `</`, in closing tag, at tag name.
///
/// ```markdown
/// > | a </b> c
@@ -406,7 +406,7 @@ pub fn tag_close(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In a closing tag, after the tag name.
+/// In closing tag, after tag name.
///
/// ```markdown
/// > | a </b> c
@@ -426,7 +426,7 @@ pub fn tag_close_between(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After `<x`, in an opening tag name.
+/// After `<x`, in opening tag name.
///
/// ```markdown
/// > | a <b> c
@@ -444,7 +444,7 @@ pub fn tag_open(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In an opening tag, after the tag name.
+/// In opening tag, after tag name.
///
/// ```markdown
/// > | a <b> c
@@ -473,7 +473,7 @@ pub fn tag_open_between(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In an attribute name.
+/// In attribute name.
///
/// ```markdown
/// > | a <b c> d
@@ -490,8 +490,8 @@ pub fn tag_open_attribute_name(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After an attribute name, before an attribute initializer, the end of the
-/// tag, or whitespace.
+/// After attribute name, before initializer, the end of the tag, or
+/// whitespace.
///
/// ```markdown
/// > | a <b c> d
@@ -518,8 +518,8 @@ pub fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer) -> State {
}
}
-/// Before an unquoted, double quoted, or single quoted attribute value,
-/// allowing whitespace.
+/// Before unquoted, double quoted, or single quoted attribute value, allowing
+/// whitespace.
///
/// ```markdown
/// > | a <b c=d> e
@@ -551,7 +551,7 @@ pub fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In a double or single quoted attribute value.
+/// In double or single quoted attribute value.
///
/// ```markdown
/// > | a <b c="d"> e
@@ -582,7 +582,7 @@ pub fn tag_open_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In an unquoted attribute value.
+/// In unquoted attribute value.
///
/// ```markdown
/// > | a <b c=d> e
@@ -599,8 +599,8 @@ pub fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After a double or single quoted attribute value, before whitespace or the
-/// end of the tag.
+/// After double or single quoted attribute value, before whitespace or the end
+/// of the tag.
///
/// ```markdown
/// > | a <b c="d"> e
@@ -613,7 +613,7 @@ pub fn tag_open_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State
}
}
-/// In certain circumstances of a complete tag where only an `>` is allowed.
+/// In certain circumstances of a tag where only a `>` is allowed.
///
/// ```markdown
/// > | a <b c="d"> e
@@ -631,7 +631,7 @@ pub fn end(tokenizer: &mut Tokenizer) -> State {
}
}
-/// At an allowed line ending.
+/// At eol.
///
/// > 👉 **Note**: we can’t have blank lines in text, so no need to worry about
/// > empty tokens.
@@ -654,7 +654,7 @@ pub fn line_ending_before(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After a line ending.
+/// After eol, at optional whitespace.
///
/// > 👉 **Note**: we can’t have blank lines in text, so no need to worry about
/// > empty tokens.
@@ -672,7 +672,7 @@ pub fn line_ending_after(tokenizer: &mut Tokenizer) -> State {
State::Retry(space_or_tab(tokenizer))
}
-/// After a line ending, after indent.
+/// After eol, after optional whitespace.
///
/// > 👉 **Note**: we can’t have blank lines in text, so no need to worry about
/// > empty tokens.
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index da53125..8801ea7 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -291,6 +291,10 @@ pub fn ok(tokenizer: &mut Tokenizer) -> State {
let is_link = tokenizer.events[label_start.start.0].name == Name::LabelLink;
+ // If this is a link, we need to mark earlier link starts as no longer
+ // viable for use (as they would otherwise contain a link).
+ // These link starts are still looking for balanced closing brackets, so
+ // we can’t remove them.
if is_link {
let mut index = 0;
while index < tokenizer.tokenize_state.label_start_stack.len() {
@@ -332,7 +336,7 @@ pub fn nok(tokenizer: &mut Tokenizer) -> State {
State::Nok
}
-/// Before a resource, at `(`.
+/// At a resource.
///
/// ```markdown
/// > | [a](b) c
@@ -351,7 +355,7 @@ pub fn resource_start(tokenizer: &mut Tokenizer) -> State {
}
}
-/// At the start of a resource, after `(`, before a destination.
+/// In resource, after `(`, at optional whitespace.
///
/// ```markdown
/// > | [a](b) c
@@ -365,7 +369,7 @@ pub fn resource_before(tokenizer: &mut Tokenizer) -> State {
State::Retry(space_or_tab_eol(tokenizer))
}
-/// At the start of a resource, after optional whitespace.
+/// In resource, after optional whitespace, at `)` or a destination.
///
/// ```markdown
/// > | [a](b) c
@@ -390,7 +394,7 @@ pub fn resource_open(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In a resource, after a destination, before optional whitespace.
+/// In resource, after destination, at optional whitespace.
///
/// ```markdown
/// > | [a](b) c
@@ -410,7 +414,12 @@ pub fn resource_destination_after(tokenizer: &mut Tokenizer) -> State {
State::Retry(space_or_tab_eol(tokenizer))
}
-/// Without destination.
+/// At invalid destination.
+///
+/// ```markdown
+/// > | [a](<<) b
+/// ^
+/// ```
pub fn resource_destination_missing(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.token_1 = Name::Data;
tokenizer.tokenize_state.token_2 = Name::Data;
@@ -421,7 +430,7 @@ pub fn resource_destination_missing(tokenizer: &mut Tokenizer) -> State {
State::Nok
}
-/// In a resource, after a destination, after whitespace.
+/// In resource, after destination and whitespace, at `(` or title.
///
/// ```markdown
/// > | [a](b ) c
@@ -443,7 +452,7 @@ pub fn resource_between(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In a resource, after a title.
+/// In resource, after title, at optional whitespace.
///
/// ```markdown
/// > | [a](b "c") d
@@ -460,7 +469,7 @@ pub fn resource_title_after(tokenizer: &mut Tokenizer) -> State {
State::Retry(space_or_tab_eol(tokenizer))
}
-/// In a resource, at the `)`.
+/// In resource, at `)`.
///
/// ```markdown
/// > | [a](b) d
@@ -479,7 +488,7 @@ pub fn resource_end(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In a reference (full), at the `[`.
+/// In reference (full), at `[`.
///
/// ```markdown
/// > | [a][b] d
@@ -501,7 +510,7 @@ pub fn reference_full(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In a reference (full), after `]`.
+/// In reference (full), after `]`.
///
/// ```markdown
/// > | [a][b] d
@@ -537,7 +546,7 @@ pub fn reference_full_after(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In a reference (collapsed), at the `[`.
+/// In reference (collapsed), at `[`.
///
/// > 👉 **Note**: we only get here if the label is defined.
///
@@ -558,7 +567,7 @@ pub fn reference_collapsed(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In a reference (collapsed), at the `]`.
+/// In reference (collapsed), at `]`.
///
/// > 👉 **Note**: we only get here if the label is defined.
///
@@ -581,8 +590,8 @@ pub fn reference_collapsed_open(tokenizer: &mut Tokenizer) -> State {
/// Resolve media.
///
-/// This turns correct label start (image, link) and label end into links and
-/// images, or turns them back into data.
+/// This turns matching label start (image, link) and label ends into links and
+/// images, and turns unmatched label starts back into data.
#[allow(clippy::too_many_lines)]
pub fn resolve(tokenizer: &mut Tokenizer) {
let mut left = tokenizer.tokenize_state.label_start_list_loose.split_off(0);
diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs
index b8c8858..ce09f5b 100644
--- a/src/construct/label_start_image.rs
+++ b/src/construct/label_start_image.rs
@@ -52,7 +52,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After `!`, before a `[`.
+/// After `!`, at `[`.
///
/// ```markdown
/// > | a ![b] c
diff --git a/src/construct/list.rs b/src/construct/list.rs
index 20be73c..206f823 100644
--- a/src/construct/list.rs
+++ b/src/construct/list.rs
@@ -79,7 +79,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
}
}
-/// Start of list item, after whitespace.
+/// After optional whitespace, at list item prefix.
///
/// ```markdown
/// > | * a
@@ -89,10 +89,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// Unordered.
Some(b'*' | b'-') => {
- tokenizer.check(
- State::Next(StateName::ListNok),
- State::Next(StateName::ListBeforeUnordered),
- );
+ tokenizer.check(State::Nok, State::Next(StateName::ListBeforeUnordered));
State::Retry(StateName::ThematicBreakStart)
}
Some(b'+') => State::Retry(StateName::ListBeforeUnordered),
@@ -103,7 +100,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
}
}
-/// Start of an unordered list item.
+/// At unordered list item marker.
///
/// The line is not a thematic break.
///
@@ -116,7 +113,7 @@ pub fn before_unordered(tokenizer: &mut Tokenizer) -> State {
State::Retry(StateName::ListMarker)
}
-/// Start of an ordered list item.
+/// At ordered list item value.
///
/// ```markdown
/// > | * a
@@ -128,7 +125,7 @@ pub fn before_ordered(tokenizer: &mut Tokenizer) -> State {
State::Retry(StateName::ListValue)
}
-/// In an ordered list item value.
+/// In ordered list item value.
///
/// ```markdown
/// > | 1. a
@@ -152,7 +149,7 @@ pub fn value(tokenizer: &mut Tokenizer) -> State {
}
}
-/// At a list item marker.
+/// At list item marker.
///
/// ```markdown
/// > | * a
@@ -167,7 +164,7 @@ pub fn marker(tokenizer: &mut Tokenizer) -> State {
State::Next(StateName::ListMarkerAfter)
}
-/// After a list item marker.
+/// After list item marker.
///
/// ```markdown
/// > | * a
@@ -184,7 +181,9 @@ pub fn marker_after(tokenizer: &mut Tokenizer) -> State {
State::Retry(StateName::BlankLineStart)
}
-/// After a list item marker, not followed by a blank line.
+/// After list item marker.
+///
+/// The marker is not followed by a blank line.
///
/// ```markdown
/// > | * a
@@ -201,7 +200,7 @@ pub fn marker_after_filled(tokenizer: &mut Tokenizer) -> State {
State::Retry(StateName::ListWhitespace)
}
-/// In whitespace after a marker.
+/// After marker, at whitespace.
///
/// ```markdown
/// > | * a
@@ -226,7 +225,7 @@ pub fn whitespace_after(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After a list item marker, followed by no indent or more indent that needed.
+/// After marker, followed by no indent or more indent than needed.
///
/// ```markdown
/// > | * a
@@ -244,7 +243,7 @@ pub fn prefix_other(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After a list item prefix.
+/// After list item prefix.
///
/// ```markdown
/// > | * a
@@ -318,7 +317,6 @@ pub fn cont_blank(tokenizer: &mut Tokenizer) -> State {
if container.blank_initial {
State::Nok
} else {
- tokenizer.attempt(State::Next(StateName::ListOk), State::Nok);
// Consume, optionally, at most `size`.
State::Retry(space_or_tab_min_max(tokenizer, 0, size))
}
@@ -338,21 +336,10 @@ pub fn cont_filled(tokenizer: &mut Tokenizer) -> State {
container.blank_initial = false;
- tokenizer.attempt(State::Next(StateName::ListOk), State::Nok);
// Consume exactly `size`.
State::Retry(space_or_tab_min_max(tokenizer, size, size))
}
-/// A state fn to yield [`State::Ok`].
-pub fn ok(_tokenizer: &mut Tokenizer) -> State {
- State::Ok
-}
-
-/// A state fn to yield [`State::Nok`].
-pub fn nok(_tokenizer: &mut Tokenizer) -> State {
- State::Nok
-}
-
/// Find adjacent list items with the same marker.
pub fn resolve(tokenizer: &mut Tokenizer) {
let mut lists_wip: Vec<(u8, usize, usize, usize)> = vec![];
diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs
index 663b01b..c956a2c 100644
--- a/src/construct/paragraph.rs
+++ b/src/construct/paragraph.rs
@@ -38,7 +38,7 @@ use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
use crate::util::skip::opt as skip_opt;
-/// Before a paragraph.
+/// Before paragraph.
///
/// ```markdown
/// > | abc
@@ -55,7 +55,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In a paragraph.
+/// In paragraph.
///
/// ```markdown
/// > | abc
diff --git a/src/construct/partial_bom.rs b/src/construct/partial_bom.rs
index 0175971..1818ef4 100644
--- a/src/construct/partial_bom.rs
+++ b/src/construct/partial_bom.rs
@@ -16,7 +16,7 @@ use crate::tokenizer::Tokenizer;
const BOM: [u8; 3] = [0xEF, 0xBB, 0xBF];
-/// Before a BOM.
+/// Before BOM.
///
/// ```text
/// > | 0xEF 0xBB 0xBF
@@ -31,7 +31,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
}
}
-/// Inside the BOM.
+/// In BOM.
///
/// ```text
/// > | 0xEF 0xBB 0xBF
diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs
index f9b7947..b6b0f59 100644
--- a/src/construct/partial_data.rs
+++ b/src/construct/partial_data.rs
@@ -11,7 +11,7 @@ use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
-/// At the beginning of data.
+/// At beginning of data.
///
/// ```markdown
/// > | abc
@@ -79,11 +79,10 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {
/// Merge adjacent data events.
pub fn resolve(tokenizer: &mut Tokenizer) {
- let len = tokenizer.events.len();
let mut index = 0;
// Loop through events and merge adjacent data events.
- while index < len {
+ while index < tokenizer.events.len() {
let event = &tokenizer.events[index];
if event.kind == Kind::Enter && event.name == Name::Data {
@@ -91,7 +90,7 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
let mut exit_far_index = exit_index;
// Find multiple `data` events.
- while exit_far_index + 1 < len
+ while exit_far_index + 1 < tokenizer.events.len()
&& tokenizer.events[exit_far_index + 1].name == Name::Data
{
exit_far_index += 2;
diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs
index c1c1e10..dc5c904 100644
--- a/src/construct/partial_destination.rs
+++ b/src/construct/partial_destination.rs
@@ -75,7 +75,7 @@ use crate::event::{Content, Name};
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
-/// Before a destination.
+/// Start of destination.
///
/// ```markdown
/// > | <aa>
@@ -105,7 +105,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After `<`, before an enclosed destination.
+/// After `<`, at an enclosed destination.
///
/// ```markdown
/// > | <aa>
@@ -126,7 +126,7 @@ pub fn enclosed_before(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In an enclosed destination.
+/// In enclosed destination.
///
/// ```markdown
/// > | <aa>
@@ -151,7 +151,7 @@ pub fn enclosed(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After `\`, in an enclosed destination.
+/// After `\`, at a special character.
///
/// ```markdown
/// > | <a\*a>
@@ -167,7 +167,7 @@ pub fn enclosed_escape(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In a raw destination.
+/// In raw destination.
///
/// ```markdown
/// > | aa
@@ -209,7 +209,7 @@ pub fn raw(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After `\`, in a raw destination.
+/// After `\`, at special character.
///
/// ```markdown
/// > | a\*a
diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs
index 0f7aa00..a1667e1 100644
--- a/src/construct/partial_label.rs
+++ b/src/construct/partial_label.rs
@@ -65,7 +65,7 @@ use crate::state::{Name as StateName, State};
use crate::subtokenize::link;
use crate::tokenizer::Tokenizer;
-/// Before a label.
+/// Start of label.
///
/// ```markdown
/// > | [a]
@@ -85,7 +85,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In a label, at something.
+/// In label, at something, before something else.
///
/// ```markdown
/// > | [a]
@@ -142,7 +142,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In a label, after whitespace.
+/// In label, after whitespace.
///
/// ```markdown
/// | [a␊
@@ -154,7 +154,7 @@ pub fn eol_after(tokenizer: &mut Tokenizer) -> State {
State::Retry(StateName::LabelAtBreak)
}
-/// In a label, at a blank line.
+/// In label, at blank line.
///
/// ```markdown
/// | [a␊
@@ -168,7 +168,7 @@ pub fn at_blank_line(tokenizer: &mut Tokenizer) -> State {
State::Nok
}
-/// In a label, in text.
+/// In label, in text.
///
/// ```markdown
/// > | [a]
@@ -200,7 +200,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After `\` in a label.
+/// After `\`, at a special character.
///
/// ```markdown
/// > | [a\*a]
diff --git a/src/construct/partial_non_lazy_continuation.rs b/src/construct/partial_non_lazy_continuation.rs
index 76854c8..3bbf7cc 100644
--- a/src/construct/partial_non_lazy_continuation.rs
+++ b/src/construct/partial_non_lazy_continuation.rs
@@ -14,7 +14,7 @@ use crate::event::Name;
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
-/// Start of continuation.
+/// At eol, before continuation.
///
/// ```markdown
/// > | * ```js
@@ -33,7 +33,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After line ending.
+/// After eol, at continuation.
///
/// ```markdown
/// | * ```js
diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs
index f2d9a73..5f1b4cf 100644
--- a/src/construct/partial_space_or_tab.rs
+++ b/src/construct/partial_space_or_tab.rs
@@ -61,7 +61,7 @@ pub fn space_or_tab_with_options(tokenizer: &mut Tokenizer, options: Options) ->
StateName::SpaceOrTabStart
}
-/// Before `space_or_tab`.
+/// Start of `space_or_tab`.
///
/// ```markdown
/// > | a␠␠b
diff --git a/src/construct/partial_space_or_tab_eol.rs b/src/construct/partial_space_or_tab_eol.rs
index 2127fe6..08f4bf2 100644
--- a/src/construct/partial_space_or_tab_eol.rs
+++ b/src/construct/partial_space_or_tab_eol.rs
@@ -44,7 +44,16 @@ pub fn space_or_tab_eol_with_options(tokenizer: &mut Tokenizer, options: Options
StateName::SpaceOrTabEolStart
}
-pub fn eol_start(tokenizer: &mut Tokenizer) -> State {
+/// Start of whitespace with at most one eol.
+///
+/// ```markdown
+/// > | a␠␠b
+/// ^
+/// > | a␠␠␊
+/// ^
+/// | ␠␠b
+/// ```
+pub fn start(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(
State::Next(StateName::SpaceOrTabEolAfterFirst),
State::Next(StateName::SpaceOrTabEolAtEol),
@@ -65,7 +74,16 @@ pub fn eol_start(tokenizer: &mut Tokenizer) -> State {
))
}
-pub fn eol_after_first(tokenizer: &mut Tokenizer) -> State {
+/// After initial whitespace, at optional eol.
+///
+/// ```markdown
+/// > | a␠␠b
+/// ^
+/// > | a␠␠␊
+/// ^
+/// | ␠␠b
+/// ```
+pub fn after_first(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.space_or_tab_eol_ok = true;
if tokenizer
@@ -79,14 +97,19 @@ pub fn eol_after_first(tokenizer: &mut Tokenizer) -> State {
State::Retry(StateName::SpaceOrTabEolAtEol)
}
-/// `space_or_tab_eol`: after optionally first `space_or_tab`.
+/// After optional whitespace, at eol.
///
/// ```markdown
-/// > | a
+/// > | a␠␠b
+/// ^
+/// > | a␠␠␊
+/// ^
+/// | ␠␠b
+/// > | a␊
/// ^
-/// | b
+/// | ␠␠b
/// ```
-pub fn eol_at_eol(tokenizer: &mut Tokenizer) -> State {
+pub fn at_eol(tokenizer: &mut Tokenizer) -> State {
if let Some(b'\n') = tokenizer.current {
tokenizer.enter_with_content(
Name::LineEnding,
@@ -123,15 +146,17 @@ pub fn eol_at_eol(tokenizer: &mut Tokenizer) -> State {
}
}
-/// `space_or_tab_eol`: after eol.
+/// After eol.
///
/// ```markdown
-/// | a
-/// > | b
+/// | a␠␠␊
+/// > | ␠␠b
+/// ^
+/// | a␊
+/// > | ␠␠b
/// ^
/// ```
-#[allow(clippy::needless_pass_by_value)]
-pub fn eol_after_eol(tokenizer: &mut Tokenizer) -> State {
+pub fn after_eol(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(
State::Next(StateName::SpaceOrTabEolAfterMore),
State::Next(StateName::SpaceOrTabEolAfterMore),
@@ -151,14 +176,17 @@ pub fn eol_after_eol(tokenizer: &mut Tokenizer) -> State {
))
}
-/// `space_or_tab_eol`: after more (optional) `space_or_tab`.
+/// After optional final whitespace.
///
/// ```markdown
-/// | a
-/// > | b
-/// ^
+/// | a␠␠␊
+/// > | ␠␠b
+/// ^
+/// | a␊
+/// > | ␠␠b
+/// ^
/// ```
-pub fn eol_after_more(tokenizer: &mut Tokenizer) -> State {
+pub fn after_more(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.space_or_tab_eol_content_type = None;
tokenizer.tokenize_state.space_or_tab_eol_connect = false;
tokenizer.tokenize_state.space_or_tab_eol_ok = false;
diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs
index 6f7a037..b97243e 100644
--- a/src/construct/partial_title.rs
+++ b/src/construct/partial_title.rs
@@ -36,7 +36,7 @@ use crate::state::{Name as StateName, State};
use crate::subtokenize::link;
use crate::tokenizer::Tokenizer;
-/// Before a title.
+/// Start of title.
///
/// ```markdown
/// > | "a"
@@ -57,9 +57,9 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After the opening marker.
+/// After opening marker.
///
-/// This is also used when at the closing marker.
+/// This is also used at the closing marker.
///
/// ```markdown
/// > | "a"
@@ -132,7 +132,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In a title, after whitespace.
+/// In title, after whitespace.
///
/// ```markdown
/// | "a␊
@@ -144,7 +144,7 @@ pub fn after_eol(tokenizer: &mut Tokenizer) -> State {
State::Retry(StateName::TitleAtBreak)
}
-/// In a title, at a blank line.
+/// In title, at blank line.
///
/// ```markdown
/// | "a␊
@@ -158,7 +158,7 @@ pub fn at_blank_line(tokenizer: &mut Tokenizer) -> State {
State::Nok
}
-/// In title text.
+/// In text.
///
/// ```markdown
/// > | "a"
@@ -187,7 +187,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After `\`, in title text.
+/// After `\`, at a special character.
///
/// ```markdown
/// > | "a\*b"
diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs
index f4d008d..b2989cb 100644
--- a/src/construct/thematic_break.rs
+++ b/src/construct/thematic_break.rs
@@ -54,7 +54,7 @@ use crate::event::Name;
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
-/// Start of a thematic break.
+/// Start of thematic break.
///
/// ```markdown
/// > | ***
@@ -78,7 +78,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
}
}
-/// Start of a thematic break, after whitespace.
+/// After optional whitespace, at marker.
///
/// ```markdown
/// > | ***
@@ -94,7 +94,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After something but before something else.
+/// After something, before something else.
///
/// ```markdown
/// > | ***
@@ -124,7 +124,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State {
}
}
-/// In a sequence of markers.
+/// In sequence.
///
/// ```markdown
/// > | ***
diff --git a/src/content/document.rs b/src/content/document.rs
index 04f9dc6..59e6e7c 100644
--- a/src/content/document.rs
+++ b/src/content/document.rs
@@ -46,7 +46,7 @@ enum Phase {
Eof,
}
-/// Turn `codes` as the document content type into events.
+/// Parse a document.
pub fn document(parse_state: &mut ParseState, point: Point) -> Vec<Event> {
let mut tokenizer = Tokenizer::new(point, parse_state);
@@ -66,9 +66,7 @@ pub fn document(parse_state: &mut ParseState, point: Point) -> Vec<Event> {
events
}
-/// At the beginning.
-///
-/// Perhaps a BOM?
+/// Start of document, at an optional BOM.
///
/// ```markdown
/// > | a
@@ -88,7 +86,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
State::Retry(StateName::BomStart)
}
-/// Before existing containers.
+/// At optional existing containers.
///
/// ```markdown
/// | * a
@@ -121,7 +119,7 @@ pub fn container_existing_before(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After an existing container.
+/// After continued existing container.
///
/// ```markdown
/// | * a
@@ -133,7 +131,7 @@ pub fn container_existing_after(tokenizer: &mut Tokenizer) -> State {
State::Retry(StateName::DocumentContainerExistingBefore)
}
-/// Before a new container.
+/// At new containers.
///
/// ```markdown
/// > | * a
@@ -183,7 +181,7 @@ pub fn container_new_before(tokenizer: &mut Tokenizer) -> State {
State::Retry(StateName::BlockQuoteStart)
}
-/// Maybe before a new container, but not a block quote.
+/// At new container, but not a block quote.
///
/// ```markdown
/// > | * a
@@ -206,7 +204,7 @@ pub fn container_new_before_not_block_quote(tokenizer: &mut Tokenizer) -> State
State::Retry(StateName::ListStart)
}
-/// Maybe before a new container, but not a list.
+/// At new container, but not a list (or block quote).
///
/// ```markdown
/// > | a
@@ -224,7 +222,7 @@ pub fn container_new_before_not_list(tokenizer: &mut Tokenizer) -> State {
State::Retry(StateName::DocumentContainersAfter)
}
-/// After a new container.
+/// After new container.
///
/// ```markdown
/// > | * a
@@ -258,7 +256,7 @@ pub fn container_new_after(tokenizer: &mut Tokenizer) -> State {
State::Retry(StateName::DocumentContainerNewBefore)
}
-/// After containers, before flow.
+/// After containers, at flow.
///
/// ```markdown
/// > | * a
diff --git a/src/content/flow.rs b/src/content/flow.rs
index c6bd398..08c7891 100644
--- a/src/content/flow.rs
+++ b/src/content/flow.rs
@@ -23,15 +23,15 @@ use crate::event::Name;
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
-/// Before flow.
-///
-/// First we assume a blank line.
+/// Start of flow.
///
/// ```markdown
-/// |
-/// |## alpha
-/// | bravo
-/// |***
+/// > | ## alpha
+/// ^
+/// > | bravo
+/// ^
+/// > | ***
+/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
@@ -81,6 +81,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
// Actual parsing: blank line? Indented code? Indented anything?
// Also includes `-` which can be a setext heading underline or a thematic break.
None | Some(b'\t' | b'\n' | b' ' | b'-') => State::Retry(StateName::FlowBlankLineBefore),
+ // Must be a paragraph.
Some(_) => {
tokenizer.attempt(State::Next(StateName::FlowAfter), State::Nok);
State::Retry(StateName::ParagraphStart)
@@ -88,6 +89,12 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
}
}
+/// At blank line.
+///
+/// ```markdown
+/// > | ␠␠␊
+/// ^
+/// ```
pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(
State::Next(StateName::FlowBlankLineAfter),
@@ -96,17 +103,11 @@ pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State {
State::Retry(StateName::BlankLineStart)
}
-/// Before flow (initial).
-///
-/// “Initial” flow means unprefixed flow, so right at the start of a line.
-/// Interestingly, the only flow (initial) construct is indented code.
-/// Move to `before` afterwards.
+/// At code (indented).
///
/// ```markdown
-/// |qwe
-/// | asd
-/// |~~~js
-/// |<div>
+/// > | ␠␠␠␠a
+/// ^
/// ```
pub fn before_code_indented(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(
@@ -116,6 +117,12 @@ pub fn before_code_indented(tokenizer: &mut Tokenizer) -> State {
State::Retry(StateName::CodeIndentedStart)
}
+/// At code (fenced).
+///
+/// ````markdown
+/// > | ```
+/// ^
+/// ````
pub fn before_code_fenced(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(
State::Next(StateName::FlowAfter),
@@ -124,6 +131,12 @@ pub fn before_code_fenced(tokenizer: &mut Tokenizer) -> State {
State::Retry(StateName::CodeFencedStart)
}
+/// At html (flow).
+///
+/// ```markdown
+/// > | <a>
+/// ^
+/// ```
pub fn before_html(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(
State::Next(StateName::FlowAfter),
@@ -132,6 +145,12 @@ pub fn before_html(tokenizer: &mut Tokenizer) -> State {
State::Retry(StateName::HtmlFlowStart)
}
+/// At heading (atx).
+///
+/// ```markdown
+/// > | # a
+/// ^
+/// ```
pub fn before_heading_atx(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(
State::Next(StateName::FlowAfter),
@@ -140,6 +159,13 @@ pub fn before_heading_atx(tokenizer: &mut Tokenizer) -> State {
State::Retry(StateName::HeadingAtxStart)
}
+/// At heading (setext).
+///
+/// ```markdown
+/// | a
+/// > | =
+/// ^
+/// ```
pub fn before_heading_setext(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(
State::Next(StateName::FlowAfter),
@@ -148,6 +174,12 @@ pub fn before_heading_setext(tokenizer: &mut Tokenizer) -> State {
State::Retry(StateName::HeadingSetextStart)
}
+/// At thematic break.
+///
+/// ```markdown
+/// > | ***
+/// ^
+/// ```
pub fn before_thematic_break(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(
State::Next(StateName::FlowAfter),
@@ -156,6 +188,12 @@ pub fn before_thematic_break(tokenizer: &mut Tokenizer) -> State {
State::Retry(StateName::ThematicBreakStart)
}
+/// At definition.
+///
+/// ```markdown
+/// > | [a]: b
+/// ^
+/// ```
pub fn before_definition(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(
State::Next(StateName::FlowAfter),
@@ -164,12 +202,22 @@ pub fn before_definition(tokenizer: &mut Tokenizer) -> State {
State::Retry(StateName::DefinitionStart)
}
-/// After a blank line.
+/// At paragraph.
///
-/// Move to `start` afterwards.
+/// ```markdown
+/// > | a
+/// ^
+/// ```
+pub fn before_paragraph(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.attempt(State::Next(StateName::FlowAfter), State::Nok);
+ State::Retry(StateName::ParagraphStart)
+}
+
+/// After blank line.
///
/// ```markdown
-/// ␠␠|
+/// > | ␠␠␊
+/// ^
/// ```
pub fn blank_line_after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
@@ -186,14 +234,11 @@ pub fn blank_line_after(tokenizer: &mut Tokenizer) -> State {
}
}
-/// After something.
+/// After flow.
///
/// ```markdown
-/// ## alpha|
-/// |
-/// ~~~js
-/// asd
-/// ~~~|
+/// > | # a␊
+/// ^
/// ```
pub fn after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
@@ -207,13 +252,3 @@ pub fn after(tokenizer: &mut Tokenizer) -> State {
_ => unreachable!("expected eol/eof"),
}
}
-
-/// Before a paragraph.
-///
-/// ```markdown
-/// |asd
-/// ```
-pub fn before_paragraph(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt(State::Next(StateName::FlowAfter), State::Nok);
- State::Retry(StateName::ParagraphStart)
-}
diff --git a/src/content/string.rs b/src/content/string.rs
index 1eefd30..ec4fce2 100644
--- a/src/content/string.rs
+++ b/src/content/string.rs
@@ -17,9 +17,15 @@ use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
+/// Characters that can start something in string.
const MARKERS: [u8; 2] = [b'&', b'\\'];
/// Start of string.
+///
+/// ````markdown
+/// > | ```js
+/// ^
+/// ````
pub fn start(tokenizer: &mut Tokenizer) -> State {
tokenizer.register_resolver(ResolveName::String);
tokenizer.tokenize_state.markers = &MARKERS;
@@ -27,6 +33,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
}
/// Before string.
+///
+/// ````markdown
+/// > | ```js
+/// ^
+/// ````
pub fn before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => State::Ok,
@@ -49,12 +60,17 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
}
/// At data.
+///
+/// ````markdown
+/// > | ```js
+/// ^
+/// ````
pub fn before_data(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(State::Next(StateName::StringBefore), State::Nok);
State::Retry(StateName::DataStart)
}
-/// Resolve whitespace.
+/// Resolve whitespace in string.
pub fn resolve(tokenizer: &mut Tokenizer) {
resolve_whitespace(tokenizer, false, false);
}
diff --git a/src/content/text.rs b/src/content/text.rs
index 6509d30..5c13dba 100644
--- a/src/content/text.rs
+++ b/src/content/text.rs
@@ -25,6 +25,7 @@ use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
+/// Characters that can start something in text.
const MARKERS: [u8; 9] = [
b'!', // `label_start_image`
b'&', // `character_reference`
@@ -38,6 +39,11 @@ const MARKERS: [u8; 9] = [
];
/// Start of text.
+///
+/// ```markdown
+/// > | abc
+/// ^
+/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
tokenizer.register_resolver(ResolveName::Text);
tokenizer.tokenize_state.markers = &MARKERS;
@@ -45,6 +51,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
}
/// Before text.
+///
+/// ```markdown
+/// > | abc
+/// ^
+/// ```
pub fn before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => State::Ok,
@@ -109,7 +120,14 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
}
}
-/// At `<`, which wasn’t an autolink: before HTML?
+/// Before html (text).
+///
+/// At `<`, which wasn’t an autolink.
+///
+/// ```markdown
+/// > | a <b>
+/// ^
+/// ```
pub fn before_html(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(
State::Next(StateName::TextBefore),
@@ -118,7 +136,14 @@ pub fn before_html(tokenizer: &mut Tokenizer) -> State {
State::Retry(StateName::HtmlTextStart)
}
-/// At `\`, which wasn’t a character escape: before a hard break?
+/// Before hard break escape.
+///
+/// At `\`, which wasn’t a character escape.
+///
+/// ```markdown
+/// > | a \␊
+/// ^
+/// ```
pub fn before_hard_break_escape(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(
State::Next(StateName::TextBefore),
@@ -127,7 +152,12 @@ pub fn before_hard_break_escape(tokenizer: &mut Tokenizer) -> State {
State::Retry(StateName::HardBreakEscapeStart)
}
-/// At data.
+/// Before data.
+///
+/// ```markdown
+/// > | a
+/// ^
+/// ```
pub fn before_data(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(State::Next(StateName::TextBefore), State::Nok);
State::Retry(StateName::DataStart)
diff --git a/src/event.rs b/src/event.rs
index 664a609..51ecd86 100644
--- a/src/event.rs
+++ b/src/event.rs
@@ -1887,7 +1887,7 @@ pub enum Content {
Text,
}
-/// A link to another event.
+/// Link to another event.
#[derive(Debug, Clone)]
pub struct Link {
pub previous: Option<usize>,
@@ -1895,7 +1895,7 @@ pub struct Link {
pub content_type: Content,
}
-/// A location in the document (`line`/`column`/`offset`).
+/// Place in the document.
///
/// The interface for the location in the document comes from unist `Point`:
/// <https://github.com/syntax-tree/unist#point>.
@@ -1916,7 +1916,7 @@ pub struct Point {
pub vs: usize,
}
-/// Possible event kinds.
+/// Event kinds.
#[derive(Debug, PartialEq, Clone)]
pub enum Kind {
/// The start of something.
@@ -1928,8 +1928,12 @@ pub enum Kind {
/// Something semantic happening somewhere.
#[derive(Debug, Clone)]
pub struct Event {
+ /// Kind of event.
pub kind: Kind,
+ /// Name of event.
pub name: Name,
+ /// Place where this happens.
pub point: Point,
+ /// Link to another event.
pub link: Option<Link>,
}
diff --git a/src/lib.rs b/src/lib.rs
index afa34c0..24a794b 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -72,7 +72,7 @@ impl LineEnding {
"\r\n" => LineEnding::CarriageReturnLineFeed,
"\r" => LineEnding::CarriageReturn,
"\n" => LineEnding::LineFeed,
- _ => unreachable!("invalid code"),
+ _ => unreachable!("invalid str"),
}
}
}
diff --git a/src/parser.rs b/src/parser.rs
index dc2c07a..a8416ed 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -4,10 +4,10 @@ use crate::content::document::document;
use crate::event::{Event, Point};
use crate::{Constructs, Options};
-/// Information needed, in all content types, when parsing markdown.
+/// Info needed, in all content types, when parsing markdown.
///
/// Importantly, this contains a set of known definitions.
-/// It also references the input value as a `Vec<char>`.
+/// It also references the input value as bytes (`u8`).
#[derive(Debug)]
pub struct ParseState<'a> {
pub constructs: &'a Constructs,
@@ -19,7 +19,7 @@ pub struct ParseState<'a> {
/// Turn a string of markdown into events.
///
-/// Passes the codes back so the compiler can access the source.
+/// Passes the bytes back so the compiler can access the source.
pub fn parse<'a>(value: &'a str, options: &'a Options) -> (Vec<Event>, &'a [u8]) {
let mut parse_state = ParseState {
constructs: &options.constructs,
diff --git a/src/resolve.rs b/src/resolve.rs
index e72b2a2..e7d63f9 100644
--- a/src/resolve.rs
+++ b/src/resolve.rs
@@ -2,7 +2,7 @@ use crate::construct;
use crate::content;
use crate::tokenizer::Tokenizer;
-/// Names of functions to move to.
+/// Names of functions that resolve.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Name {
Label,
@@ -16,7 +16,7 @@ pub enum Name {
Text,
}
-/// Call the corresponding function for a state name.
+/// Call the corresponding resolver.
pub fn call(tokenizer: &mut Tokenizer, name: Name) {
let func = match name {
Name::Label => construct::label_end::resolve,
diff --git a/src/state.rs b/src/state.rs
index d5055e2..190683c 100644
--- a/src/state.rs
+++ b/src/state.rs
@@ -36,7 +36,6 @@ pub enum Name {
BlankLineAfter,
BlockQuoteStart,
- BlockQuoteBefore,
BlockQuoteContStart,
BlockQuoteContBefore,
BlockQuoteContAfter,
@@ -245,7 +244,6 @@ pub enum Name {
ListStart,
ListBefore,
- ListNok,
ListBeforeOrdered,
ListBeforeUnordered,
ListValue,
@@ -259,7 +257,6 @@ pub enum Name {
ListContStart,
ListContBlank,
ListContFilled,
- ListOk,
NonLazyContinuationStart,
NonLazyContinuationAfter,
@@ -322,7 +319,6 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
Name::BlankLineAfter => construct::blank_line::after,
Name::BlockQuoteStart => construct::block_quote::start,
- Name::BlockQuoteBefore => construct::block_quote::before,
Name::BlockQuoteContStart => construct::block_quote::cont_start,
Name::BlockQuoteContBefore => construct::block_quote::cont_before,
Name::BlockQuoteContAfter => construct::block_quote::cont_after,
@@ -562,7 +558,6 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
Name::ListStart => construct::list::start,
Name::ListBefore => construct::list::before,
- Name::ListNok => construct::list::nok,
Name::ListBeforeOrdered => construct::list::before_ordered,
Name::ListBeforeUnordered => construct::list::before_unordered,
Name::ListValue => construct::list::value,
@@ -576,7 +571,6 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
Name::ListContStart => construct::list::cont_start,
Name::ListContBlank => construct::list::cont_blank,
Name::ListContFilled => construct::list::cont_filled,
- Name::ListOk => construct::list::ok,
Name::NonLazyContinuationStart => construct::partial_non_lazy_continuation::start,
Name::NonLazyContinuationAfter => construct::partial_non_lazy_continuation::after,
@@ -588,11 +582,11 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
Name::SpaceOrTabInside => construct::partial_space_or_tab::inside,
Name::SpaceOrTabAfter => construct::partial_space_or_tab::after,
- Name::SpaceOrTabEolStart => construct::partial_space_or_tab_eol::eol_start,
- Name::SpaceOrTabEolAfterFirst => construct::partial_space_or_tab_eol::eol_after_first,
- Name::SpaceOrTabEolAfterEol => construct::partial_space_or_tab_eol::eol_after_eol,
- Name::SpaceOrTabEolAtEol => construct::partial_space_or_tab_eol::eol_at_eol,
- Name::SpaceOrTabEolAfterMore => construct::partial_space_or_tab_eol::eol_after_more,
+ Name::SpaceOrTabEolStart => construct::partial_space_or_tab_eol::start,
+ Name::SpaceOrTabEolAfterFirst => construct::partial_space_or_tab_eol::after_first,
+ Name::SpaceOrTabEolAfterEol => construct::partial_space_or_tab_eol::after_eol,
+ Name::SpaceOrTabEolAtEol => construct::partial_space_or_tab_eol::at_eol,
+ Name::SpaceOrTabEolAfterMore => construct::partial_space_or_tab_eol::after_more,
Name::StringStart => content::string::start,
Name::StringBefore => content::string::before,
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index 588d2a3..432c198 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -27,7 +27,7 @@ use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
use crate::util::edit_map::EditMap;
-/// Create a link between two [`Event`][]s.
+/// Link two [`Event`][]s.
///
/// Arbitrary (void) events can be linked together.
/// This optimizes for the common case where the token at `index` is connected
@@ -60,7 +60,7 @@ pub fn link_to(events: &mut [Event], pevious: usize, next: usize) {
/// Parse linked events.
///
-/// Supposed to be called repeatedly, returns `1: true` when done.
+/// Supposed to be called repeatedly, returns `true` when done.
pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool {
let mut map = EditMap::new();
let mut done = true;
@@ -123,36 +123,35 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool {
done
}
-/// Parse linked events.
-///
-/// Supposed to be called repeatedly, returns `1: true` when done.
+/// Divide `child_events` over links in `events`, the first of which is at
+/// `link_index`.
pub fn divide_events(
map: &mut EditMap,
events: &[Event],
mut link_index: usize,
child_events: &mut Vec<Event>,
) {
- // Now, loop through all subevents to figure out which parts
- // belong where and fix deep links.
- let mut subindex = 0;
+ // Loop through `child_events` to figure out which parts belong where and
+ // fix deep links.
+ let mut child_index = 0;
let mut slices = vec![];
let mut slice_start = 0;
let mut old_prev: Option<usize> = None;
- while subindex < child_events.len() {
- let current = &child_events[subindex].point;
+ while child_index < child_events.len() {
+ let current = &child_events[child_index].point;
let end = &events[link_index + 1].point;
// Find the first event that starts after the end we’re looking
// for.
if current.index > end.index || (current.index == end.index && current.vs > end.vs) {
slices.push((link_index, slice_start));
- slice_start = subindex;
+ slice_start = child_index;
link_index = events[link_index].link.as_ref().unwrap().next.unwrap();
}
// Fix sublinks.
- if let Some(sublink_curr) = &child_events[subindex].link {
+ if let Some(sublink_curr) = &child_events[child_index].link {
if sublink_curr.previous.is_some() {
let old_prev = old_prev.unwrap();
let prev_event = &mut child_events[old_prev];
@@ -173,7 +172,7 @@ pub fn divide_events(
// its `previous` index to account for the shifted events.
// If it points to a next event, we also change the next event’s
// reference back to *this* event.
- if let Some(sublink_curr) = &child_events[subindex].link {
+ if let Some(sublink_curr) = &child_events[child_index].link {
if let Some(next) = sublink_curr.next {
let sublink_next = child_events[next].link.as_mut().unwrap();
@@ -188,7 +187,7 @@ pub fn divide_events(
}
}
- subindex += 1;
+ child_index += 1;
}
if !child_events.is_empty() {
@@ -200,10 +199,13 @@ pub fn divide_events(
while index > 0 {
index -= 1;
- let start = slices[index].0;
map.add(
- start,
- if start == events.len() { 0 } else { 2 },
+ slices[index].0,
+ if slices[index].0 == events.len() {
+ 0
+ } else {
+ 2
+ },
child_events.split_off(slices[index].1),
);
}
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 0b51c48..3f60b86 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -76,7 +76,7 @@ pub struct Media {
/// Different kinds of attempts.
#[derive(Debug, PartialEq)]
enum AttemptKind {
- /// Discard what was tokenizer when unsuccessful.
+ /// Discard what was tokenized when unsuccessful.
Attempt,
/// Discard always.
Check,
@@ -356,7 +356,7 @@ impl<'a> Tokenizer<'a> {
}
}
- /// Prepare for a next code to get consumed.
+ /// Prepare for a next byte to get consumed.
fn expect(&mut self, byte: Option<u8>) {
debug_assert!(self.consumed, "expected previous byte to be consumed");
self.consumed = false;
@@ -368,11 +368,10 @@ impl<'a> Tokenizer<'a> {
/// used, or call a next function.
pub fn consume(&mut self) {
debug_assert!(!self.consumed, "expected code to not have been consumed: this might be because `x(code)` instead of `x` was returned");
-
self.move_one();
self.previous = self.current;
- // While we’re not at the eof, it is at least better to not have the
+ // While we’re not at eof, it is at least better to not have the
// same current byte as `previous` *and* `current`.
self.current = None;
// Mark as consumed.
@@ -427,6 +426,7 @@ impl<'a> Tokenizer<'a> {
self.enter_with_link(name, None);
}
+ /// Enter with a content type.
pub fn enter_with_content(&mut self, name: Name, content_type_opt: Option<Content>) {
self.enter_with_link(
name,
@@ -438,6 +438,7 @@ impl<'a> Tokenizer<'a> {
);
}
+ /// Enter with a link.
pub fn enter_with_link(&mut self, name: Name, link: Option<Link>) {
let mut point = self.point.clone();
move_point_back(self, &mut point);
@@ -663,7 +664,7 @@ fn push_impl(
};
}
State::Retry(name) => {
- log::debug!("retry: {:?}", name);
+ log::debug!("retry: `{:?}`", name);
state = call(tokenizer, name);
}
}
diff --git a/src/util/edit_map.rs b/src/util/edit_map.rs
index 59adfca..11ac486 100644
--- a/src/util/edit_map.rs
+++ b/src/util/edit_map.rs
@@ -53,8 +53,7 @@ fn shift_links(events: &mut [Event], jumps: &[(usize, usize, usize)]) {
}
}
-/// Make it easy to insert and remove things while being performant and keeping
-/// links in check.
+/// Tracks a bunch of edits, so things can be inserted and removed while keeping links in check.
#[derive(Debug)]
pub struct EditMap {
/// Record of changes.
diff --git a/src/util/skip.rs b/src/util/skip.rs
index 371418f..46cbb4a 100644
--- a/src/util/skip.rs
+++ b/src/util/skip.rs
@@ -12,15 +12,18 @@ pub fn opt_back(events: &[Event], index: usize, names: &[Name]) -> usize {
skip_opt_impl(events, index, names, false)
}
-pub fn to_back(events: &[Event], index: usize, names: &[Name]) -> usize {
- to_impl(events, index, names, false)
-}
-
+/// Skip from `index` forwards to `names`.
pub fn to(events: &[Event], index: usize, names: &[Name]) -> usize {
to_impl(events, index, names, true)
}
-pub fn to_impl(events: &[Event], mut index: usize, names: &[Name], forward: bool) -> usize {
+/// Skip from `index` backwards to `names`.
+pub fn to_back(events: &[Event], index: usize, names: &[Name]) -> usize {
+ to_impl(events, index, names, false)
+}
+
+/// Skip from `index` to `names`, forwards or backwards depending on `forward`.
+fn to_impl(events: &[Event], mut index: usize, names: &[Name], forward: bool) -> usize {
while index < events.len() {
let current = &events[index].name;
@@ -34,7 +37,7 @@ pub fn to_impl(events: &[Event], mut index: usize, names: &[Name], forward: bool
index
}
-/// Skip internals.
+/// Skip past `names` starting at `index`, forwards or backwards depending on `forward`.
fn skip_opt_impl(events: &[Event], mut index: usize, names: &[Name], forward: bool) -> usize {
let mut balance = 0;
let open = if forward { Kind::Enter } else { Kind::Exit };
diff --git a/src/util/slice.rs b/src/util/slice.rs
index 34adf32..e70078a 100644
--- a/src/util/slice.rs
+++ b/src/util/slice.rs
@@ -4,7 +4,7 @@ use crate::constant::TAB_SIZE;
use crate::event::{Event, Kind, Point};
use std::str;
-/// A range between two places.
+/// A range between two points.
#[derive(Debug)]
pub struct Position<'a> {
pub start: &'a Point,
@@ -53,9 +53,9 @@ impl<'a> Position<'a> {
}
}
-/// Chars belonging to a range.
+/// Bytes belonging to a range.
///
-/// Includes information on virtual spaces before and after the chars.
+/// Includes information on virtual spaces before and after the bytes.
#[derive(Debug)]
pub struct Slice<'a> {
pub bytes: &'a [u8],
@@ -64,7 +64,7 @@ pub struct Slice<'a> {
}
impl<'a> Slice<'a> {
- /// Get the slice belonging to a point.
+ /// Get a slice for a single point.
pub fn from_point(bytes: &'a [u8], point: &Point) -> Slice<'a> {
let mut before = point.vs;
let mut start = point.index;
@@ -88,16 +88,14 @@ impl<'a> Slice<'a> {
}
}
- /// Create a slice from one index.
- ///
- /// Indices are places in `bytes`.
+ /// Get a slice for a single index.
///
/// > 👉 **Note**: indices cannot represent virtual spaces.
pub fn from_index(bytes: &'a [u8], index: usize) -> Slice<'a> {
Slice::from_indices(bytes, index, index + 1)
}
- /// Get the slice belonging to a position.
+ /// Get a slice for a position.
pub fn from_position(bytes: &'a [u8], position: &Position) -> Slice<'a> {
let mut before = position.start.vs;
let mut after = position.end.vs;
@@ -125,9 +123,7 @@ impl<'a> Slice<'a> {
}
}
- /// Create a slice from two indices.
- ///
- /// Indices are places in `bytes`.
+ /// Get a slice for two indices.
///
/// > 👉 **Note**: indices cannot represent virtual spaces.
pub fn from_indices(bytes: &'a [u8], start: usize, end: usize) -> Slice<'a> {
@@ -157,7 +153,7 @@ impl<'a> Slice<'a> {
/// Turn the slice into a `&str`.
///
- /// Does not support virtual spaces.
+ /// > 👉 **Note**: cannot represent virtual spaces.
pub fn as_str(&self) -> &str {
str::from_utf8(self.bytes).unwrap()
}