author     Titus Wormer <tituswormer@gmail.com>  2022-07-25 15:29:11 +0200
committer  Titus Wormer <tituswormer@gmail.com>  2022-07-25 15:29:11 +0200
commit     11304728b6607bc2a8d41a640308f3379a25b933 (patch)
tree       c49fb64a64e1c39b889a40f48dcd44f87aaea7b1 /src
parent     9c18ff7858730f0c7782206129375c7efcb7d77f (diff)
Improve performance w/ a single feed loop
Diffstat (limited to 'src')

 src/construct/attention.rs                      |   2
 src/construct/blank_line.rs                     |   4
 src/construct/block_quote.rs                    |   2
 src/construct/code_fenced.rs                    |   6
 src/construct/code_indented.rs                  |   8
 src/construct/code_text.rs                      |   2
 src/construct/definition.rs                     |   6
 src/construct/hard_break_escape.rs              |   2
 src/construct/hard_break_trailing.rs            |   2
 src/construct/heading_atx.rs                    |   2
 src/construct/heading_setext.rs                 |   2
 src/construct/html_flow.rs                      |   4
 src/construct/label_end.rs                      |   8
 src/construct/list.rs                           |  10
 src/construct/paragraph.rs                      |   2
 src/construct/partial_data.rs                   |   2
 src/construct/partial_destination.rs            |   4
 src/construct/partial_non_lazy_continuation.rs  |   4
 src/construct/partial_space_or_tab.rs           |   8
 src/construct/partial_whitespace.rs             |   4
 src/construct/thematic_break.rs                 |   2
 src/content/document.rs                         |   2
 src/subtokenize.rs                              |   8
 src/tokenizer.rs                                | 187

 24 files changed, 138 insertions(+), 145 deletions(-)
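A note on the recurring edit across the construct files below: a finishing state function used to return `State::Ok(1)` for a code it had not consumed, and `State::Ok(0)` only at the end of input, because the old driver advanced past every code it handed in and the EOF marker `Code::None` was never buffered. With the single feed loop, only `consume` moves the tokenizer's cursor (see the `self.index += 1;` added in src/tokenizer.rs below), so an unconsumed code is still under the cursor and every finish can return `State::Ok(0)`. Here is a minimal, self-contained sketch of that difference; the types are hypothetical reductions, not the crate's API:

```rust
#[allow(dead_code)]
#[derive(Clone, Copy, Debug, PartialEq)]
enum Code {
    None, // end of input, only ever passed while flushing
    Char(char),
}

fn main() {
    let codes = [Code::Char('a'), Code::Char('*')];

    // Old driver: advances unconditionally, so a state fn stopping at
    // `*` without consuming it must hand one code back (`Ok(1)`), but
    // `Ok(0)` at `Code::None`, which was never in the replay buffer.
    let mut index = 0;
    while index < codes.len() {
        let code = codes[index];
        index += 1; // driver moves past every code it feeds
        if code == Code::Char('*') {
            let back = if code == Code::None { 0 } else { 1 };
            index -= back; // the compensation `State::Ok(1)` encoded
            break;
        }
    }
    assert_eq!(index, 1); // cursor back on `*`

    // New driver: the cursor only moves when a state fn consumes, so
    // an unconsumed `*` is still under it and `Ok(0)` is always right.
    let mut index = 0;
    while index < codes.len() {
        match codes[index] {
            Code::Char('*') => break, // not consumed, nothing to return
            _ => index += 1,          // what `consume` does
        }
    }
    assert_eq!(index, 1);
}
```

That collapse is why the `matches!(code, Code::None)` conditionals disappear wholesale below, and why so many `code` parameters become `_code`.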
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
index 1aa25c0..eb93810 100644
--- a/src/construct/attention.rs
+++ b/src/construct/attention.rs
@@ -193,7 +193,7 @@ fn inside(tokenizer: &mut Tokenizer, code: Code, marker: MarkerKind) -> State {
_ => {
tokenizer.exit(Token::AttentionSequence);
tokenizer.register_resolver("attention".to_string(), Box::new(resolve_attention));
- State::Ok(if matches!(code, Code::None) { 0 } else { 1 })
+ State::Ok(0)
}
}
}
diff --git a/src/construct/blank_line.rs b/src/construct/blank_line.rs
index 1121b81..dc36784 100644
--- a/src/construct/blank_line.rs
+++ b/src/construct/blank_line.rs
@@ -59,9 +59,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> State {
/// ```
fn after(_tokenizer: &mut Tokenizer, code: Code) -> State {
match code {
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
- State::Ok(if matches!(code, Code::None) { 0 } else { 1 })
- }
+ Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => State::Ok(0),
_ => State::Nok,
}
}
diff --git a/src/construct/block_quote.rs b/src/construct/block_quote.rs
index da21add..9925a5a 100644
--- a/src/construct/block_quote.rs
+++ b/src/construct/block_quote.rs
@@ -128,7 +128,7 @@ fn cont_after(tokenizer: &mut Tokenizer, code: Code) -> State {
}
_ => {
tokenizer.exit(Token::BlockQuotePrefix);
- State::Ok(if matches!(code, Code::None) { 0 } else { 1 })
+ State::Ok(0)
}
}
}
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index 3923ba0..a814142 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -506,7 +506,7 @@ fn close_sequence_after(tokenizer: &mut Tokenizer, code: Code) -> State {
match code {
Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
tokenizer.exit(Token::CodeFencedFence);
- State::Ok(if matches!(code, Code::None) { 0 } else { 1 })
+ State::Ok(0)
}
_ => State::Nok,
}
@@ -589,11 +589,11 @@ fn content_continue(tokenizer: &mut Tokenizer, code: Code, info: Info) -> State {
/// > | ~~~
/// ^
/// ```
-fn after(tokenizer: &mut Tokenizer, code: Code) -> State {
+fn after(tokenizer: &mut Tokenizer, _code: Code) -> State {
tokenizer.exit(Token::CodeFenced);
// Feel free to interrupt.
tokenizer.interrupt = false;
// No longer concrete.
tokenizer.concrete = false;
- State::Ok(if matches!(code, Code::None) { 0 } else { 1 })
+ State::Ok(0)
}
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs
index 512a816..6c528ff 100644
--- a/src/construct/code_indented.rs
+++ b/src/construct/code_indented.rs
@@ -115,11 +115,11 @@ fn content(tokenizer: &mut Tokenizer, code: Code) -> State {
/// > | aaa
/// ^
/// ```
-fn after(tokenizer: &mut Tokenizer, code: Code) -> State {
+fn after(tokenizer: &mut Tokenizer, _code: Code) -> State {
tokenizer.exit(Token::CodeIndented);
// Feel free to interrupt.
tokenizer.interrupt = false;
- State::Ok(if matches!(code, Code::None) { 0 } else { 1 })
+ State::Ok(0)
}
/// Right at a line ending, trying to parse another indent.
@@ -154,8 +154,8 @@ fn further_start(tokenizer: &mut Tokenizer, code: Code) -> State {
/// > | bbb
/// ^
/// ```
-fn further_end(_tokenizer: &mut Tokenizer, code: Code) -> State {
- State::Ok(if matches!(code, Code::None) { 0 } else { 1 })
+fn further_end(_tokenizer: &mut Tokenizer, _code: Code) -> State {
+ State::Ok(0)
}
/// At the beginning of a line that is not indented enough.
diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs
index e68d489..451ef45 100644
--- a/src/construct/code_text.rs
+++ b/src/construct/code_text.rs
@@ -190,7 +190,7 @@ fn sequence_close(tokenizer: &mut Tokenizer, code: Code, size_open: usize, size: usize) -> State {
_ if size_open == size => {
tokenizer.exit(Token::CodeTextSequence);
tokenizer.exit(Token::CodeText);
- State::Ok(if matches!(code, Code::None) { 0 } else { 1 })
+ State::Ok(0)
}
_ => {
let index = tokenizer.events.len();
diff --git a/src/construct/definition.rs b/src/construct/definition.rs
index 9e43d18..766bd8a 100644
--- a/src/construct/definition.rs
+++ b/src/construct/definition.rs
@@ -237,7 +237,7 @@ fn after_whitespace(tokenizer: &mut Tokenizer, code: Code) -> State {
tokenizer.exit(Token::Definition);
// You’d be interrupting.
tokenizer.interrupt = true;
- State::Ok(if matches!(code, Code::None) { 0 } else { 1 })
+ State::Ok(0)
}
_ => State::Nok,
}
@@ -297,9 +297,7 @@ fn title_after(tokenizer: &mut Tokenizer, code: Code) -> State {
/// ```
fn title_after_after_optional_whitespace(_tokenizer: &mut Tokenizer, code: Code) -> State {
match code {
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
- State::Ok(if matches!(code, Code::None) { 0 } else { 1 })
- }
+ Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => State::Ok(0),
_ => State::Nok,
}
}
diff --git a/src/construct/hard_break_escape.rs b/src/construct/hard_break_escape.rs
index 617b0ce..2ac693e 100644
--- a/src/construct/hard_break_escape.rs
+++ b/src/construct/hard_break_escape.rs
@@ -74,7 +74,7 @@ fn inside(tokenizer: &mut Tokenizer, code: Code) -> State {
match code {
Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
tokenizer.exit(Token::HardBreakEscape);
- State::Ok(if matches!(code, Code::None) { 0 } else { 1 })
+ State::Ok(0)
}
_ => State::Nok,
}
diff --git a/src/construct/hard_break_trailing.rs b/src/construct/hard_break_trailing.rs
index 8ce4201..35097ec 100644
--- a/src/construct/hard_break_trailing.rs
+++ b/src/construct/hard_break_trailing.rs
@@ -81,7 +81,7 @@ fn inside(tokenizer: &mut Tokenizer, code: Code, size: usize) -> State {
{
tokenizer.exit(Token::HardBreakTrailingSpace);
tokenizer.exit(Token::HardBreakTrailing);
- State::Ok(if matches!(code, Code::None) { 0 } else { 1 })
+ State::Ok(0)
}
_ => State::Nok,
}
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index 1eabb56..4ef1192 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -134,7 +134,7 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code) -> State {
tokenizer.register_resolver("heading_atx".to_string(), Box::new(resolve));
// Feel free to interrupt.
tokenizer.interrupt = false;
- State::Ok(if matches!(code, Code::None) { 0 } else { 1 })
+ State::Ok(0)
}
Code::VirtualSpace | Code::Char('\t' | ' ') => {
tokenizer.go(space_or_tab(), at_break)(tokenizer, code)
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index 7aa0054..83c41e2 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -189,7 +189,7 @@ fn after(tokenizer: &mut Tokenizer, code: Code) -> State {
// Feel free to interrupt.
tokenizer.interrupt = false;
tokenizer.register_resolver("heading_setext".to_string(), Box::new(resolve));
- State::Ok(if matches!(code, Code::None) { 0 } else { 1 })
+ State::Ok(0)
}
_ => State::Nok,
}
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index 7a7c25f..add2308 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -924,13 +924,13 @@ fn continuation_close(tokenizer: &mut Tokenizer, code: Code, info: Info) -> State {
/// > | <!doctype>
/// ^
/// ```
-fn continuation_after(tokenizer: &mut Tokenizer, code: Code) -> State {
+fn continuation_after(tokenizer: &mut Tokenizer, _code: Code) -> State {
tokenizer.exit(Token::HtmlFlow);
// Feel free to interrupt.
tokenizer.interrupt = false;
// No longer concrete.
tokenizer.concrete = false;
- State::Ok(if matches!(code, Code::None) { 0 } else { 1 })
+ State::Ok(0)
}
/// Before a line ending, expecting a blank line.
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index 35dfcdf..13af833 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -320,7 +320,7 @@ fn reference_not_full(tokenizer: &mut Tokenizer, code: Code, info: Info) -> State {
/// > | [a] b
/// ^
/// ```
-fn ok(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> State {
+fn ok(tokenizer: &mut Tokenizer, _code: Code, mut info: Info) -> State {
// Remove this one and everything after it.
let mut left = tokenizer
.label_start_stack
@@ -345,7 +345,7 @@ fn ok(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> State {
info.media.end.1 = tokenizer.events.len() - 1;
tokenizer.media_list.push(info.media);
tokenizer.register_resolver_before("media".to_string(), Box::new(resolve_media));
- State::Ok(if matches!(code, Code::None) { 0 } else { 1 })
+ State::Ok(0)
}
/// Done, it’s nothing.
@@ -526,7 +526,7 @@ fn full_reference(tokenizer: &mut Tokenizer, code: Code) -> State {
/// > | [a][b] d
/// ^
/// ```
-fn full_reference_after(tokenizer: &mut Tokenizer, code: Code) -> State {
+fn full_reference_after(tokenizer: &mut Tokenizer, _code: Code) -> State {
let events = &tokenizer.events;
let mut index = events.len() - 1;
let mut start: Option<usize> = None;
@@ -558,7 +558,7 @@ fn full_reference_after(tokenizer: &mut Tokenizer, code: Code) -> State {
false,
)))
{
- State::Ok(if matches!(code, Code::None) { 0 } else { 1 })
+ State::Ok(0)
} else {
State::Nok
}
diff --git a/src/construct/list.rs b/src/construct/list.rs
index 7437d4a..ae3fc34 100644
--- a/src/construct/list.rs
+++ b/src/construct/list.rs
@@ -276,7 +276,7 @@ fn whitespace_after(_tokenizer: &mut Tokenizer, code: Code) -> State {
if matches!(code, Code::VirtualSpace | Code::Char('\t' | ' ')) {
State::Nok
} else {
- State::Ok(if matches!(code, Code::None) { 0 } else { 1 })
+ State::Ok(0)
}
}
@@ -304,7 +304,7 @@ fn prefix_other(tokenizer: &mut Tokenizer, code: Code) -> State {
/// > | * a
/// ^
/// ```
-fn after(tokenizer: &mut Tokenizer, code: Code, blank: bool) -> State {
+fn after(tokenizer: &mut Tokenizer, _code: Code, blank: bool) -> State {
if blank && tokenizer.interrupt {
State::Nok
} else {
@@ -322,7 +322,7 @@ fn after(tokenizer: &mut Tokenizer, code: Code, blank: bool) -> State {
tokenizer.exit(Token::ListItemPrefix);
tokenizer.register_resolver_before("list_item".to_string(), Box::new(resolve_list_item));
- State::Ok(if matches!(code, Code::None) { 0 } else { 1 })
+ State::Ok(0)
}
}
@@ -377,8 +377,8 @@ pub fn not_blank_cont(tokenizer: &mut Tokenizer, code: Code) -> State {
}
/// A state fn to yield [`State::Ok`].
-pub fn ok(_tokenizer: &mut Tokenizer, code: Code) -> State {
- State::Ok(if matches!(code, Code::None) { 0 } else { 1 })
+pub fn ok(_tokenizer: &mut Tokenizer, _code: Code) -> State {
+ State::Ok(0)
}
/// A state fn to yield [`State::Nok`].
diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs
index 5409532..bc980b2 100644
--- a/src/construct/paragraph.rs
+++ b/src/construct/paragraph.rs
@@ -69,7 +69,7 @@ fn inside(tokenizer: &mut Tokenizer, code: Code) -> State {
tokenizer.register_resolver_before("paragraph".to_string(), Box::new(resolve));
// You’d be interrupting.
tokenizer.interrupt = true;
- State::Ok(if matches!(code, Code::None) { 0 } else { 1 })
+ State::Ok(0)
}
_ => {
tokenizer.consume(code);
diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs
index d60ef36..ce10763 100644
--- a/src/construct/partial_data.rs
+++ b/src/construct/partial_data.rs
@@ -42,7 +42,7 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, stop: &'static [Code]) -> State {
}
_ if stop.contains(&code) => {
tokenizer.register_resolver("data".to_string(), Box::new(resolve_data));
- State::Ok(if matches!(code, Code::None) { 0 } else { 1 })
+ State::Ok(0)
}
_ => {
tokenizer.enter(Token::Data);
diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs
index f898eb5..4a43ec2 100644
--- a/src/construct/partial_destination.rs
+++ b/src/construct/partial_destination.rs
@@ -224,7 +224,7 @@ fn raw(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> State {
tokenizer.exit(info.options.string.clone());
tokenizer.exit(info.options.raw.clone());
tokenizer.exit(info.options.destination);
- State::Ok(if matches!(code, Code::None) { 0 } else { 1 })
+ State::Ok(0)
} else {
tokenizer.consume(code);
info.balance -= 1;
@@ -242,7 +242,7 @@ fn raw(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> State {
tokenizer.exit(info.options.string.clone());
tokenizer.exit(info.options.raw.clone());
tokenizer.exit(info.options.destination);
- State::Ok(if matches!(code, Code::None) { 0 } else { 1 })
+ State::Ok(0)
}
}
Code::Char(char) if char.is_ascii_control() => State::Nok,
diff --git a/src/construct/partial_non_lazy_continuation.rs b/src/construct/partial_non_lazy_continuation.rs
index c6ac493..62e8989 100644
--- a/src/construct/partial_non_lazy_continuation.rs
+++ b/src/construct/partial_non_lazy_continuation.rs
@@ -39,10 +39,10 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> State {
/// > | b
/// ^
/// ```
-fn after(tokenizer: &mut Tokenizer, code: Code) -> State {
+fn after(tokenizer: &mut Tokenizer, _code: Code) -> State {
if tokenizer.lazy {
State::Nok
} else {
- State::Ok(if matches!(code, Code::None) { 0 } else { 1 })
+ State::Ok(0)
}
}
diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs
index 6eb3f1d..f13414a 100644
--- a/src/construct/partial_space_or_tab.rs
+++ b/src/construct/partial_space_or_tab.rs
@@ -149,7 +149,7 @@ fn start(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> State {
}
_ => {
if info.options.min == 0 {
- State::Ok(if matches!(code, Code::None) { 0 } else { 1 })
+ State::Ok(0)
} else {
State::Nok
}
@@ -173,7 +173,7 @@ fn inside(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> State {
_ => {
tokenizer.exit(info.options.kind.clone());
if info.size >= info.options.min {
- State::Ok(if matches!(code, Code::None) { 0 } else { 1 })
+ State::Ok(0)
} else {
State::Nok
}
@@ -204,7 +204,7 @@ fn after_space_or_tab(tokenizer: &mut Tokenizer, code: Code, mut info: EolInfo) -> State {
tokenizer.exit(Token::LineEnding);
State::Fn(Box::new(|t, c| after_eol(t, c, info)))
}
- _ if info.ok => State::Ok(if matches!(code, Code::None) { 0 } else { 1 }),
+ _ if info.ok => State::Ok(0),
_ => State::Nok,
}
}
@@ -245,6 +245,6 @@ fn after_more_space_or_tab(_tokenizer: &mut Tokenizer, code: Code) -> State {
) {
State::Nok
} else {
- State::Ok(if matches!(code, Code::None) { 0 } else { 1 })
+ State::Ok(0)
}
}
diff --git a/src/construct/partial_whitespace.rs b/src/construct/partial_whitespace.rs
index 4fc013e..acdd4d1 100644
--- a/src/construct/partial_whitespace.rs
+++ b/src/construct/partial_whitespace.rs
@@ -57,6 +57,6 @@ fn at_eol(tokenizer: &mut Tokenizer, code: Code) -> State {
}
/// Fine.
-fn ok(_tokenizer: &mut Tokenizer, code: Code) -> State {
- State::Ok(if matches!(code, Code::None) { 0 } else { 1 })
+fn ok(_tokenizer: &mut Tokenizer, _code: Code) -> State {
+ State::Ok(0)
}
diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs
index 4159146..66edaf8 100644
--- a/src/construct/thematic_break.rs
+++ b/src/construct/thematic_break.rs
@@ -183,7 +183,7 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, info: Info) -> State {
tokenizer.exit(Token::ThematicBreak);
// Feel free to interrupt.
tokenizer.interrupt = false;
- State::Ok(if matches!(code, Code::None) { 0 } else { 1 })
+ State::Ok(0)
}
Code::Char(char) if char == info.kind.as_char() => {
tokenizer.enter(Token::ThematicBreakSequence);
diff --git a/src/content/document.rs b/src/content/document.rs
index c1017a7..f8d7b55 100644
--- a/src/content/document.rs
+++ b/src/content/document.rs
@@ -78,7 +78,7 @@ struct DocumentInfo {
pub fn document(parse_state: &mut ParseState, point: Point) -> Vec<Event> {
let mut tokenizer = Tokenizer::new(point, parse_state);
- tokenizer.push(&parse_state.codes, Box::new(start), true);
+ tokenizer.push(parse_state.codes.clone(), Box::new(start), true);
let mut index = 0;
let mut definitions = vec![];
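The one-line change in src/content/document.rs above follows from the new ownership model: `push` now appends incoming codes into the tokenizer's own `codes` buffer and later feeds from it by index, so callers hand over a `Vec<Code>` by value (here a clone, since `parse_state` remains borrowed elsewhere). A reduced sketch of that shape, with hypothetical simplified types:

```rust
#[derive(Clone, Copy, Debug)]
enum Code {
    Char(char),
}

struct Tokenizer {
    codes: Vec<Code>, // tokenizer-owned buffer, grown by `push`
}

impl Tokenizer {
    fn push(&mut self, mut codes: Vec<Code>) {
        // Append instead of borrowing a slice: repeated pushes (as in
        // subtokenize) extend one contiguous buffer that feeding
        // indexes into.
        self.codes.append(&mut codes);
    }
}

fn main() {
    let shared = vec![Code::Char('a'), Code::Char('b')];
    let mut tokenizer = Tokenizer { codes: vec![] };
    tokenizer.push(shared.clone()); // `shared` stays usable elsewhere
    assert_eq!(tokenizer.codes.len(), shared.len());
}
```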
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index 2b5d775..0c9df34 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -84,6 +84,7 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool {
} else {
text
}));
+ let mut size = 0;
// Loop through links to pass them in order to the subtokenizer.
while let Some(index) = link_index {
@@ -96,7 +97,7 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool {
};
if link_curr.previous != None {
- tokenizer.define_skip(&enter.point);
+ tokenizer.define_skip(&enter.point, size);
}
let func = match state {
@@ -105,10 +106,13 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool {
};
state = tokenizer.push(
- span::codes(&parse_state.codes, &span),
+ span::codes(&parse_state.codes, &span).to_vec(),
func,
link_curr.next == None,
);
+
+ size += span.end_index - span.start_index;
+
link_index = link_curr.next;
}
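In src/subtokenize.rs, every span of a linked token chain is pushed into the same subtokenizer, so its codes land at the end of that tokenizer's growing buffer. A point expressed in parse-wide coordinates therefore has to be translated into a buffer-relative index: `size` accumulates the lengths of the spans fed so far, and that running total is what the new `define_skip` parameter carries. A small sketch of the bookkeeping, with a hypothetical `Span` type standing in for the crate's span info:

```rust
struct Span {
    start_index: usize,
    end_index: usize,
}

/// Buffer-relative start of each span: the summed lengths of the
/// spans that were appended before it.
fn relative_starts(spans: &[Span]) -> Vec<usize> {
    let mut size = 0;
    let mut out = Vec::with_capacity(spans.len());
    for span in spans {
        out.push(size);
        size += span.end_index - span.start_index;
    }
    out
}

fn main() {
    let spans = [
        Span { start_index: 3, end_index: 7 },   // 4 codes
        Span { start_index: 10, end_index: 12 }, // 2 codes
    ];
    assert_eq!(relative_starts(&spans), vec![0, 4]);
}
```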
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 931ffae..7ec0d91 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -173,7 +173,7 @@ struct InternalState {
#[allow(clippy::struct_excessive_bools)]
pub struct Tokenizer<'a> {
/// Jump between line endings.
- column_start: Vec<(usize, usize, usize)>,
+ column_start: Vec<(usize, usize, usize, usize)>,
// First line.
line_start: usize,
/// Track whether a character is expected to be consumed, and whether it’s
@@ -183,20 +183,20 @@ pub struct Tokenizer<'a> {
consumed: bool,
/// Track whether this tokenizer is done.
drained: bool,
+ /// Current character code.
+ current: Code,
+ /// Previous character code.
+ pub previous: Code,
+ /// Current relative and absolute place in the file.
+ pub point: Point,
/// Semantic labels of one or more codes in `codes`.
pub events: Vec<Event>,
/// Hierarchy of semantic labels.
///
/// Tracked to make sure everything’s valid.
pub stack: Vec<Token>,
- /// Previous character code.
- pub previous: Code,
/// To do.
pub map: EditMap,
- /// Current character code.
- current: Code,
- /// Current relative and absolute place in the file.
- pub point: Point,
/// List of attached resolvers, which will be called when done feeding,
/// to clean events.
resolvers: Vec<Box<Resolver>>,
@@ -204,6 +204,8 @@ pub struct Tokenizer<'a> {
resolver_ids: Vec<String>,
/// Shared parsing state across tokenizers.
pub parse_state: &'a ParseState<'a>,
+ codes: Vec<Code>,
+ pub index: usize,
/// Stack of label (start) that could form images and links.
///
/// Used when tokenizing [text content][crate::content::text].
@@ -216,6 +218,8 @@ pub struct Tokenizer<'a> {
///
/// Used when tokenizing [text content][crate::content::text].
pub media_list: Vec<Media>,
+ /// Current container state.
+ pub container: Option<ContainerState>,
/// Whether we would be interrupting something.
///
/// Used when tokenizing [flow content][crate::content::flow].
@@ -229,8 +233,6 @@ pub struct Tokenizer<'a> {
/// The previous line was a paragraph, and this line’s containers did not
/// match.
pub lazy: bool,
- /// Current container state.
- pub container: Option<ContainerState>,
}
impl<'a> Tokenizer<'a> {
@@ -248,14 +250,16 @@ impl<'a> Tokenizer<'a> {
stack: vec![],
events: vec![],
parse_state,
+ codes: vec![],
+ index: 0,
map: EditMap::new(),
label_start_stack: vec![],
label_start_list_loose: vec![],
media_list: vec![],
+ container: None,
interrupt: false,
concrete: false,
lazy: false,
- container: None,
// Assume about 10 resolvers.
resolvers: Vec::with_capacity(10),
resolver_ids: Vec::with_capacity(10),
@@ -288,8 +292,12 @@ impl<'a> Tokenizer<'a> {
}
/// Define a jump between two places.
- pub fn define_skip(&mut self, point: &Point) {
- define_skip_impl(self, point.line, (point.column, point.offset, point.index));
+ pub fn define_skip(&mut self, point: &Point, index: usize) {
+ define_skip_impl(
+ self,
+ point.line,
+ (point.column, point.offset, point.index, index),
+ );
}
/// Define the current place as a jump between two places.
@@ -297,7 +305,12 @@ impl<'a> Tokenizer<'a> {
define_skip_impl(
self,
self.point.line,
- (self.point.column, self.point.offset, self.point.index),
+ (
+ self.point.column,
+ self.point.offset,
+ self.point.index,
+ self.index,
+ ),
);
}
@@ -307,10 +320,11 @@ impl<'a> Tokenizer<'a> {
let at = self.point.line - self.line_start;
if self.point.column == 1 && at != self.column_start.len() {
- let (column, offset, index) = &self.column_start[at];
+ let (column, offset, index_abs, index_rel) = &self.column_start[at];
self.point.column = *column;
self.point.offset = *offset;
- self.point.index = *index;
+ self.point.index = *index_abs;
+ self.index = *index_rel;
}
}
@@ -326,6 +340,7 @@ impl<'a> Tokenizer<'a> {
assert!(!self.consumed, "expected code to not have been consumed: this might be because `x(code)` instead of `x` was returned");
self.point.index += 1;
+ self.index += 1;
match code {
Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
@@ -342,6 +357,7 @@ impl<'a> Tokenizer<'a> {
self.point.column,
self.point.offset,
self.point.index,
+ self.index,
));
}
@@ -482,11 +498,13 @@ impl<'a> Tokenizer<'a> {
) -> Box<StateFn> {
attempt_impl(
state_fn,
- |_code| false,
- vec![],
- |result: (&[Code], &[Code]), tokenizer: &mut Tokenizer, state| {
+ None,
+ self.index,
+ |result: (usize, usize), tokenizer: &mut Tokenizer, state| {
if matches!(state, State::Ok(_)) {
- feed_impl(tokenizer, result.1, after)
+ tokenizer.index = result.1;
+ tokenizer.consumed = true;
+ State::Fn(Box::new(after))
} else {
state
}
@@ -505,11 +523,12 @@ impl<'a> Tokenizer<'a> {
) -> Box<StateFn> {
attempt_impl(
state_fn,
- until,
- vec![],
- |result: (&[Code], &[Code]), tokenizer: &mut Tokenizer, state| {
+ Some(Box::new(until)),
+ self.index,
+ |result: (usize, usize), tokenizer: &mut Tokenizer, state| {
+ tokenizer.index = result.1;
tokenizer.consumed = true;
- feed_impl(tokenizer, result.1, done(state))
+ State::Fn(done(state))
},
)
}
@@ -532,16 +551,13 @@ impl<'a> Tokenizer<'a> {
attempt_impl(
state_fn,
- |_code| false,
- vec![],
- |result: (&[Code], &[Code]), tokenizer: &mut Tokenizer, state| {
+ None,
+ self.index,
+ |result: (usize, usize), tokenizer: &mut Tokenizer, state| {
tokenizer.free(previous);
- feed_twice_impl(
- tokenizer,
- result.0,
- result.1,
- done(matches!(state, State::Ok(_))),
- )
+ tokenizer.index = result.0;
+ tokenizer.consumed = true;
+ State::Fn(done(matches!(state, State::Ok(_))))
},
)
}
@@ -566,9 +582,9 @@ impl<'a> Tokenizer<'a> {
attempt_impl(
state_fn,
- |_code| false,
- vec![],
- |result: (&[Code], &[Code]), tokenizer: &mut Tokenizer, state| {
+ None,
+ self.index,
+ |result: (usize, usize), tokenizer: &mut Tokenizer, state| {
let ok = matches!(state, State::Ok(_));
if !ok {
@@ -577,12 +593,9 @@ impl<'a> Tokenizer<'a> {
log::debug!("attempt: {:?}, at {:?}", ok, tokenizer.point);
- feed_twice_impl(
- tokenizer,
- if ok { &[] } else { result.0 },
- result.1,
- done(ok),
- )
+ tokenizer.index = result.1;
+ tokenizer.consumed = true;
+ State::Fn(done(ok))
},
)
}
@@ -623,7 +636,7 @@ impl<'a> Tokenizer<'a> {
/// markdown into the state machine, and normally pauses after feeding.
pub fn push(
&mut self,
- codes: &[Code],
+ mut codes: Vec<Code>,
start: impl FnOnce(&mut Tokenizer, Code) -> State + 'static,
drain: bool,
) -> State {
@@ -632,7 +645,9 @@ impl<'a> Tokenizer<'a> {
// Let’s assume an event per character.
self.events.reserve(codes.len());
- let mut result = feed_impl(self, codes, start);
+ self.codes.append(&mut codes);
+
+ let mut result = feed_impl(self, start);
if drain {
let func = match result {
@@ -667,41 +682,34 @@ impl<'a> Tokenizer<'a> {
/// Used in [`Tokenizer::attempt`][Tokenizer::attempt] and [`Tokenizer::check`][Tokenizer::check].
fn attempt_impl(
state: impl FnOnce(&mut Tokenizer, Code) -> State + 'static,
- pause: impl Fn(Code) -> bool + 'static,
- mut codes: Vec<Code>,
- done: impl FnOnce((&[Code], &[Code]), &mut Tokenizer, State) -> State + 'static,
+ pause: Option<Box<dyn Fn(Code) -> bool + 'static>>,
+ start: usize,
+ done: impl FnOnce((usize, usize), &mut Tokenizer, State) -> State + 'static,
) -> Box<StateFn> {
- Box::new(|tokenizer, code| {
- if !codes.is_empty() && pause(tokenizer.previous) {
- let after = if matches!(code, Code::None) {
- vec![]
- } else {
- vec![code]
- };
-
- return done((&codes, &after), tokenizer, State::Fn(Box::new(state)));
+ Box::new(move |tokenizer, code| {
+ if let Some(ref func) = pause {
+ if tokenizer.index > start && func(tokenizer.previous) {
+ return done(
+ (start, tokenizer.index),
+ tokenizer,
+ State::Fn(Box::new(state)),
+ );
+ }
}
let state = state(tokenizer, code);
- match code {
- Code::None => {}
- _ => {
- codes.push(code);
- }
- }
-
match state {
State::Ok(back) => {
+ let stop = tokenizer.index - back;
assert!(
- back <= codes.len(),
- "`back` must be smaller than or equal to `codes.len()`"
+ stop >= start,
+ "`back` must not result in an index smaller than `start`"
);
- let remaining = codes.split_off(codes.len() - back);
- done((&codes, &remaining), tokenizer, state)
+ done((start, stop), tokenizer, state)
}
- State::Nok => done((&[], &codes), tokenizer, state),
- State::Fn(func) => State::Fn(attempt_impl(func, pause, codes, done)),
+ State::Nok => done((start, start), tokenizer, state),
+ State::Fn(func) => State::Fn(attempt_impl(func, pause, start, done)),
}
})
}
@@ -709,28 +717,23 @@ fn attempt_impl(
/// Feed a list of `codes` into `start`.
fn feed_impl(
tokenizer: &mut Tokenizer,
- codes: &[Code],
start: impl FnOnce(&mut Tokenizer, Code) -> State + 'static,
) -> State {
let mut state = State::Fn(Box::new(start));
- let mut index = 0;
tokenizer.consumed = true;
- while index < codes.len() {
- let code = codes[index];
+ while tokenizer.index < tokenizer.codes.len() {
+ let code = tokenizer.codes[tokenizer.index];
match state {
- State::Ok(back) => {
- state = State::Ok((codes.len() - index) + back);
+ State::Ok(_) | State::Nok => {
break;
}
- State::Nok => break,
State::Fn(func) => {
- log::debug!("main: passing: `{:?}` ({:?})", code, index);
+ log::debug!("main: passing: `{:?}` ({:?})", code, tokenizer.index);
tokenizer.expect(code, false);
state = func(tokenizer, code);
- index += 1;
}
}
}
@@ -738,37 +741,27 @@ fn feed_impl(
state
}
-/// Feed a list of `codes` into `start`.
-fn feed_twice_impl(
- tokenizer: &mut Tokenizer,
- left: &[Code],
- right: &[Code],
- start: impl FnOnce(&mut Tokenizer, Code) -> State + 'static,
-) -> State {
- let res = feed_impl(tokenizer, left, start);
-
- match res {
- State::Fn(func) => feed_impl(tokenizer, right, func),
- State::Ok(back) => State::Ok(back + right.len()),
- State::Nok => res,
- }
-}
-
/// Flush `start`: pass `eof`s to it until done.
fn flush_impl(
tokenizer: &mut Tokenizer,
start: impl FnOnce(&mut Tokenizer, Code) -> State + 'static,
) -> State {
let mut state = State::Fn(Box::new(start));
+ let max = tokenizer.index;
tokenizer.consumed = true;
loop {
match state {
State::Ok(_) | State::Nok => break,
State::Fn(func) => {
- log::debug!("main: passing eof");
- tokenizer.expect(Code::None, false);
- state = func(tokenizer, Code::None);
+ let code = if tokenizer.index < max {
+ tokenizer.codes[tokenizer.index]
+ } else {
+ Code::None
+ };
+ log::debug!("main: flushing {:?}", code);
+ tokenizer.expect(code, false);
+ state = func(tokenizer, code);
}
}
}
@@ -785,7 +778,7 @@ fn flush_impl(
///
/// This defines how much columns, offsets, and the `index` are increased when
/// consuming a line ending.
-fn define_skip_impl(tokenizer: &mut Tokenizer, line: usize, info: (usize, usize, usize)) {
+fn define_skip_impl(tokenizer: &mut Tokenizer, line: usize, info: (usize, usize, usize, usize)) {
log::debug!("position: define skip: {:?} -> ({:?})", line, info);
let at = line - tokenizer.line_start;
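Taken together, the src/tokenizer.rs changes are the core of the commit: `feed_impl` loses its slice parameter and walks the tokenizer-owned `codes` from `self.index`; attempts record `(start, stop)` index pairs instead of copying codes into side buffers, so rewinding becomes an index assignment and `feed_twice_impl` can be deleted; and `flush_impl` replays any codes still below the cursor before passing `Code::None`. A condensed sketch of the resulting single loop, using hypothetical reduced types (the real `State::Fn` also boxes a state function):

```rust
#[derive(Clone, Copy, Debug)]
enum Code {
    Char(char),
}

#[allow(dead_code)]
enum State {
    Ok(usize), // done; how many codes to back up over
    Nok,
    Fn(Box<dyn FnOnce(&mut Tokenizer, Code) -> State>),
}

struct Tokenizer {
    codes: Vec<Code>,
    index: usize,
}

/// One driver loop over the owned buffer; `index` advances inside the
/// state functions (the real `consume`), never in the driver itself.
fn feed_impl(
    tokenizer: &mut Tokenizer,
    start: impl FnOnce(&mut Tokenizer, Code) -> State + 'static,
) -> State {
    let mut state = State::Fn(Box::new(start));
    while tokenizer.index < tokenizer.codes.len() {
        let code = tokenizer.codes[tokenizer.index];
        match state {
            State::Ok(_) | State::Nok => break,
            State::Fn(func) => state = func(tokenizer, code),
        }
    }
    state
}

/// A toy state fn that consumes every code and asks to run again.
fn consume_all(tokenizer: &mut Tokenizer, _code: Code) -> State {
    tokenizer.index += 1; // the real `consume` also updates point/column
    State::Fn(Box::new(consume_all))
}

fn main() {
    let mut tokenizer = Tokenizer {
        codes: vec![Code::Char('x'), Code::Char('y')],
        index: 0,
    };
    let state = feed_impl(&mut tokenizer, consume_all);
    assert!(matches!(state, State::Fn(_))); // paused at end of input
    assert_eq!(tokenizer.index, 2);
}
```

Because nothing is copied per attempt and the driver never re-slices, a document is fed in one pass over one buffer, which is the single feed loop the commit message credits for the performance improvement.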