From 41afec1ed898159e1df3bc1157768f2066dd85e5 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Fri, 1 Jul 2022 15:36:38 +0200 Subject: Make paragraphs really fast The approach that `micromark-js` takes is as follows: to parse a paragraph, check whether each line starts with something else. If it does, exit, otherwise continue. That is slow, because our actual flow parser does similar things: the work was being done twice. To fix this, this commit introduces parsing each line of a paragraph separately. And finally, when done with flow, combining adjacent paragraphs. This same mechanism is reused for setext headings. Additionally, this commit adds support for interrupting things (or not). E.g., HTML (flow, complete) cannot interrupt paragraphs. Definitions cannot interrupt paragraphs, and cannot be interrupted either, but they can follow each other. --- src/content/flow.rs | 46 ++++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 22 deletions(-) (limited to 'src/content/flow.rs') diff --git a/src/content/flow.rs b/src/content/flow.rs index 0d3ede0..3ff948d 100644 --- a/src/content/flow.rs +++ b/src/content/flow.rs @@ -92,26 +92,6 @@ fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { } } -/// After a blank line. -/// -/// Move to `start` afterwards. -/// -/// ```markdown -/// ␠␠| -/// ``` -fn blank_line_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - match code { - Code::None => (State::Ok, None), - Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { - tokenizer.enter(TokenType::BlankLineEnding); - tokenizer.consume(code); - tokenizer.exit(TokenType::BlankLineEnding); - (State::Fn(Box::new(start)), None) - } - _ => unreachable!("expected eol/eof after blank line `{:?}`", code), - } -} - /// Before flow (initial). /// /// “Initial” flow means unprefixed flow, so right at the start of a line. 
@@ -133,16 +113,38 @@ fn initial_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { Box::new(code_fenced), Box::new(html_flow), Box::new(heading_atx), + Box::new(heading_setext), Box::new(thematic_break), Box::new(definition), - Box::new(heading_setext), ], |ok| Box::new(if ok { after } else { before_paragraph }), )(tokenizer, code), } } -/// After a flow construct. +/// After a blank line. +/// +/// Move to `start` afterwards. +/// +/// ```markdown +/// ␠␠| +/// ``` +fn blank_line_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::None => (State::Ok, None), + Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { + tokenizer.enter(TokenType::BlankLineEnding); + tokenizer.consume(code); + tokenizer.exit(TokenType::BlankLineEnding); + // Feel free to interrupt. + tokenizer.interrupt = false; + (State::Fn(Box::new(start)), None) + } + _ => unreachable!("expected eol/eof after blank line `{:?}`", code), + } +} + +/// After something. /// /// ```markdown /// ## alpha| -- cgit