aboutsummaryrefslogtreecommitdiffstats
path: root/src/construct/code_indented.rs
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-07-01 15:36:38 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-07-01 15:39:01 +0200
commit41afec1ed898159e1df3bc1157768f2066dd85e5 (patch)
treed497994301b93c49116993198ef8824f6ce68b85 /src/construct/code_indented.rs
parent09fd0321daae69d52532b4bef762a202efe9a12e (diff)
downloadmarkdown-rs-41afec1ed898159e1df3bc1157768f2066dd85e5.tar.gz
markdown-rs-41afec1ed898159e1df3bc1157768f2066dd85e5.tar.bz2
markdown-rs-41afec1ed898159e1df3bc1157768f2066dd85e5.zip
Make paragraphs really fast
The approach that `micromark-js` takes is as follows: to parse a paragraph, check whether each line starts with something else. If it does, exit, otherwise continue. That is slow, because our actual flow parser does similar things: the work was being done twice. To fix this, this commit introduces parsing each line of a paragraph separately. And finally, when done with flow, combining adjacent paragraphs. This same mechanism is reused for setext headings. Additionally, this commit adds support for interrupting things (or not). E.g., HTML (flow, complete) cannot interrupt paragraphs. Definitions cannot interrupt paragraphs, and cannot be interrupted either, but they can follow each other.
Diffstat (limited to 'src/construct/code_indented.rs')
-rw-r--r--src/construct/code_indented.rs11
1 files changed, 9 insertions, 2 deletions
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs
index 99445b9..9bdfd71 100644
--- a/src/construct/code_indented.rs
+++ b/src/construct/code_indented.rs
@@ -59,8 +59,13 @@ use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
/// > filled line (that it has a non-whitespace character), because blank lines
/// > are parsed already, so we never run into that.
pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- tokenizer.enter(TokenType::CodeIndented);
- tokenizer.go(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), at_break)(tokenizer, code)
+ // Do not interrupt paragraphs.
+ if tokenizer.interrupt {
+ (State::Nok, None)
+ } else {
+ tokenizer.enter(TokenType::CodeIndented);
+ tokenizer.go(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), at_break)(tokenizer, code)
+ }
}
/// At a break.
@@ -110,6 +115,8 @@ fn content(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// ```
fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.exit(TokenType::CodeIndented);
+ // Feel free to interrupt.
+ tokenizer.interrupt = false;
(State::Ok, Some(vec![code]))
}