From 41afec1ed898159e1df3bc1157768f2066dd85e5 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Fri, 1 Jul 2022 15:36:38 +0200 Subject: Make paragraphs really fast The approach that `micromark-js` takes is as follows: to parse a paragraph, check whether each line starts with something else. If it does, exit; otherwise, continue. That is slow, because our actual flow parser does similar things: the work was being done twice. To fix this, this commit introduces parsing each line of a paragraph separately. And finally, when done with flow, combining adjacent paragraphs. This same mechanism is reused for setext headings. Additionally, this commit adds support for interrupting things (or not). E.g., HTML (flow, complete) cannot interrupt paragraphs. Definitions cannot interrupt paragraphs, and cannot be interrupted either, but they can follow each other. --- tests/autolink.rs | 2 +- tests/character_escape.rs | 2 +- tests/character_reference.rs | 2 +- tests/code_fenced.rs | 2 +- tests/code_indented.rs | 2 +- tests/definition.rs | 13 ++++++------- tests/hard_break_escape.rs | 2 +- tests/hard_break_trailing.rs | 2 +- tests/heading_atx.rs | 2 +- tests/html_flow.rs | 13 ++++++------- tests/html_text.rs | 2 +- tests/thematic_break.rs | 2 +- 12 files changed, 22 insertions(+), 24 deletions(-) (limited to 'tests') diff --git a/tests/autolink.rs b/tests/autolink.rs index f0486ef..9c28834 100644 --- a/tests/autolink.rs +++ b/tests/autolink.rs @@ -252,7 +252,7 @@ fn autolink() { "should not support a dash before a dot in email autolinks" ); - // To do: extensions. + // To do: turning things off. // assert_eq!( // micromark("", {extensions: [{disable: {null: ["autolink"]}}]}), // "

<a@b.co>

", diff --git a/tests/character_escape.rs b/tests/character_escape.rs index 26e9336..6200014 100644 --- a/tests/character_escape.rs +++ b/tests/character_escape.rs @@ -79,7 +79,7 @@ fn character_escape() { "should escape in fenced code info" ); - // // To do: extensions + // // To do: turning things off // assert_eq!( // micromark("\\> a", {extensions: [{disable: {null: ["characterEscape"]}}]}), // "

\\> a

", diff --git a/tests/character_reference.rs b/tests/character_reference.rs index 3951e00..c87657e 100644 --- a/tests/character_reference.rs +++ b/tests/character_reference.rs @@ -190,7 +190,7 @@ fn character_reference() { "should not support the other characters inside a hexademical" ); - // To do: extensions. + // To do: turning things off. // assert_eq!( // micromark("&", { // extensions: [{disable: {null: ["characterReferences"]}}] diff --git a/tests/code_fenced.rs b/tests/code_fenced.rs index 0e19637..b7d8307 100644 --- a/tests/code_fenced.rs +++ b/tests/code_fenced.rs @@ -252,7 +252,7 @@ fn code_fenced() { // "should not support lazyness (3)" // ); - // To do: extensions. + // To do: turning things off. // assert_eq!( // micromark("```", {extensions: [{disable: {null: ["codeFenced"]}}]}), // "

```

", diff --git a/tests/code_indented.rs b/tests/code_indented.rs index 0190497..773e3d4 100644 --- a/tests/code_indented.rs +++ b/tests/code_indented.rs @@ -119,7 +119,7 @@ fn code_indented() { // "should not support lazyness (7)" // ); - // To do: extensions. + // To do: turning things off. // assert_eq!( // micromark(" a", {extensions: [{disable: {null: ["codeIndented"]}}]}), // "

a

", diff --git a/tests/definition.rs b/tests/definition.rs index ba4e384..df99f74 100644 --- a/tests/definition.rs +++ b/tests/definition.rs @@ -375,12 +375,11 @@ fn definition() { "should not support a final (unbalanced) right paren in a raw destination “before” a title" ); - // To do: do not let code (indented) interrupt definitions. - // assert_eq!( - // micromark(" [a]: b \"c\"\n [d]: e\n [f]: g \"h\"\n [i]: j\n\t[k]: l (m)\n\t n [k] o"), - // "

n k o

", - // "should support subsequent indented definitions" - // ); + assert_eq!( + micromark(" [a]: b \"c\"\n [d]: e\n [f]: g \"h\"\n [i]: j\n\t[k]: l (m)\n\t n [k] o"), + "

n k o

", + "should support subsequent indented definitions" + ); assert_eq!( micromark("[a\n b]: c\n\n[a\n b]"), @@ -406,7 +405,7 @@ fn definition() { "should not support definitions w/ text + a closing paren as a raw destination" ); - // To do: support turning off things. + // To do: turning things off. // assert_eq!( // micromark("[foo]: /url \"title\"", { // extensions: [{disable: {null: ["definition"]}}] diff --git a/tests/hard_break_escape.rs b/tests/hard_break_escape.rs index c4f6f1d..740e706 100644 --- a/tests/hard_break_escape.rs +++ b/tests/hard_break_escape.rs @@ -40,7 +40,7 @@ fn hard_break_escape() { "should not support escape hard breaks at the end of a heading" ); - // // To do: turning off things. + // // To do: turning things off. // assert_eq!( // micromark("a\\\nb", {extensions: [{disable: {null: ["hardBreakEscape"]}}]}), // "

a\\\nb

", diff --git a/tests/hard_break_trailing.rs b/tests/hard_break_trailing.rs index 0dbbbdb..2a4b534 100644 --- a/tests/hard_break_trailing.rs +++ b/tests/hard_break_trailing.rs @@ -118,7 +118,7 @@ fn hard_break_trailing() { // "should support a mixed line suffix after a span (3)" // ); - // // To do: turning off things. + // // To do: turning things off. // assert_eq!( // micromark("a \nb", {extensions: [{disable: {null: ["hardBreakTrailing"]}}]}), // "

a\nb

", diff --git a/tests/heading_atx.rs b/tests/heading_atx.rs index 2548056..ef5846a 100644 --- a/tests/heading_atx.rs +++ b/tests/heading_atx.rs @@ -196,7 +196,7 @@ fn heading_atx() { // "should not support lazyness (2)" // ); - // Extensions: + // To do: turning things off: // assert_eq!( // micromark("# a", {extensions: [{disable: {null: ["headingAtx"]}}]}), // "

# a

", diff --git a/tests/html_flow.rs b/tests/html_flow.rs index 455c5b8..3b69671 100644 --- a/tests/html_flow.rs +++ b/tests/html_flow.rs @@ -21,7 +21,7 @@ fn html_flow() { "should support a heading w/ rank 1" ); - // To do: extensions. + // To do: turning things off. // assert_eq!( // micromark_with_options("", {extensions: [{disable: {null: ["htmlFlow"]}}]}), // "

<x>

", @@ -789,12 +789,11 @@ fn html_flow_7_complete() { "should support interleaving w/ whitespace-only blank lines" ); - // To do: disallow html (complete) from interrupting. - // assert_eq!( - // micromark_with_options("Foo\n\nbaz", DANGER), - // "

Foo\n\nbaz

", - // "should not support interrupting paragraphs w/ complete tags" - // ); + assert_eq!( + micromark_with_options("Foo\n\nbaz", DANGER), + "

Foo\n\nbaz

", + "should not support interrupting paragraphs w/ complete tags" + ); assert_eq!( micromark_with_options(" a", DANGER), "should support an EOL in an instruction" ); - // To do: extensions. + // To do: turning things off. // assert_eq!( // micromark_with_options("a ", {extensions: [{disable: {null: ["htmlText"]}}]}), // "

a <x>

", diff --git a/tests/thematic_break.rs b/tests/thematic_break.rs index e71ae22..06b1193 100644 --- a/tests/thematic_break.rs +++ b/tests/thematic_break.rs @@ -169,7 +169,7 @@ fn thematic_break() { // "should not support lazyness (2)" // ); - // To do: extensions. + // To do: turning things off. // assert_eq!( // micromark("***", {extensions: [{disable: {null: ["thematicBreak"]}}]}), // "

***

", -- cgit