Refactor to drastically improve perf around whitespace

author: Titus Wormer <tituswormer@gmail.com> 2022-07-26 16:37:13 +0200
committer: Titus Wormer <tituswormer@gmail.com> 2022-07-26 16:37:13 +0200
commit: a6b317ac7fbc95b8584056b3cebffbf9d1bba2c6 (patch)
tree: fd7be2fe6d7355d3aafaf8b731f0e0b48624debc
parent: 297784cb925b1196d89479fa24c898703ae598d6 (diff)
download: markdown-rs-a6b317ac7fbc95b8584056b3cebffbf9d1bba2c6.tar.gz
markdown-rs-a6b317ac7fbc95b8584056b3cebffbf9d1bba2c6.tar.bz2
markdown-rs-a6b317ac7fbc95b8584056b3cebffbf9d1bba2c6.zip
13 files changed, 263 insertions, 244 deletions
diff --git a/src/constant.rs b/src/constant.rs
index b18bf3f..b8b36ad 100644
--- a/src/constant.rs
+++ b/src/constant.rs
@@ -68,9 +68,9 @@ pub const CHARACTER_REFERENCE_NAMED_SIZE_MAX: usize = 31;
 pub const CODE_FENCED_SEQUENCE_SIZE_MIN: usize = 3;
 
 /// The number of preceding spaces needed for a [hard break
-/// (trailing)][hard_break_trailing] to form.
+/// (trailing)][whitespace] to form.
 ///
-/// [hard_break_trailing]: crate::construct::hard_break_trailing
+/// [whitespace]: crate::construct::partial_whitespace
 pub const HARD_BREAK_PREFIX_SIZE_MIN: usize = 2;
 
 /// The max number of markers allowed to form a [heading (atx)][heading_atx].
diff --git a/src/construct/hard_break_escape.rs b/src/construct/hard_break_escape.rs
index d45d685..40a83ef 100644
--- a/src/construct/hard_break_escape.rs
+++ b/src/construct/hard_break_escape.rs
@@ -27,7 +27,6 @@
 //! ## Tokens
 //!
 //! *   [`HardBreakEscape`][Token::HardBreakEscape]
-//! *   [`HardBreakEscapeMarker`][Token::HardBreakEscapeMarker]
 //!
 //! ## References
 //!
@@ -37,7 +36,7 @@
 //! [text]: crate::content::text
 //! [character_escape]: crate::construct::character_escape
 //! [character_reference]: crate::construct::character_reference
-//! [hard_break_trailing]: crate::construct::hard_break_trailing
+//! [hard_break_trailing]: crate::construct::partial_whitespace
 //! [html]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-br-element
 
 use crate::token::Token;
@@ -54,9 +53,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Code::Char('\\') if tokenizer.parse_state.constructs.hard_break_escape => {
             tokenizer.enter(Token::HardBreakEscape);
-            tokenizer.enter(Token::HardBreakEscapeMarker);
             tokenizer.consume();
-            tokenizer.exit(Token::HardBreakEscapeMarker);
             State::Fn(Box::new(inside))
         }
         _ => State::Nok,
diff --git a/src/construct/hard_break_trailing.rs b/src/construct/hard_break_trailing.rs
deleted file mode 100644
index f0ef83b..0000000
--- a/src/construct/hard_break_trailing.rs
+++ /dev/null
@@ -1,88 +0,0 @@
-//! Hard break (trailing) is a construct that occurs in the  [text][] content
-//! type.
-//!
-//! They’re formed with the following BNF:
-//!
-//! ```bnf
-//! ; Restriction: followed by a line ending  (that is part of the construct
-//! ; instead of ending it).
-//! hard_break_trailing ::= 2*' '
-//! ```
-//!
-//! The minimum number of the spaces is defined in
-//! [`HARD_BREAK_PREFIX_SIZE_MIN`][hard_break_prefix_size_min].
-//!
-//! Hard breaks in markdown relate to the HTML element `<br>`.
-//! See [*§ 4.5.27 The `br` element* in the HTML spec][html] for more info.
-//!
-//! It is also possible to create a hard break with a similar construct: a
-//! [hard break (escape)][hard_break_escape] is a backslash followed
-//! by a line ending.
-//! That construct is recommended because it is similar to a
-//! [character escape][character_escape] and similar to how line endings can be
-//! “escaped” in other languages.
-//! Trailing spaces are typically invisible in editors, or even automatically
-//! removed, making hard break (trailing) hard to use.
-//!
-//! ## Tokens
-//!
-//! *   [`HardBreakTrailing`][Token::HardBreakTrailing]
-//! *   [`HardBreakTrailingSpace`][Token::HardBreakTrailingSpace]
-//!
-//! ## References
-//!
-//! *   [`lib/initialize/text.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark/dev/lib/initialize/text.js)
-//! *   [*§ 6.7 Hard line breaks* in `CommonMark`](https://spec.commonmark.org/0.30/#hard-line-breaks)
-//!
-//! [text]: crate::content::text
-//! [hard_break_escape]: crate::construct::hard_break_escape
-//! [character_escape]: crate::construct::character_escape
-//! [hard_break_prefix_size_min]: crate::constant::HARD_BREAK_PREFIX_SIZE_MIN
-//! [html]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-br-element
-
-use crate::constant::HARD_BREAK_PREFIX_SIZE_MIN;
-use crate::token::Token;
-use crate::tokenizer::{Code, State, Tokenizer};
-
-/// Start of a hard break (trailing).
-///
-/// ```markdown
-/// > | a␠␠
-///      ^
-///   | b
-/// ```
-pub fn start(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        Code::Char(' ') if tokenizer.parse_state.constructs.hard_break_trailing => {
-            tokenizer.enter(Token::HardBreakTrailing);
-            tokenizer.enter(Token::HardBreakTrailingSpace);
-            tokenizer.consume();
-            State::Fn(Box::new(|t| inside(t, 1)))
-        }
-        _ => State::Nok,
-    }
-}
-
-/// Inside the hard break (trailing).
-///
-/// ```markdown
-/// > | a␠␠
-///      ^
-///   | b
-/// ```
-fn inside(tokenizer: &mut Tokenizer, size: usize) -> State {
-    match tokenizer.current {
-        Code::Char(' ') => {
-            tokenizer.consume();
-            State::Fn(Box::new(move |t| inside(t, size + 1)))
-        }
-        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')
-            if size >= HARD_BREAK_PREFIX_SIZE_MIN =>
-        {
-            tokenizer.exit(Token::HardBreakTrailingSpace);
-            tokenizer.exit(Token::HardBreakTrailing);
-            State::Ok
-        }
-        _ => State::Nok,
-    }
-}
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index be9dfe3..569c609 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -25,7 +25,6 @@
 //! *   [code (text)][code_text]
 //! *   [definition][]
 //! *   [hard break (escape)][hard_break_escape]
-//! *   [hard break (trailing)][hard_break_trailing]
 //! *   [heading (atx)][heading_atx]
 //! *   [heading (setext)][heading_setext]
 //! *   [html (flow)][html_flow]
@@ -37,6 +36,9 @@
 //! *   [paragraph][]
 //! *   [thematic break][thematic_break]
 //!
+//! > 👉 **Note**: for performance reasons, hard break (trailing) is formed by
+//! > [whitespace][partial_whitespace].
+//!
 //! There are also several routines used in different places:
 //!
 //! *   [data][partial_data]
@@ -73,7 +75,6 @@ pub mod code_indented;
 pub mod code_text;
 pub mod definition;
 pub mod hard_break_escape;
-pub mod hard_break_trailing;
 pub mod heading_atx;
 pub mod heading_setext;
 pub mod html_flow;
diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs
index 86492b5..4216276 100644
--- a/src/construct/partial_data.rs
+++ b/src/construct/partial_data.rs
@@ -41,7 +41,7 @@ fn at_break(tokenizer: &mut Tokenizer, stop: &'static [Code]) -> State {
             State::Fn(Box::new(move |t| at_break(t, stop)))
         }
         _ if stop.contains(&tokenizer.current) => {
-            tokenizer.register_resolver("data".to_string(), Box::new(resolve_data));
+            tokenizer.register_resolver_before("data".to_string(), Box::new(resolve_data));
             State::Ok
         }
         _ => {
diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs
index a97ac29..5f1a917 100644
--- a/src/construct/partial_space_or_tab.rs
+++ b/src/construct/partial_space_or_tab.rs
@@ -98,7 +98,7 @@ pub fn space_or_tab_eol() -> Box<StateFn> {
 pub fn space_or_tab_eol_with_options(options: EolOptions) -> Box<StateFn> {
     Box::new(move |tokenizer| {
         let mut info = EolInfo {
-            connect: false,
+            connect: options.connect,
             ok: false,
             options,
         };
diff --git a/src/construct/partial_whitespace.rs b/src/construct/partial_whitespace.rs
index afff1c4..4c94c7d 100644
--- a/src/construct/partial_whitespace.rs
+++ b/src/construct/partial_whitespace.rs
@@ -1,62 +1,211 @@
 //! Trailing whitespace occurs in [string][] and [text][].
 //!
-//! It occurs at the start or end of the whole, or around line endings.
-//! This whitespace is ignored
+//! It occurs around line endings, and, in the case of text content it also
+//! occurs at the start or end of the whole.
 //!
 //! They’re formed with the following BNF:
 //!
 //! ```bnf
-//! ; Restriction: the start and end here count as an eol.
+//! ; Restriction: the start and end here count as an eol in the case of `text`.
 //! whitespace ::= 0.*space_or_tab eol 0.*space_or_tab
 //! ```
 //!
-//! This is similar to [`space_or_tab_eol`][space_or_tab_eol], with the main
-//! difference that that *does not* require a line ending and parses any
-//! `space_or_tab` with one line ending.
-//! This instead *requires* the line ending (or eol).
+//! Normally this whitespace is ignored.
+//! In the case of text content, whitespace before a line ending that
+//! consists solely of spaces, at least 2, forms a hard break (trailing).
+//!
+//! The minimum number of the spaces is defined in
+//! [`HARD_BREAK_PREFIX_SIZE_MIN`][hard_break_prefix_size_min].
+//!
+//! Hard breaks in markdown relate to the HTML element `<br>`.
+//! See [*§ 4.5.27 The `br` element* in the HTML spec][html] for more info.
+//!
+//! It is also possible to create a hard break with a similar construct: a
+//! [hard break (escape)][hard_break_escape] is a backslash followed
+//! by a line ending.
+//! That construct is recommended because it is similar to a
+//! [character escape][character_escape] and similar to how line endings can be
+//! “escaped” in other languages.
+//! Trailing spaces are typically invisible in editors, or even automatically
+//! removed, making hard break (trailing) hard to use.
+//! ## Tokens
+//!
+//! *   [`HardBreakTrailing`][Token::HardBreakTrailing]
+//! *   [`SpaceOrTab`][Token::SpaceOrTab]
 //!
 //! ## References
 //!
 //! *   [`initialize/text.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark/dev/lib/initialize/text.js)
+//! *   [*§ 6.7 Hard line breaks* in `CommonMark`](https://spec.commonmark.org/0.30/#hard-line-breaks)
 //!
 //! [string]: crate::content::string
 //! [text]: crate::content::text
-//! [space_or_tab_eol]: crate::construct::partial_space_or_tab::space_or_tab_eol
-
-use super::partial_space_or_tab::space_or_tab;
-use crate::tokenizer::{Code, State, Tokenizer};
-
-/// Parse initial or final whitespace.
-pub fn whitespace(tokenizer: &mut Tokenizer) -> State {
-    tokenizer.go(
-        // Nothing if there’s no whitespace.
-        space_or_tab(),
-        if matches!(
-            tokenizer.previous,
-            Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')
-        ) {
-            // If there’s whitespace, and we were at an eol/eof, `ok`
-            ok
-        } else {
-            // If there’s whitespace, and we were not at an eol/eof, there must be one here.
-            at_eol
-        },
-    )(tokenizer)
+//! [hard_break_escape]: crate::construct::hard_break_escape
+//! [character_escape]: crate::construct::character_escape
+//! [hard_break_prefix_size_min]: crate::constant::HARD_BREAK_PREFIX_SIZE_MIN
+//! [html]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-br-element
+
+use crate::constant::HARD_BREAK_PREFIX_SIZE_MIN;
+use crate::token::Token;
+use crate::tokenizer::{Code, Event, EventType, Tokenizer};
+use crate::util::span;
+
+/// Create a resolver that calls `resolve_whitespace` with the given `hard_break` and `trim_whole` flags.
+pub fn create_resolve_whitespace(hard_break: bool, trim_whole: bool) -> impl Fn(&mut Tokenizer) {
+    move |t| resolve_whitespace(t, hard_break, trim_whole)
 }
 
-/// After whitespace, at an eol/eof.
-fn at_eol(tokenizer: &mut Tokenizer) -> State {
-    if matches!(
-        tokenizer.current,
-        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')
-    ) {
-        ok(tokenizer)
-    } else {
-        State::Nok
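+/// Trim whitespace of `Data` events next to line endings and, if `trim_whole`, at the start and end of all events.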
+pub fn resolve_whitespace(tokenizer: &mut Tokenizer, hard_break: bool, trim_whole: bool) {
+    let mut index = 0;
+
+    while index < tokenizer.events.len() {
+        let event = &tokenizer.events[index];
+
+        if event.event_type == EventType::Exit && event.token_type == Token::Data {
+            let trim_start = (trim_whole && index == 1)
+                || (index > 1 && tokenizer.events[index - 2].token_type == Token::LineEnding);
+            let trim_end = (trim_whole && index == tokenizer.events.len() - 1)
+                || (index + 1 < tokenizer.events.len()
+                    && tokenizer.events[index + 1].token_type == Token::LineEnding);
+
+            trim_data(tokenizer, index, trim_start, trim_end, hard_break);
+        }
+
+        index += 1;
     }
 }
 
-/// Fine.
-fn ok(_tokenizer: &mut Tokenizer) -> State {
-    State::Ok
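+/// Turn trailing whitespace of one `Data` event into `SpaceOrTab` or `HardBreakTrailing`, and leading whitespace into `SpaceOrTab`.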
+#[allow(clippy::too_many_lines)]
+fn trim_data(
+    tokenizer: &mut Tokenizer,
+    exit_index: usize,
+    trim_start: bool,
+    trim_end: bool,
+    hard_break: bool,
+) {
+    let mut codes = span::codes(
+        &tokenizer.parse_state.codes,
+        &span::from_exit_event(&tokenizer.events, exit_index),
+    );
+
+    if trim_end {
+        let mut index = codes.len();
+        let mut vs = 0;
+        let mut spaces_only = true;
+        while index > 0 {
+            match codes[index - 1] {
+                Code::Char(' ') => {}
+                Code::Char('\t') => spaces_only = false,
+                Code::VirtualSpace => {
+                    vs += 1;
+                    spaces_only = false;
+                }
+                _ => break,
+            }
+
+            index -= 1;
+        }
+
+        let diff = codes.len() - index;
+        let token_type = if spaces_only
+            && hard_break
+            && exit_index + 1 < tokenizer.events.len()
+            && diff >= HARD_BREAK_PREFIX_SIZE_MIN
+        {
+            Token::HardBreakTrailing
+        } else {
+            Token::SpaceOrTab
+        };
+
+        // The whole data is whitespace.
+        // We can be very fast: we only change the token types.
+        if index == 0 {
+            tokenizer.events[exit_index - 1].token_type = token_type.clone();
+            tokenizer.events[exit_index].token_type = token_type;
+            return;
+        }
+
+        if diff > 0 {
+            let exit_point = tokenizer.events[exit_index].point.clone();
+            let mut enter_point = exit_point.clone();
+            enter_point.index -= diff;
+            enter_point.column -= diff - vs;
+            enter_point.offset -= diff - vs;
+
+            tokenizer.map.add(
+                exit_index + 1,
+                0,
+                vec![
+                    Event {
+                        event_type: EventType::Enter,
+                        token_type: token_type.clone(),
+                        point: enter_point.clone(),
+                        link: None,
+                    },
+                    Event {
+                        event_type: EventType::Exit,
+                        token_type,
+                        point: exit_point,
+                        link: None,
+                    },
+                ],
+            );
+
+            tokenizer.events[exit_index].point = enter_point;
+            codes = &codes[..index];
+        }
+    }
+
+    if trim_start {
+        let mut index = 0;
+        let mut vs = 0;
+        while index < codes.len() {
+            match codes[index] {
+                Code::Char(' ' | '\t') => {}
+                Code::VirtualSpace => vs += 1,
+                _ => break,
+            }
+
+            index += 1;
+        }
+
+        // The whole data is whitespace.
+        // We can be very fast: we only change the token types.
+        if index == codes.len() {
+            tokenizer.events[exit_index - 1].token_type = Token::SpaceOrTab;
+            tokenizer.events[exit_index].token_type = Token::SpaceOrTab;
+            return;
+        }
+
+        if index > 0 {
+            let enter_point = tokenizer.events[exit_index - 1].point.clone();
+            let mut exit_point = enter_point.clone();
+            exit_point.index += index;
+            exit_point.column += index - vs;
+            exit_point.offset += index - vs;
+
+            tokenizer.map.add(
+                exit_index - 1,
+                0,
+                vec![
+                    Event {
+                        event_type: EventType::Enter,
+                        token_type: Token::SpaceOrTab,
+                        point: enter_point,
+                        link: None,
+                    },
+                    Event {
+                        event_type: EventType::Exit,
+                        token_type: Token::SpaceOrTab,
+                        point: exit_point.clone(),
+                        link: None,
+                    },
+                ],
+            );
+
+            tokenizer.events[exit_index - 1].point = exit_point;
+        }
+    }
 }
diff --git a/src/content/document.rs b/src/content/document.rs
index 0c3cef7..32b32ba 100644
--- a/src/content/document.rs
+++ b/src/content/document.rs
@@ -251,12 +251,13 @@ fn container_new_before(tokenizer: &mut Tokenizer, info: DocumentInfo) -> State
                 });
 
                 tokenizer.attempt(list_item, |ok| {
-                    let func = if ok {
-                        container_new_after
-                    } else {
-                        containers_after
-                    };
-                    Box::new(move |t| func(t, info))
+                    Box::new(move |t| {
+                        if ok {
+                            container_new_after(t, info)
+                        } else {
+                            containers_after(t, info)
+                        }
+                    })
                 })(tokenizer)
             })
         }
diff --git a/src/content/string.rs b/src/content/string.rs
index 6d45f94..c6c0094 100644
--- a/src/content/string.rs
+++ b/src/content/string.rs
@@ -14,37 +14,33 @@
 
 use crate::construct::{
     character_escape::start as character_escape, character_reference::start as character_reference,
-    partial_data::start as data, partial_whitespace::whitespace,
+    partial_data::start as data, partial_whitespace::create_resolve_whitespace,
 };
 use crate::tokenizer::{Code, State, Tokenizer};
 
-const MARKERS: [Code; 5] = [
-    Code::VirtualSpace, // `whitespace`
-    Code::Char('\t'),   // `whitespace`
-    Code::Char(' '),    // `hard_break_trailing`, `whitespace`
-    Code::Char('&'),
-    Code::Char('\\'),
-];
+const MARKERS: [Code; 2] = [Code::Char('&'), Code::Char('\\')];
 
-/// Before string.
+/// Start of string.
 pub fn start(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.register_resolver(
+        "whitespace".to_string(),
+        Box::new(create_resolve_whitespace(false, false)),
+    );
+    before(tokenizer)
+}
+
+/// Before string.
+fn before(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Code::None => State::Ok,
         _ => tokenizer.attempt_n(
-            vec![
-                Box::new(character_reference),
-                Box::new(character_escape),
-                Box::new(whitespace),
-            ],
-            |ok| {
-                let func = if ok { start } else { before_data };
-                Box::new(func)
-            },
+            vec![Box::new(character_reference), Box::new(character_escape)],
+            |ok| Box::new(if ok { before } else { before_data }),
         )(tokenizer),
     }
 }
 
 /// At data.
 fn before_data(tokenizer: &mut Tokenizer) -> State {
-    tokenizer.go(|t| data(t, &MARKERS), start)(tokenizer)
+    tokenizer.go(|t| data(t, &MARKERS), before)(tokenizer)
 }
diff --git a/src/content/text.rs b/src/content/text.rs
index a9cf17c..4248053 100644
--- a/src/content/text.rs
+++ b/src/content/text.rs
@@ -12,40 +12,50 @@
 //! *   [Character reference][crate::construct::character_reference]
 //! *   [Code (text)][crate::construct::code_text]
 //! *   [Hard break (escape)][crate::construct::hard_break_escape]
-//! *   [Hard break (trailing)][crate::construct::hard_break_trailing]
 //! *   [HTML (text)][crate::construct::html_text]
 //! *   [Label start (image)][crate::construct::label_start_image]
 //! *   [Label start (link)][crate::construct::label_start_link]
 //! *   [Label end][crate::construct::label_end]
+//!
+//! > 👉 **Note**: for performance reasons, hard break (trailing) is formed by
+//! > [whitespace][crate::construct::partial_whitespace].
 
 use crate::construct::{
     attention::start as attention, autolink::start as autolink,
     character_escape::start as character_escape, character_reference::start as character_reference,
     code_text::start as code_text, hard_break_escape::start as hard_break_escape,
-    hard_break_trailing::start as hard_break_trailing, html_text::start as html_text,
-    label_end::start as label_end, label_start_image::start as label_start_image,
-    label_start_link::start as label_start_link, partial_data::start as data,
-    partial_whitespace::whitespace,
+    html_text::start as html_text, label_end::start as label_end,
+    label_start_image::start as label_start_image, label_start_link::start as label_start_link,
+    partial_data::start as data, partial_whitespace::create_resolve_whitespace,
 };
 use crate::tokenizer::{Code, State, Tokenizer};
 
-const MARKERS: [Code; 12] = [
-    Code::VirtualSpace, // `whitespace`
-    Code::Char('\t'),   // `whitespace`
-    Code::Char(' '),    // `hard_break_trailing`, `whitespace`
-    Code::Char('!'),    // `label_start_image`
-    Code::Char('&'),    // `character_reference`
-    Code::Char('*'),    // `attention`
-    Code::Char('<'),    // `autolink`, `html_text`
-    Code::Char('['),    // `label_start_link`
-    Code::Char('\\'),   // `character_escape`, `hard_break_escape`
-    Code::Char(']'),    // `label_end`
-    Code::Char('_'),    // `attention`
-    Code::Char('`'),    // `code_text`
+const MARKERS: [Code; 9] = [
+    Code::Char('!'),  // `label_start_image`
+    Code::Char('&'),  // `character_reference`
+    Code::Char('*'),  // `attention`
+    Code::Char('<'),  // `autolink`, `html_text`
+    Code::Char('['),  // `label_start_link`
+    Code::Char('\\'), // `character_escape`, `hard_break_escape`
+    Code::Char(']'),  // `label_end`
+    Code::Char('_'),  // `attention`
+    Code::Char('`'),  // `code_text`
 ];
 
-/// Before text.
+/// Start of text.
 pub fn start(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.register_resolver(
+        "whitespace".to_string(),
+        Box::new(create_resolve_whitespace(
+            tokenizer.parse_state.constructs.hard_break_trailing,
+            true,
+        )),
+    );
+    before(tokenizer)
+}
+
+/// Before text.
+pub fn before(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Code::None => State::Ok,
         _ => tokenizer.attempt_n(
@@ -56,17 +66,12 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
                 Box::new(character_reference),
                 Box::new(code_text),
                 Box::new(hard_break_escape),
-                Box::new(hard_break_trailing),
                 Box::new(html_text),
                 Box::new(label_end),
                 Box::new(label_start_image),
                 Box::new(label_start_link),
-                Box::new(whitespace),
             ],
-            |ok| {
-                let func = if ok { start } else { before_data };
-                Box::new(func)
-            },
+            |ok| Box::new(if ok { before } else { before_data }),
         )(tokenizer),
     }
 }
@@ -77,5 +82,5 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 /// |qwe
 /// ```
 fn before_data(tokenizer: &mut Tokenizer) -> State {
-    tokenizer.go(|t| data(t, &MARKERS), start)(tokenizer)
+    tokenizer.go(|t| data(t, &MARKERS), before)(tokenizer)
 }
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index 8aa4df1..a78f5e2 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -95,12 +95,14 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool {
                         tokenizer.define_skip(&enter.point);
                     }
 
-                    let func = match state {
-                        State::Fn(func) => func,
-                        _ => unreachable!("cannot be ok/nok"),
-                    };
-
-                    state = tokenizer.push(enter.point.index, events[index + 1].point.index, func);
+                    state = tokenizer.push(
+                        enter.point.index,
+                        events[index + 1].point.index,
+                        match state {
+                            State::Fn(func) => func,
+                            _ => unreachable!("cannot be ok/nok"),
+                        },
+                    );
 
                     link_index = link_curr.next;
                 }
diff --git a/src/token.rs b/src/token.rs
index fd7999d..a0479e1 100644
--- a/src/token.rs
+++ b/src/token.rs
@@ -872,25 +872,6 @@ pub enum Token {
     /// *   **Context**:
     ///     [text content][crate::content::text]
     /// *   **Content model**:
-    ///     [`HardBreakEscapeMarker`][Token::HardBreakEscapeMarker]
-    /// *   **Construct**:
-    ///     [`hard_break_escape`][crate::construct::hard_break_escape]
-    ///
-    /// ## Example
-    ///
-    /// ```markdown
-    /// > | a\␊
-    ///      ^^
-    /// > | b
-    /// ```
-    HardBreakEscape,
-    /// Hard break (escape) marker.
-    ///
-    /// ## Info
-    ///
-    /// *   **Context**:
-    ///     [text content][crate::content::text]
-    /// *   **Content model**:
     ///     void
     /// *   **Construct**:
     ///     [`hard_break_escape`][crate::construct::hard_break_escape]
@@ -902,7 +883,7 @@ pub enum Token {
     ///      ^
     /// > | b
     /// ```
-    HardBreakEscapeMarker,
+    HardBreakEscape,
     /// Whole hard break (trailing).
     ///
     /// ## Info
@@ -910,28 +891,9 @@ pub enum Token {
     /// *   **Context**:
     ///     [text content][crate::content::text]
     /// *   **Content model**:
-    ///     [`HardBreakTrailingSpace`][Token::HardBreakTrailingSpace]
-    /// *   **Construct**:
-    ///     [`hard_break_trailing`][crate::construct::hard_break_trailing]
-    ///
-    /// ## Example
-    ///
-    /// ```markdown
-    /// > | a␠␠␊
-    ///      ^^^
-    /// > | b
-    /// ```
-    HardBreakTrailing,
-    /// Hard break (trailing) spaces.
-    ///
-    /// ## Info
-    ///
-    /// *   **Context**:
-    ///     [`HardBreakTrailing`][Token::HardBreakTrailing]
-    /// *   **Content model**:
     ///     void
     /// *   **Construct**:
-    ///     [`hard_break_trailing`][crate::construct::hard_break_trailing]
+    ///     [`whitespace`][crate::construct::partial_whitespace]
     ///
     /// ## Example
     ///
@@ -940,7 +902,7 @@ pub enum Token {
     ///      ^^
     /// > | b
     /// ```
-    HardBreakTrailingSpace,
+    HardBreakTrailing,
     /// Whole heading (atx).
     ///
     /// ## Info
@@ -1884,8 +1846,8 @@ pub const VOID_TOKENS: [Token; 39] = [
     Token::DefinitionMarker,
     Token::DefinitionTitleMarker,
     Token::EmphasisSequence,
-    Token::HardBreakEscapeMarker,
-    Token::HardBreakTrailingSpace,
+    Token::HardBreakEscape,
+    Token::HardBreakTrailing,
     Token::HeadingAtxSequence,
     Token::HeadingSetextUnderline,
     Token::HtmlFlowData,
diff --git a/tests/hard_break_trailing.rs b/tests/hard_break_trailing.rs
index 2013c46..2f75084 100644
--- a/tests/hard_break_trailing.rs
+++ b/tests/hard_break_trailing.rs
@@ -28,12 +28,6 @@ fn hard_break_trailing() {
     );
 
     assert_eq!(
-        micromark("*foo\\\nbar*"),
-        "<p><em>foo<br />\nbar</em></p>",
-        "should support escape hard breaks in emphasis"
-    );
-
-    assert_eq!(
         micromark("`code  \ntext`"),
         "<p><code>code   text</code></p>",
         "should not support trailing hard breaks in code"
author	Titus Wormer <tituswormer@gmail.com>	2022-07-26 16:37:13 +0200
committer	Titus Wormer <tituswormer@gmail.com>	2022-07-26 16:37:13 +0200
commit	a6b317ac7fbc95b8584056b3cebffbf9d1bba2c6 (patch)
tree	fd7be2fe6d7355d3aafaf8b731f0e0b48624debc
parent	297784cb925b1196d89479fa24c898703ae598d6 (diff)
download	markdown-rs-a6b317ac7fbc95b8584056b3cebffbf9d1bba2c6.tar.gz markdown-rs-a6b317ac7fbc95b8584056b3cebffbf9d1bba2c6.tar.bz2 markdown-rs-a6b317ac7fbc95b8584056b3cebffbf9d1bba2c6.zip