From a6b317ac7fbc95b8584056b3cebffbf9d1bba2c6 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Tue, 26 Jul 2022 16:37:13 +0200 Subject: Refactor to drastically improve perf around whitespace --- src/content/text.rs | 57 +++++++++++++++++++++++++++++------------------------ 1 file changed, 31 insertions(+), 26 deletions(-) (limited to 'src/content/text.rs') diff --git a/src/content/text.rs b/src/content/text.rs index a9cf17c..4248053 100644 --- a/src/content/text.rs +++ b/src/content/text.rs @@ -12,40 +12,50 @@ //! * [Character reference][crate::construct::character_reference] //! * [Code (text)][crate::construct::code_text] //! * [Hard break (escape)][crate::construct::hard_break_escape] -//! * [Hard break (trailing)][crate::construct::hard_break_trailing] //! * [HTML (text)][crate::construct::html_text] //! * [Label start (image)][crate::construct::label_start_image] //! * [Label start (link)][crate::construct::label_start_link] //! * [Label end][crate::construct::label_end] +//! +//! > 👉 **Note**: for performance reasons, hard break (trailing) is formed by +//! > [whitespace][crate::construct::partial_whitespace]. use crate::construct::{ attention::start as attention, autolink::start as autolink, character_escape::start as character_escape, character_reference::start as character_reference, code_text::start as code_text, hard_break_escape::start as hard_break_escape, - hard_break_trailing::start as hard_break_trailing, html_text::start as html_text, - label_end::start as label_end, label_start_image::start as label_start_image, - label_start_link::start as label_start_link, partial_data::start as data, - partial_whitespace::whitespace, + html_text::start as html_text, label_end::start as label_end, + label_start_image::start as label_start_image, label_start_link::start as label_start_link, + partial_data::start as data, partial_whitespace::create_resolve_whitespace, }; use crate::tokenizer::{Code, State, Tokenizer}; -const MARKERS: [Code; 12] = [ - Code::VirtualSpace, // `whitespace` - Code::Char('\t'), // `whitespace` - Code::Char(' '), // `hard_break_trailing`, `whitespace` - Code::Char('!'), // `label_start_image` - Code::Char('&'), // `character_reference` - Code::Char('*'), // `attention` - Code::Char('<'), // `autolink`, `html_text` - Code::Char('['), // `label_start_link` - Code::Char('\\'), // `character_escape`, `hard_break_escape` - Code::Char(']'), // `label_end` - Code::Char('_'), // `attention` - Code::Char('`'), // `code_text` +const MARKERS: [Code; 9] = [ + Code::Char('!'), // `label_start_image` + Code::Char('&'), // `character_reference` + Code::Char('*'), // `attention` + Code::Char('<'), // `autolink`, `html_text` + Code::Char('['), // `label_start_link` + Code::Char('\\'), // `character_escape`, `hard_break_escape` + Code::Char(']'), // `label_end` + Code::Char('_'), // `attention` + Code::Char('`'), // `code_text` ]; -/// Before text. +/// Start of text. pub fn start(tokenizer: &mut Tokenizer) -> State { + tokenizer.register_resolver( + "whitespace".to_string(), + Box::new(create_resolve_whitespace( + tokenizer.parse_state.constructs.hard_break_trailing, + true, + )), + ); + before(tokenizer) +} + +/// Before text. +pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Code::None => State::Ok, _ => tokenizer.attempt_n( @@ -56,17 +66,12 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { Box::new(character_reference), Box::new(code_text), Box::new(hard_break_escape), - Box::new(hard_break_trailing), Box::new(html_text), Box::new(label_end), Box::new(label_start_image), Box::new(label_start_link), - Box::new(whitespace), ], - |ok| { - let func = if ok { start } else { before_data }; - Box::new(func) - }, + |ok| Box::new(if ok { before } else { before_data }), )(tokenizer), } } @@ -77,5 +82,5 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// |qwe /// ``` fn before_data(tokenizer: &mut Tokenizer) -> State { - tokenizer.go(|t| data(t, &MARKERS), start)(tokenizer) + tokenizer.go(|t| data(t, &MARKERS), before)(tokenizer) } -- cgit