From be62b2e29a61774100f676cfdd9b100cadf1905f Mon Sep 17 00:00:00 2001
From: Titus Wormer
Date: Thu, 30 Jun 2022 16:35:13 +0200
Subject: Add support for trimming whitespace around string, text

This commit introduces trimming initial and final whitespace around the
whole string or text, or around line endings inside that string or text.

* Add `register_resolver_before`, to run resolvers earlier than others, used for labels
* Add resolver to merge `data` events, which are the most frequent token that occurs, and can happen adjacently. In `micromark-js` this sped up parsing a lot
* Fix a bug where a virtual space was not seen as an okay event
* Refactor to enable all turned off whitespace tests
---
 src/tokenizer.rs | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'src/tokenizer.rs')

diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index fe69366..817c1de 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1796,6 +1796,13 @@ impl<'a> Tokenizer<'a> {
         }
     }
 
+    pub fn register_resolver_before(&mut self, id: String, resolver: Box<Resolver>) {
+        if !self.resolver_ids.contains(&id) {
+            self.resolver_ids.push(id);
+            self.resolvers.insert(0, resolver);
+        }
+    }
+
     /// Prepare for a next code to get consumed.
     fn expect(&mut self, code: Code) {
         assert!(self.consumed, "expected previous character to be consumed");
@@ -1901,7 +1908,7 @@ impl<'a> Tokenizer<'a> {
         let point = self.point.clone();
 
         assert!(
-            current_token != previous.token_type || previous.point != point,
+            current_token != previous.token_type || previous.index != self.index,
             "expected non-empty token"
         );
 
-- 
cgit