From b00fafbdcba39e7e17144b07834702629b891062 Mon Sep 17 00:00:00 2001
From: Titus Wormer
Date: Tue, 14 Jun 2022 18:57:28 +0200
Subject: Fix support for deep subtokenization
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Fix a couple of forgotten line ending handling in html (text)
* Fix missing initial case for html (text) not having a `<` 😬
* Add line ending handling to `text` construct
---
 src/content/text.rs | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

(limited to 'src/content/text.rs')

diff --git a/src/content/text.rs b/src/content/text.rs
index 73c2d55..433d030 100644
--- a/src/content/text.rs
+++ b/src/content/text.rs
@@ -52,12 +52,19 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
 /// |qwe
 /// ```
 fn before_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    if let Code::None = code {
-        (State::Ok, None)
-    } else {
-        tokenizer.enter(TokenType::Data);
-        tokenizer.consume(code);
-        (State::Fn(Box::new(in_data)), None)
+    match code {
+        Code::None => (State::Ok, None),
+        Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
+            tokenizer.enter(TokenType::LineEnding);
+            tokenizer.consume(code);
+            tokenizer.exit(TokenType::LineEnding);
+            (State::Fn(Box::new(start)), None)
+        }
+        _ => {
+            tokenizer.enter(TokenType::Data);
+            tokenizer.consume(code);
+            (State::Fn(Box::new(in_data)), None)
+        }
     }
 }
 
@@ -73,7 +80,7 @@ fn in_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
             (State::Ok, None)
         }
         // To do: somehow get these markers from constructs.
-        Code::Char('&' | '\\' | '<') => {
+        Code::CarriageReturnLineFeed | Code::Char('\r' | '\n' | '&' | '\\' | '<') => {
             tokenizer.exit(TokenType::Data);
             start(tokenizer, code)
         }
--
cgit