From b00fafbdcba39e7e17144b07834702629b891062 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Tue, 14 Jun 2022 18:57:28 +0200 Subject: Fix support for deep subtokenization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix a couple of forgotten cases of line ending handling in html (text) * Fix missing initial case for html (text) not having a `<` 😬 * Add line ending handling to `text` construct --- src/construct/html_text.rs | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'src/construct') diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs index 95fb8c3..c118006 100644 --- a/src/construct/html_text.rs +++ b/src/construct/html_text.rs @@ -58,10 +58,14 @@ use crate::tokenizer::{Code, State, StateFn, StateFnResult, TokenType, Tokenizer /// a | b /// ``` pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - tokenizer.enter(TokenType::HtmlText); - tokenizer.enter(TokenType::HtmlTextData); - tokenizer.consume(code); - (State::Fn(Box::new(open)), None) + if Code::Char('<') == code { + tokenizer.enter(TokenType::HtmlText); + tokenizer.enter(TokenType::HtmlTextData); + tokenizer.consume(code); + (State::Fn(Box::new(open)), None) + } else { + (State::Nok, None) + } } /// After `<`, before a tag name or other stuff. 
@@ -582,9 +586,9 @@ pub fn tag_open_attribute_value_quoted( pub fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::None | Code::Char('"' | '\'' | '<' | '=' | '`') => (State::Nok, None), - Code::CarriageReturnLineFeed | Code::VirtualSpace | Code::Char('\t' | ' ' | '/' | '>') => { - tag_open_between(tokenizer, code) - } + Code::CarriageReturnLineFeed + | Code::VirtualSpace + | Code::Char('\r' | '\n' | '\t' | ' ' | '/' | '>') => tag_open_between(tokenizer, code), Code::Char(_) => { tokenizer.consume(code); (State::Fn(Box::new(tag_open_attribute_value_unquoted)), None) @@ -603,9 +607,9 @@ pub fn tag_open_attribute_value_quoted_after( code: Code, ) -> StateFnResult { match code { - Code::CarriageReturnLineFeed | Code::VirtualSpace | Code::Char('\t' | ' ' | '>' | '/') => { - tag_open_between(tokenizer, code) - } + Code::CarriageReturnLineFeed + | Code::VirtualSpace + | Code::Char('\r' | '\n' | '\t' | ' ' | '>' | '/') => tag_open_between(tokenizer, code), _ => (State::Nok, None), } } -- cgit