From f7e5fb852dc9c416b9eeb1f0d4f2d51ba5b68456 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Thu, 28 Jul 2022 16:48:00 +0200 Subject: Refactor to work on `char`s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, a custom char implementation was used. This was easier to work with, as sometimes “virtual” characters are injected, or characters are ignored. This replaces that with working on actual `char`s, in the hope of even working on `u8`s in the future. This simplifies the state machine somewhat, as only `\n` is fed, regardless of whether it was a CRLF, CR, or LF. It also feeds `' '` instead of virtual spaces. The BOM, if present, is now available as a `ByteOrderMark` event. --- src/construct/code_text.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'src/construct/code_text.rs') diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs index f5f92fc..150f63b 100644 --- a/src/construct/code_text.rs +++ b/src/construct/code_text.rs @@ -84,7 +84,7 @@ //! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element use crate::token::Token; -use crate::tokenizer::{Code, State, Tokenizer}; +use crate::tokenizer::{State, Tokenizer}; /// Start of code (text). 
/// @@ -98,9 +98,9 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { let len = tokenizer.events.len(); match tokenizer.current { - Code::Char('`') + Some('`') if tokenizer.parse_state.constructs.code_text - && (tokenizer.previous != Code::Char('`') + && (tokenizer.previous != Some('`') || (len > 0 && tokenizer.events[len - 1].token_type == Token::CharacterEscape)) => { @@ -119,7 +119,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` fn sequence_open(tokenizer: &mut Tokenizer, size: usize) -> State { - if let Code::Char('`') = tokenizer.current { + if let Some('`') = tokenizer.current { tokenizer.consume(); State::Fn(Box::new(move |t| sequence_open(t, size + 1))) } else { @@ -136,14 +136,14 @@ fn sequence_open(tokenizer: &mut Tokenizer, size: usize) -> State { /// ``` fn between(tokenizer: &mut Tokenizer, size_open: usize) -> State { match tokenizer.current { - Code::None => State::Nok, - Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { + None => State::Nok, + Some('\n') => { tokenizer.enter(Token::LineEnding); tokenizer.consume(); tokenizer.exit(Token::LineEnding); State::Fn(Box::new(move |t| between(t, size_open))) } - Code::Char('`') => { + Some('`') => { tokenizer.enter(Token::CodeTextSequence); sequence_close(tokenizer, size_open, 0) } @@ -162,7 +162,7 @@ fn between(tokenizer: &mut Tokenizer, size_open: usize) -> State { /// ``` fn data(tokenizer: &mut Tokenizer, size_open: usize) -> State { match tokenizer.current { - Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r' | '`') => { + None | Some('\n' | '`') => { tokenizer.exit(Token::CodeTextData); between(tokenizer, size_open) } @@ -181,7 +181,7 @@ fn data(tokenizer: &mut Tokenizer, size_open: usize) -> State { /// ``` fn sequence_close(tokenizer: &mut Tokenizer, size_open: usize, size: usize) -> State { match tokenizer.current { - Code::Char('`') => { + Some('`') => { tokenizer.consume(); State::Fn(Box::new(move |t| sequence_close(t, size_open, size + 1))) } 
-- cgit