From 4ce1ac9e41cafa9051377470e8a246063f7d9b1a Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Tue, 9 Aug 2022 10:45:15 +0200 Subject: Rewrite algorithm to not pass around boxed functions * Pass state names from an enum around instead of boxed functions * Refactor to simplify attempts a lot * Use a subtokenizer for the the `document` content type --- src/construct/character_reference.rs | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'src/construct/character_reference.rs') diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs index 7cc74ba..ba05fab 100644 --- a/src/construct/character_reference.rs +++ b/src/construct/character_reference.rs @@ -66,7 +66,7 @@ use crate::constant::{ CHARACTER_REFERENCE_HEXADECIMAL_SIZE_MAX, CHARACTER_REFERENCE_NAMED_SIZE_MAX, }; use crate::token::Token; -use crate::tokenizer::{State, Tokenizer}; +use crate::tokenizer::{State, StateName, Tokenizer}; use crate::util::slice::Slice; /// Start of a character reference. @@ -86,7 +86,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(Token::CharacterReferenceMarker); tokenizer.consume(); tokenizer.exit(Token::CharacterReferenceMarker); - State::Fn(Box::new(open)) + State::Fn(StateName::CharacterReferenceOpen) } _ => State::Nok, } @@ -103,12 +103,13 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// > | a b /// ^ /// ``` -fn open(tokenizer: &mut Tokenizer) -> State { +// StateName::CharacterReferenceOpen +pub fn open(tokenizer: &mut Tokenizer) -> State { if let Some(b'#') = tokenizer.current { tokenizer.enter(Token::CharacterReferenceMarkerNumeric); tokenizer.consume(); tokenizer.exit(Token::CharacterReferenceMarkerNumeric); - State::Fn(Box::new(numeric)) + State::Fn(StateName::CharacterReferenceNumeric) } else { tokenizer.tokenize_state.marker = b'&'; tokenizer.enter(Token::CharacterReferenceValue); @@ -125,14 +126,15 @@ fn open(tokenizer: &mut Tokenizer) -> State { /// > | a b /// ^ /// ``` -fn numeric(tokenizer: &mut Tokenizer) -> State { +// StateName::CharacterReferenceNumeric +pub fn numeric(tokenizer: &mut Tokenizer) -> State { if let Some(b'x' | b'X') = tokenizer.current { tokenizer.enter(Token::CharacterReferenceMarkerHexadecimal); tokenizer.consume(); tokenizer.exit(Token::CharacterReferenceMarkerHexadecimal); tokenizer.enter(Token::CharacterReferenceValue); tokenizer.tokenize_state.marker = b'x'; - State::Fn(Box::new(value)) + State::Fn(StateName::CharacterReferenceValue) } else { tokenizer.enter(Token::CharacterReferenceValue); tokenizer.tokenize_state.marker = b'#'; @@ -154,7 +156,7 @@ fn numeric(tokenizer: &mut Tokenizer) -> State { /// > | a b /// ^ /// ``` -fn value(tokenizer: &mut Tokenizer) -> State { +pub fn value(tokenizer: &mut Tokenizer) -> State { if matches!(tokenizer.current, Some(b';')) && tokenizer.tokenize_state.size > 0 { // Named. if tokenizer.tokenize_state.marker == b'&' { @@ -200,7 +202,7 @@ fn value(tokenizer: &mut Tokenizer) -> State { if tokenizer.tokenize_state.size < max && test(&byte) { tokenizer.tokenize_state.size += 1; tokenizer.consume(); - return State::Fn(Box::new(value)); + return State::Fn(StateName::CharacterReferenceValue); } } -- cgit