From 4ce1ac9e41cafa9051377470e8a246063f7d9b1a Mon Sep 17 00:00:00 2001
From: Titus Wormer
Date: Tue, 9 Aug 2022 10:45:15 +0200
Subject: Rewrite algorithm to not pass around boxed functions

* Pass state names from an enum around instead of boxed functions
* Refactor to simplify attempts a lot
* Use a subtokenizer for the `document` content type
---
 src/construct/html_flow.rs | 188 ++++++++++++++++++++++-----------------------
 1 file changed, 94 insertions(+), 94 deletions(-)

(limited to 'src/construct/html_flow.rs')

diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index aaa803d..779146c 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -101,13 +101,11 @@ use crate::constant::{
     HTML_BLOCK_NAMES, HTML_CDATA_PREFIX, HTML_RAW_NAMES, HTML_RAW_SIZE_MAX, TAB_SIZE,
 };
-use crate::construct::{
-    blank_line::start as blank_line,
-    partial_non_lazy_continuation::start as partial_non_lazy_continuation,
-    partial_space_or_tab::{space_or_tab_with_options, Options as SpaceOrTabOptions},
+use crate::construct::partial_space_or_tab::{
+    space_or_tab_with_options, Options as SpaceOrTabOptions,
 };
 use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};
 use crate::util::slice::Slice;
 /// Symbol for `
 /// ^
 /// ```
-fn continuation_raw_tag_open(tokenizer: &mut Tokenizer) -> State {
+pub fn continuation_raw_tag_open(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'/') => {
             tokenizer.consume();
             tokenizer.tokenize_state.start = tokenizer.point.index;
-            State::Fn(Box::new(continuation_raw_end_tag))
+            State::Fn(StateName::HtmlFlowContinuationRawEndTag)
         }
         _ => continuation(tokenizer),
     }
 }
@@ -764,7 +764,7 @@ fn continuation_raw_tag_open(tokenizer: &mut Tokenizer) -> State {
 /// > |
 /// ^^^^^^
 /// ```
-fn continuation_raw_end_tag(tokenizer: &mut Tokenizer) -> State {
+pub fn continuation_raw_end_tag(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'>') => {
             // Guaranteed to be valid ASCII bytes.
@@ -779,7 +779,7 @@ fn continuation_raw_end_tag(tokenizer: &mut Tokenizer) -> State {
             if HTML_RAW_NAMES.contains(&name.as_str()) {
                 tokenizer.consume();
-                State::Fn(Box::new(continuation_close))
+                State::Fn(StateName::HtmlFlowContinuationClose)
             } else {
                 continuation(tokenizer)
             }
@@ -788,7 +788,7 @@
             if tokenizer.point.index - tokenizer.tokenize_state.start < HTML_RAW_SIZE_MAX =>
         {
             tokenizer.consume();
-            State::Fn(Box::new(continuation_raw_end_tag))
+            State::Fn(StateName::HtmlFlowContinuationRawEndTag)
         }
         _ => {
             tokenizer.tokenize_state.start = 0;
@@ -803,11 +803,11 @@
 /// > | &<]]>
 /// ^
 /// ```
-fn continuation_character_data_inside(tokenizer: &mut Tokenizer) -> State {
+pub fn continuation_cdata_inside(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b']') => {
             tokenizer.consume();
-            State::Fn(Box::new(continuation_declaration_inside))
+            State::Fn(StateName::HtmlFlowContinuationDeclarationInside)
         }
         _ => continuation(tokenizer),
     }
 }
@@ -827,15 +827,15 @@ fn continuation_character_data_inside(tokenizer: &mut Tokenizer) -> State {
 /// > | &<]]>
 /// ^
 /// ```
-fn continuation_declaration_inside(tokenizer: &mut Tokenizer) -> State {
+pub fn continuation_declaration_inside(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'>') => {
             tokenizer.consume();
-            State::Fn(Box::new(continuation_close))
+            State::Fn(StateName::HtmlFlowContinuationClose)
         }
         Some(b'-') if tokenizer.tokenize_state.marker == COMMENT => {
             tokenizer.consume();
-            State::Fn(Box::new(continuation_declaration_inside))
+            State::Fn(StateName::HtmlFlowContinuationDeclarationInside)
         }
         _ => continuation(tokenizer),
     }
 }
@@ -847,7 +847,7 @@ fn continuation_declaration_inside(tokenizer: &mut Tokenizer) -> State {
 /// > |
 /// ^
 /// ```
-fn continuation_close(tokenizer: &mut Tokenizer) -> State {
+pub fn continuation_close(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None | Some(b'\n') => {
             tokenizer.exit(Token::HtmlFlowData);
@@ -855,7 +855,7 @@
         }
         _ => {
             tokenizer.consume();
-            State::Fn(Box::new(continuation_close))
+            State::Fn(StateName::HtmlFlowContinuationClose)
         }
     }
 }
@@ -866,7 +866,7 @@
 /// > |
 /// ^
 /// ```
-fn continuation_after(tokenizer: &mut Tokenizer) -> State {
+pub fn continuation_after(tokenizer: &mut Tokenizer) -> State {
     tokenizer.exit(Token::HtmlFlow);
     tokenizer.tokenize_state.marker = 0;
     // Feel free to interrupt.
@@ -883,9 +883,9 @@
 /// ^
 /// |
 /// ```
-fn blank_line_before(tokenizer: &mut Tokenizer) -> State {
+pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State {
     tokenizer.enter(Token::LineEnding);
     tokenizer.consume();
     tokenizer.exit(Token::LineEnding);
-    State::Fn(Box::new(blank_line))
+    State::Fn(StateName::BlankLineStart)
 }
-- cgit
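The pattern behind the diff, in miniature: before this change every state function returned `State::Fn(Box::new(next_state))`, allocating a boxed function for each transition; after it, a state returns `State::Fn(StateName::…)`, a plain `Copy` enum value, and one central dispatcher maps names back to functions. The sketch below is a hypothetical, self-contained illustration of that technique, not code from the crate: the toy `Tokenizer`, the `call` dispatcher, and the two example states are assumptions; only the `State`/`StateName` shape and the variant naming mirror the patch.

```rust
// Minimal sketch (not from markdown-rs): enum-named states instead of boxed
// state functions. `State`, `StateName`, and the variant names mirror the
// patch; `Tokenizer`, `call`, and the toy states are assumptions.

/// Stand-in for the real tokenizer state.
struct Tokenizer {
    steps: usize,
}

/// What a state function returns: done, or "continue at this named state".
/// (The real enum also carries failure and more bookkeeping.)
enum State {
    Ok,
    Fn(StateName),
}

/// Every state is a plain `Copy` name instead of a boxed function.
#[derive(Clone, Copy)]
enum StateName {
    HtmlFlowContinuationClose,
    HtmlFlowContinuationAfter,
}

/// Central dispatcher: map a state name back to the function implementing it.
/// With boxed functions, this mapping was carried implicitly by heap-allocated
/// values that every state had to build and pass around.
fn call(tokenizer: &mut Tokenizer, name: StateName) -> State {
    match name {
        StateName::HtmlFlowContinuationClose => continuation_close(tokenizer),
        StateName::HtmlFlowContinuationAfter => continuation_after(tokenizer),
    }
}

/// Toy state: "consume" three times, then hand over by name.
fn continuation_close(tokenizer: &mut Tokenizer) -> State {
    if tokenizer.steps < 3 {
        tokenizer.steps += 1;
        // Before: State::Fn(Box::new(continuation_close)), one allocation per
        // step. After: a copyable enum value, no allocation.
        State::Fn(StateName::HtmlFlowContinuationClose)
    } else {
        State::Fn(StateName::HtmlFlowContinuationAfter)
    }
}

/// Toy terminal state.
fn continuation_after(_tokenizer: &mut Tokenizer) -> State {
    State::Ok
}

fn main() {
    let mut tokenizer = Tokenizer { steps: 0 };
    // Drive the machine by name until it reaches a terminal state.
    let mut state = State::Fn(StateName::HtmlFlowContinuationClose);
    while let State::Fn(name) = state {
        state = call(&mut tokenizer, name);
    }
    println!("finished after {} steps", tokenizer.steps);
}
```

Besides dropping an allocation per step, naming states in an enum makes them storable and comparable, which is presumably what enables the simpler attempt handling and the `document` subtokenizer mentioned in the commit message.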