diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-08-09 10:45:15 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-08-09 10:45:15 +0200 |
commit | 4ce1ac9e41cafa9051377470e8a246063f7d9b1a (patch) | |
tree | d678d9583764b2706fe7ea4876e91e40609f15b0 /src/construct | |
parent | 8ffed1822bcbc1b6ce6647b840fb03996b0635ea (diff) | |
download | markdown-rs-4ce1ac9e41cafa9051377470e8a246063f7d9b1a.tar.gz markdown-rs-4ce1ac9e41cafa9051377470e8a246063f7d9b1a.tar.bz2 markdown-rs-4ce1ac9e41cafa9051377470e8a246063f7d9b1a.zip |
Rewrite algorithm to not pass around boxed functions
* Pass state names from an enum around instead of boxed functions
* Refactor to simplify attempts a lot
* Use a subtokenizer for the `document` content type
Diffstat (limited to '')
27 files changed, 795 insertions, 688 deletions
diff --git a/src/construct/attention.rs b/src/construct/attention.rs index fc2acfb..5a98a89 100644 --- a/src/construct/attention.rs +++ b/src/construct/attention.rs @@ -52,7 +52,7 @@ //! [html-strong]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-strong-element use crate::token::Token; -use crate::tokenizer::{Event, EventType, Point, State, Tokenizer}; +use crate::tokenizer::{Event, EventType, Point, State, StateName, Tokenizer}; use crate::unicode::PUNCTUATION; use crate::util::slice::Slice; @@ -132,11 +132,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// > | ** /// ^^ /// ``` -fn inside(tokenizer: &mut Tokenizer) -> State { +pub fn inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'*' | b'_') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => { tokenizer.consume(); - State::Fn(Box::new(inside)) + State::Fn(StateName::AttentionInside) } _ => { tokenizer.exit(Token::AttentionSequence); diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs index 1444c61..15bfac1 100644 --- a/src/construct/autolink.rs +++ b/src/construct/autolink.rs @@ -103,7 +103,7 @@ use crate::constant::{AUTOLINK_DOMAIN_SIZE_MAX, AUTOLINK_SCHEME_SIZE_MAX}; use crate::token::Token; -use crate::tokenizer::{State, Tokenizer}; +use crate::tokenizer::{State, StateName, Tokenizer}; /// Start of an autolink. /// @@ -121,7 +121,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.consume(); tokenizer.exit(Token::AutolinkMarker); tokenizer.enter(Token::AutolinkProtocol); - State::Fn(Box::new(open)) + State::Fn(StateName::AutolinkOpen) } _ => State::Nok, } @@ -135,12 +135,12 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// > | a<user@example.com>b /// ^ /// ``` -fn open(tokenizer: &mut Tokenizer) -> State { +pub fn open(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // ASCII alphabetic. 
Some(b'A'..=b'Z' | b'a'..=b'z') => { tokenizer.consume(); - State::Fn(Box::new(scheme_or_email_atext)) + State::Fn(StateName::AutolinkSchemeOrEmailAtext) } _ => email_atext(tokenizer), } @@ -154,7 +154,7 @@ fn open(tokenizer: &mut Tokenizer) -> State { /// > | a<user@example.com>b /// ^ /// ``` -fn scheme_or_email_atext(tokenizer: &mut Tokenizer) -> State { +pub fn scheme_or_email_atext(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // ASCII alphanumeric and `+`, `-`, and `.`. Some(b'+' | b'-' | b'.' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => { @@ -174,12 +174,12 @@ fn scheme_or_email_atext(tokenizer: &mut Tokenizer) -> State { /// > | a<user@example.com>b /// ^ /// ``` -fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer) -> State { +pub fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b':') => { tokenizer.consume(); tokenizer.tokenize_state.size = 0; - State::Fn(Box::new(url_inside)) + State::Fn(StateName::AutolinkUrlInside) } // ASCII alphanumeric and `+`, `-`, and `.`. Some(b'+' | b'-' | b'.' 
| b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') @@ -187,7 +187,7 @@ fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer) -> State { { tokenizer.tokenize_state.size += 1; tokenizer.consume(); - State::Fn(Box::new(scheme_inside_or_email_atext)) + State::Fn(StateName::AutolinkSchemeInsideOrEmailAtext) } _ => { tokenizer.tokenize_state.size = 0; @@ -202,7 +202,7 @@ fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer) -> State { /// > | a<https://example.com>b /// ^ /// ``` -fn url_inside(tokenizer: &mut Tokenizer) -> State { +pub fn url_inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'>') => { tokenizer.exit(Token::AutolinkProtocol); @@ -212,7 +212,7 @@ fn url_inside(tokenizer: &mut Tokenizer) -> State { None | Some(b'\0'..=0x1F | b' ' | b'<' | 0x7F) => State::Nok, Some(_) => { tokenizer.consume(); - State::Fn(Box::new(url_inside)) + State::Fn(StateName::AutolinkUrlInside) } } } @@ -223,11 +223,11 @@ fn url_inside(tokenizer: &mut Tokenizer) -> State { /// > | a<user.name@example.com>b /// ^ /// ``` -fn email_atext(tokenizer: &mut Tokenizer) -> State { +pub fn email_atext(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'@') => { tokenizer.consume(); - State::Fn(Box::new(email_at_sign_or_dot)) + State::Fn(StateName::AutolinkEmailAtSignOrDot) } // ASCII atext. // @@ -250,7 +250,7 @@ fn email_atext(tokenizer: &mut Tokenizer) -> State { b'#'..=b'\'' | b'*' | b'+' | b'-'..=b'9' | b'=' | b'?' | b'A'..=b'Z' | b'^'..=b'~', ) => { tokenizer.consume(); - State::Fn(Box::new(email_atext)) + State::Fn(StateName::AutolinkEmailAtext) } _ => State::Nok, } @@ -262,7 +262,7 @@ fn email_atext(tokenizer: &mut Tokenizer) -> State { /// > | a<user.name@example.com>b /// ^ ^ /// ``` -fn email_at_sign_or_dot(tokenizer: &mut Tokenizer) -> State { +pub fn email_at_sign_or_dot(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // ASCII alphanumeric. 
Some(b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => email_value(tokenizer), @@ -276,12 +276,12 @@ fn email_at_sign_or_dot(tokenizer: &mut Tokenizer) -> State { /// > | a<user.name@example.com>b /// ^ /// ``` -fn email_label(tokenizer: &mut Tokenizer) -> State { +pub fn email_label(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'.') => { tokenizer.tokenize_state.size = 0; tokenizer.consume(); - State::Fn(Box::new(email_at_sign_or_dot)) + State::Fn(StateName::AutolinkEmailAtSignOrDot) } Some(b'>') => { tokenizer.tokenize_state.size = 0; @@ -304,20 +304,20 @@ fn email_label(tokenizer: &mut Tokenizer) -> State { /// > | a<user.name@ex-ample.com>b /// ^ /// ``` -fn email_value(tokenizer: &mut Tokenizer) -> State { +pub fn email_value(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // ASCII alphanumeric or `-`. Some(b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') if tokenizer.tokenize_state.size < AUTOLINK_DOMAIN_SIZE_MAX => { - let func = if matches!(tokenizer.current, Some(b'-')) { - email_value + let state_name = if matches!(tokenizer.current, Some(b'-')) { + StateName::AutolinkEmailValue } else { - email_label + StateName::AutolinkEmailLabel }; tokenizer.tokenize_state.size += 1; tokenizer.consume(); - State::Fn(Box::new(func)) + State::Fn(state_name) } _ => { tokenizer.tokenize_state.size = 0; @@ -334,7 +334,7 @@ fn email_value(tokenizer: &mut Tokenizer) -> State { /// > | a<user@example.com>b /// ^ /// ``` -fn end(tokenizer: &mut Tokenizer) -> State { +pub fn end(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'>') => { tokenizer.enter(Token::AutolinkMarker); diff --git a/src/construct/blank_line.rs b/src/construct/blank_line.rs index c4eacf5..b12c2c4 100644 --- a/src/construct/blank_line.rs +++ b/src/construct/blank_line.rs @@ -33,7 +33,7 @@ //! 
[flow]: crate::content::flow use crate::construct::partial_space_or_tab::space_or_tab; -use crate::tokenizer::{State, Tokenizer}; +use crate::tokenizer::{State, StateName, Tokenizer}; /// Start of a blank line. /// @@ -46,7 +46,8 @@ use crate::tokenizer::{State, Tokenizer}; /// ^ /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt_opt(space_or_tab(), after)(tokenizer) + let state_name = space_or_tab(tokenizer); + tokenizer.attempt_opt(state_name, StateName::BlankLineAfter) } /// After zero or more spaces or tabs, before a line ending or EOF. @@ -57,7 +58,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// > | ␊ /// ^ /// ``` -fn after(tokenizer: &mut Tokenizer) -> State { +pub fn after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => State::Ok, _ => State::Nok, diff --git a/src/construct/block_quote.rs b/src/construct/block_quote.rs index 7e4753d..df58d62 100644 --- a/src/construct/block_quote.rs +++ b/src/construct/block_quote.rs @@ -36,7 +36,7 @@ use crate::constant::TAB_SIZE; use crate::construct::partial_space_or_tab::space_or_tab_min_max; use crate::token::Token; -use crate::tokenizer::{State, Tokenizer}; +use crate::tokenizer::{State, StateName, Tokenizer}; /// Start of block quote. 
/// @@ -45,13 +45,17 @@ use crate::tokenizer::{State, Tokenizer}; /// ^ /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { - let max = if tokenizer.parse_state.constructs.code_indented { - TAB_SIZE - 1 - } else { - usize::MAX - }; if tokenizer.parse_state.constructs.block_quote { - tokenizer.go(space_or_tab_min_max(0, max), before)(tokenizer) + let state_name = space_or_tab_min_max( + tokenizer, + 0, + if tokenizer.parse_state.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }, + ); + tokenizer.go(state_name, StateName::BlockQuoteBefore) } else { State::Nok } @@ -63,7 +67,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// > | > a /// ^ /// ``` -fn before(tokenizer: &mut Tokenizer) -> State { +pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'>') => { tokenizer.enter(Token::BlockQuote); @@ -80,13 +84,17 @@ fn before(tokenizer: &mut Tokenizer) -> State { /// > | > b /// ^ /// ``` -pub fn cont(tokenizer: &mut Tokenizer) -> State { - let max = if tokenizer.parse_state.constructs.code_indented { - TAB_SIZE - 1 - } else { - usize::MAX - }; - tokenizer.go(space_or_tab_min_max(0, max), cont_before)(tokenizer) +pub fn cont_start(tokenizer: &mut Tokenizer) -> State { + let state_name = space_or_tab_min_max( + tokenizer, + 0, + if tokenizer.parse_state.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }, + ); + tokenizer.go(state_name, StateName::BlockQuoteContBefore) } /// After whitespace, before `>`. 
@@ -96,14 +104,14 @@ pub fn cont(tokenizer: &mut Tokenizer) -> State { /// > | > b /// ^ /// ``` -fn cont_before(tokenizer: &mut Tokenizer) -> State { +pub fn cont_before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'>') => { tokenizer.enter(Token::BlockQuotePrefix); tokenizer.enter(Token::BlockQuoteMarker); tokenizer.consume(); tokenizer.exit(Token::BlockQuoteMarker); - State::Fn(Box::new(cont_after)) + State::Fn(StateName::BlockQuoteContAfter) } _ => State::Nok, } @@ -117,15 +125,13 @@ fn cont_before(tokenizer: &mut Tokenizer) -> State { /// > | >b /// ^ /// ``` -fn cont_after(tokenizer: &mut Tokenizer) -> State { +pub fn cont_after(tokenizer: &mut Tokenizer) -> State { if let Some(b'\t' | b' ') = tokenizer.current { tokenizer.enter(Token::SpaceOrTab); tokenizer.consume(); tokenizer.exit(Token::SpaceOrTab); - tokenizer.exit(Token::BlockQuotePrefix); - State::Ok - } else { - tokenizer.exit(Token::BlockQuotePrefix); - State::Ok } + + tokenizer.exit(Token::BlockQuotePrefix); + State::Ok } diff --git a/src/construct/character_escape.rs b/src/construct/character_escape.rs index 4419d7a..de09f17 100644 --- a/src/construct/character_escape.rs +++ b/src/construct/character_escape.rs @@ -34,7 +34,7 @@ //! [hard_break_escape]: crate::construct::hard_break_escape use crate::token::Token; -use crate::tokenizer::{State, Tokenizer}; +use crate::tokenizer::{State, StateName, Tokenizer}; /// Start of a character escape. 
/// @@ -49,7 +49,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(Token::CharacterEscapeMarker); tokenizer.consume(); tokenizer.exit(Token::CharacterEscapeMarker); - State::Fn(Box::new(inside)) + State::Fn(StateName::CharacterEscapeInside) } _ => State::Nok, } @@ -61,7 +61,8 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// > | a\*b /// ^ /// ``` -fn inside(tokenizer: &mut Tokenizer) -> State { +// StateName::CharacterEscapeInside +pub fn inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // ASCII punctuation. Some(b'!'..=b'/' | b':'..=b'@' | b'['..=b'`' | b'{'..=b'~') => { diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs index 7cc74ba..ba05fab 100644 --- a/src/construct/character_reference.rs +++ b/src/construct/character_reference.rs @@ -66,7 +66,7 @@ use crate::constant::{ CHARACTER_REFERENCE_HEXADECIMAL_SIZE_MAX, CHARACTER_REFERENCE_NAMED_SIZE_MAX, }; use crate::token::Token; -use crate::tokenizer::{State, Tokenizer}; +use crate::tokenizer::{State, StateName, Tokenizer}; use crate::util::slice::Slice; /// Start of a character reference. 
@@ -86,7 +86,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(Token::CharacterReferenceMarker); tokenizer.consume(); tokenizer.exit(Token::CharacterReferenceMarker); - State::Fn(Box::new(open)) + State::Fn(StateName::CharacterReferenceOpen) } _ => State::Nok, } @@ -103,12 +103,13 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// > | a	b /// ^ /// ``` -fn open(tokenizer: &mut Tokenizer) -> State { +// StateName::CharacterReferenceOpen +pub fn open(tokenizer: &mut Tokenizer) -> State { if let Some(b'#') = tokenizer.current { tokenizer.enter(Token::CharacterReferenceMarkerNumeric); tokenizer.consume(); tokenizer.exit(Token::CharacterReferenceMarkerNumeric); - State::Fn(Box::new(numeric)) + State::Fn(StateName::CharacterReferenceNumeric) } else { tokenizer.tokenize_state.marker = b'&'; tokenizer.enter(Token::CharacterReferenceValue); @@ -125,14 +126,15 @@ fn open(tokenizer: &mut Tokenizer) -> State { /// > | a	b /// ^ /// ``` -fn numeric(tokenizer: &mut Tokenizer) -> State { +// StateName::CharacterReferenceNumeric +pub fn numeric(tokenizer: &mut Tokenizer) -> State { if let Some(b'x' | b'X') = tokenizer.current { tokenizer.enter(Token::CharacterReferenceMarkerHexadecimal); tokenizer.consume(); tokenizer.exit(Token::CharacterReferenceMarkerHexadecimal); tokenizer.enter(Token::CharacterReferenceValue); tokenizer.tokenize_state.marker = b'x'; - State::Fn(Box::new(value)) + State::Fn(StateName::CharacterReferenceValue) } else { tokenizer.enter(Token::CharacterReferenceValue); tokenizer.tokenize_state.marker = b'#'; @@ -154,7 +156,7 @@ fn numeric(tokenizer: &mut Tokenizer) -> State { /// > | a	b /// ^ /// ``` -fn value(tokenizer: &mut Tokenizer) -> State { +pub fn value(tokenizer: &mut Tokenizer) -> State { if matches!(tokenizer.current, Some(b';')) && tokenizer.tokenize_state.size > 0 { // Named. 
if tokenizer.tokenize_state.marker == b'&' { @@ -200,7 +202,7 @@ fn value(tokenizer: &mut Tokenizer) -> State { if tokenizer.tokenize_state.size < max && test(&byte) { tokenizer.tokenize_state.size += 1; tokenizer.consume(); - return State::Fn(Box::new(value)); + return State::Fn(StateName::CharacterReferenceValue); } } diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs index a22a0f9..46c5f9f 100644 --- a/src/construct/code_fenced.rs +++ b/src/construct/code_fenced.rs @@ -102,12 +102,9 @@ //! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element use crate::constant::{CODE_FENCED_SEQUENCE_SIZE_MIN, TAB_SIZE}; -use crate::construct::{ - partial_non_lazy_continuation::start as partial_non_lazy_continuation, - partial_space_or_tab::{space_or_tab, space_or_tab_min_max}, -}; +use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}; use crate::token::Token; -use crate::tokenizer::{ContentType, State, Tokenizer}; +use crate::tokenizer::{ContentType, State, StateName, Tokenizer}; use crate::util::slice::{Position, Slice}; /// Start of fenced code. 
@@ -122,17 +119,16 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { if tokenizer.parse_state.constructs.code_fenced { tokenizer.enter(Token::CodeFenced); tokenizer.enter(Token::CodeFencedFence); - tokenizer.go( - space_or_tab_min_max( - 0, - if tokenizer.parse_state.constructs.code_indented { - TAB_SIZE - 1 - } else { - usize::MAX - }, - ), - before_sequence_open, - )(tokenizer) + let state_name = space_or_tab_min_max( + tokenizer, + 0, + if tokenizer.parse_state.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }, + ); + tokenizer.go(state_name, StateName::CodeFencedBeforeSequenceOpen) } else { State::Nok } @@ -146,7 +142,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// | console.log(1) /// | ~~~ /// ``` -fn before_sequence_open(tokenizer: &mut Tokenizer) -> State { +pub fn before_sequence_open(tokenizer: &mut Tokenizer) -> State { let tail = tokenizer.events.last(); let mut prefix = 0; @@ -178,16 +174,17 @@ fn before_sequence_open(tokenizer: &mut Tokenizer) -> State { /// | console.log(1) /// | ~~~ /// ``` -fn sequence_open(tokenizer: &mut Tokenizer) -> State { +pub fn sequence_open(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'`' | b'~') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => { tokenizer.tokenize_state.size += 1; tokenizer.consume(); - State::Fn(Box::new(sequence_open)) + State::Fn(StateName::CodeFencedSequenceOpen) } _ if tokenizer.tokenize_state.size >= CODE_FENCED_SEQUENCE_SIZE_MIN => { tokenizer.exit(Token::CodeFencedFenceSequence); - tokenizer.attempt_opt(space_or_tab(), info_before)(tokenizer) + let state_name = space_or_tab(tokenizer); + tokenizer.attempt_opt(state_name, StateName::CodeFencedInfoBefore) } _ => { tokenizer.tokenize_state.marker = 0; @@ -206,7 +203,7 @@ fn sequence_open(tokenizer: &mut Tokenizer) -> State { /// | console.log(1) /// | ~~~ /// ``` -fn info_before(tokenizer: &mut Tokenizer) -> State { +pub fn info_before(tokenizer: &mut Tokenizer) -> State 
{ match tokenizer.current { None | Some(b'\n') => { tokenizer.exit(Token::CodeFencedFence); @@ -217,7 +214,7 @@ fn info_before(tokenizer: &mut Tokenizer) -> State { _ => { tokenizer.enter(Token::CodeFencedFenceInfo); tokenizer.enter_with_content(Token::Data, Some(ContentType::String)); - info_inside(tokenizer) + info(tokenizer) } } } @@ -230,7 +227,7 @@ fn info_before(tokenizer: &mut Tokenizer) -> State { /// | console.log(1) /// | ~~~ /// ``` -fn info_inside(tokenizer: &mut Tokenizer) -> State { +pub fn info(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { tokenizer.exit(Token::Data); @@ -243,7 +240,8 @@ fn info_inside(tokenizer: &mut Tokenizer) -> State { Some(b'\t' | b' ') => { tokenizer.exit(Token::Data); tokenizer.exit(Token::CodeFencedFenceInfo); - tokenizer.attempt_opt(space_or_tab(), meta_before)(tokenizer) + let state_name = space_or_tab(tokenizer); + tokenizer.attempt_opt(state_name, StateName::CodeFencedMetaBefore) } Some(b'`') if tokenizer.tokenize_state.marker == b'`' => { tokenizer.concrete = false; @@ -254,7 +252,7 @@ fn info_inside(tokenizer: &mut Tokenizer) -> State { } Some(_) => { tokenizer.consume(); - State::Fn(Box::new(info_inside)) + State::Fn(StateName::CodeFencedInfo) } } } @@ -267,7 +265,7 @@ fn info_inside(tokenizer: &mut Tokenizer) -> State { /// | console.log(1) /// | ~~~ /// ``` -fn meta_before(tokenizer: &mut Tokenizer) -> State { +pub fn meta_before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { tokenizer.exit(Token::CodeFencedFence); @@ -291,7 +289,7 @@ fn meta_before(tokenizer: &mut Tokenizer) -> State { /// | console.log(1) /// | ~~~ /// ``` -fn meta(tokenizer: &mut Tokenizer) -> State { +pub fn meta(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { tokenizer.exit(Token::Data); @@ -310,7 +308,7 @@ fn meta(tokenizer: &mut Tokenizer) -> State { } _ => { tokenizer.consume(); - State::Fn(Box::new(meta)) + 
State::Fn(StateName::CodeFencedMeta) } } } @@ -324,10 +322,14 @@ fn meta(tokenizer: &mut Tokenizer) -> State { /// ^ /// | ~~~ /// ``` -fn at_break(tokenizer: &mut Tokenizer) -> State { - tokenizer.check(partial_non_lazy_continuation, |ok| { - Box::new(if ok { at_non_lazy_break } else { after }) - })(tokenizer) +pub fn at_break(tokenizer: &mut Tokenizer) -> State { + tokenizer.check(StateName::NonLazyContinuationStart, |ok| { + State::Fn(if ok { + StateName::CodeFencedAtNonLazyBreak + } else { + StateName::CodeFencedAfter + }) + }) } /// At an eol/eof in code, before a non-lazy closing fence or content. @@ -339,10 +341,14 @@ fn at_break(tokenizer: &mut Tokenizer) -> State { /// ^ /// | ~~~ /// ``` -fn at_non_lazy_break(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt(close_begin, |ok| { - Box::new(if ok { after } else { content_before }) - })(tokenizer) +pub fn at_non_lazy_break(tokenizer: &mut Tokenizer) -> State { + tokenizer.attempt(StateName::CodeFencedCloseBefore, |ok| { + State::Fn(if ok { + StateName::CodeFencedAfter + } else { + StateName::CodeFencedContentBefore + }) + }) } /// Before a closing fence, at the line ending. 
@@ -353,13 +359,13 @@ fn at_non_lazy_break(tokenizer: &mut Tokenizer) -> State { /// ^ /// | ~~~ /// ``` -fn close_begin(tokenizer: &mut Tokenizer) -> State { +pub fn close_before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'\n') => { tokenizer.enter(Token::LineEnding); tokenizer.consume(); tokenizer.exit(Token::LineEnding); - State::Fn(Box::new(close_start)) + State::Fn(StateName::CodeFencedCloseStart) } _ => unreachable!("expected eol"), } @@ -373,19 +379,18 @@ fn close_begin(tokenizer: &mut Tokenizer) -> State { /// > | ~~~ /// ^ /// ``` -fn close_start(tokenizer: &mut Tokenizer) -> State { +pub fn close_start(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(Token::CodeFencedFence); - tokenizer.go( - space_or_tab_min_max( - 0, - if tokenizer.parse_state.constructs.code_indented { - TAB_SIZE - 1 - } else { - usize::MAX - }, - ), - close_before, - )(tokenizer) + let state_name = space_or_tab_min_max( + tokenizer, + 0, + if tokenizer.parse_state.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }, + ); + tokenizer.go(state_name, StateName::CodeFencedBeforeSequenceClose) } /// In a closing fence, after optional whitespace, before sequence. 
@@ -396,11 +401,11 @@ fn close_start(tokenizer: &mut Tokenizer) -> State { /// > | ~~~ /// ^ /// ``` -fn close_before(tokenizer: &mut Tokenizer) -> State { +pub fn before_sequence_close(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'`' | b'~') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => { tokenizer.enter(Token::CodeFencedFenceSequence); - close_sequence(tokenizer) + sequence_close(tokenizer) } _ => State::Nok, } @@ -414,19 +419,20 @@ fn close_before(tokenizer: &mut Tokenizer) -> State { /// > | ~~~ /// ^ /// ``` -fn close_sequence(tokenizer: &mut Tokenizer) -> State { +pub fn sequence_close(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'`' | b'~') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => { tokenizer.tokenize_state.size_other += 1; tokenizer.consume(); - State::Fn(Box::new(close_sequence)) + State::Fn(StateName::CodeFencedSequenceClose) } _ if tokenizer.tokenize_state.size_other >= CODE_FENCED_SEQUENCE_SIZE_MIN && tokenizer.tokenize_state.size_other >= tokenizer.tokenize_state.size => { tokenizer.tokenize_state.size_other = 0; tokenizer.exit(Token::CodeFencedFenceSequence); - tokenizer.attempt_opt(space_or_tab(), close_sequence_after)(tokenizer) + let state_name = space_or_tab(tokenizer); + tokenizer.attempt_opt(state_name, StateName::CodeFencedAfterSequenceClose) } _ => { tokenizer.tokenize_state.size_other = 0; @@ -443,7 +449,7 @@ fn close_sequence(tokenizer: &mut Tokenizer) -> State { /// > | ~~~ /// ^ /// ``` -fn close_sequence_after(tokenizer: &mut Tokenizer) -> State { +pub fn sequence_close_after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { tokenizer.exit(Token::CodeFencedFence); @@ -461,11 +467,11 @@ fn close_sequence_after(tokenizer: &mut Tokenizer) -> State { /// ^ /// | ~~~ /// ``` -fn content_before(tokenizer: &mut Tokenizer) -> State { +pub fn content_before(tokenizer: &mut Tokenizer) -> State { 
tokenizer.enter(Token::LineEnding); tokenizer.consume(); tokenizer.exit(Token::LineEnding); - State::Fn(Box::new(content_start)) + State::Fn(StateName::CodeFencedContentStart) } /// Before code content, definitely not before a closing fence. /// @@ -475,11 +481,9 @@ fn content_before(tokenizer: &mut Tokenizer) -> State { /// ^ /// | ~~~ /// ``` -fn content_start(tokenizer: &mut Tokenizer) -> State { - tokenizer.go( - space_or_tab_min_max(0, tokenizer.tokenize_state.prefix), - content_begin, - )(tokenizer) +pub fn content_start(tokenizer: &mut Tokenizer) -> State { + let state_name = space_or_tab_min_max(tokenizer, 0, tokenizer.tokenize_state.prefix); + tokenizer.go(state_name, StateName::CodeFencedBeforeContentChunk) } /// Before code content, after a prefix. @@ -490,12 +494,12 @@ fn content_start(tokenizer: &mut Tokenizer) -> State { /// ^ /// | ~~~ /// ``` -fn content_begin(tokenizer: &mut Tokenizer) -> State { +pub fn before_content_chunk(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => at_break(tokenizer), _ => { tokenizer.enter(Token::CodeFlowChunk); - content_continue(tokenizer) + content_chunk(tokenizer) } } } @@ -508,7 +512,7 @@ fn content_begin(tokenizer: &mut Tokenizer) -> State { /// ^^^^^^^^^^^^^^ /// | ~~~ /// ``` -fn content_continue(tokenizer: &mut Tokenizer) -> State { +pub fn content_chunk(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { tokenizer.exit(Token::CodeFlowChunk); @@ -516,7 +520,7 @@ fn content_continue(tokenizer: &mut Tokenizer) -> State { } _ => { tokenizer.consume(); - State::Fn(Box::new(content_continue)) + State::Fn(StateName::CodeFencedContentChunk) } } } @@ -529,7 +533,7 @@ fn content_continue(tokenizer: &mut Tokenizer) -> State { /// > | ~~~ /// ^ /// ``` -fn after(tokenizer: &mut Tokenizer) -> State { +pub fn after(tokenizer: &mut Tokenizer) -> State { tokenizer.exit(Token::CodeFenced); tokenizer.tokenize_state.marker = 0; tokenizer.tokenize_state.prefix 
= 0; diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs index 81a3080..516b493 100644 --- a/src/construct/code_indented.rs +++ b/src/construct/code_indented.rs @@ -48,7 +48,7 @@ use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}; use crate::constant::TAB_SIZE; use crate::token::Token; -use crate::tokenizer::{State, Tokenizer}; +use crate::tokenizer::{State, StateName, Tokenizer}; /// Start of code (indented). /// @@ -64,7 +64,8 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { // Do not interrupt paragraphs. if !tokenizer.interrupt && tokenizer.parse_state.constructs.code_indented { tokenizer.enter(Token::CodeIndented); - tokenizer.go(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), at_break)(tokenizer) + let state_name = space_or_tab_min_max(tokenizer, TAB_SIZE, TAB_SIZE); + tokenizer.go(state_name, StateName::CodeIndentedAtBreak) } else { State::Nok } @@ -76,15 +77,19 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// > | aaa /// ^ ^ /// ``` -fn at_break(tokenizer: &mut Tokenizer) -> State { +pub fn at_break(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => after(tokenizer), - Some(b'\n') => tokenizer.attempt(further_start, |ok| { - Box::new(if ok { at_break } else { after }) - })(tokenizer), + Some(b'\n') => tokenizer.attempt(StateName::CodeIndentedFurtherStart, |ok| { + State::Fn(if ok { + StateName::CodeIndentedAtBreak + } else { + StateName::CodeIndentedAfter + }) + }), _ => { tokenizer.enter(Token::CodeFlowChunk); - content(tokenizer) + inside(tokenizer) } } } @@ -95,7 +100,7 @@ fn at_break(tokenizer: &mut Tokenizer) -> State { /// > | aaa /// ^^^^ /// ``` -fn content(tokenizer: &mut Tokenizer) -> State { +pub fn inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { tokenizer.exit(Token::CodeFlowChunk); @@ -103,7 +108,7 @@ fn content(tokenizer: &mut Tokenizer) -> State { } _ => { tokenizer.consume(); - State::Fn(Box::new(content)) + 
State::Fn(StateName::CodeIndentedInside) } } } @@ -114,7 +119,7 @@ fn content(tokenizer: &mut Tokenizer) -> State { /// > | aaa /// ^ /// ``` -fn after(tokenizer: &mut Tokenizer) -> State { +pub fn after(tokenizer: &mut Tokenizer) -> State { tokenizer.exit(Token::CodeIndented); // Feel free to interrupt. tokenizer.interrupt = false; @@ -128,17 +133,24 @@ fn after(tokenizer: &mut Tokenizer) -> State { /// ^ /// | bbb /// ``` -fn further_start(tokenizer: &mut Tokenizer) -> State { +pub fn further_start(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'\n') if !tokenizer.lazy => { tokenizer.enter(Token::LineEnding); tokenizer.consume(); tokenizer.exit(Token::LineEnding); - State::Fn(Box::new(further_start)) + State::Fn(StateName::CodeIndentedFurtherStart) + } + _ if !tokenizer.lazy => { + let state_name = space_or_tab_min_max(tokenizer, TAB_SIZE, TAB_SIZE); + tokenizer.attempt(state_name, |ok| { + State::Fn(if ok { + StateName::CodeIndentedFurtherEnd + } else { + StateName::CodeIndentedFurtherBegin + }) + }) } - _ if !tokenizer.lazy => tokenizer.attempt(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), |ok| { - Box::new(if ok { further_end } else { further_begin }) - })(tokenizer), _ => State::Nok, } } @@ -150,7 +162,7 @@ fn further_start(tokenizer: &mut Tokenizer) -> State { /// ^ /// | bbb /// ``` -fn further_end(_tokenizer: &mut Tokenizer) -> State { +pub fn further_end(_tokenizer: &mut Tokenizer) -> State { State::Ok } @@ -161,8 +173,9 @@ fn further_end(_tokenizer: &mut Tokenizer) -> State { /// > | bbb /// ^ /// ``` -fn further_begin(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt_opt(space_or_tab(), further_after)(tokenizer) +pub fn further_begin(tokenizer: &mut Tokenizer) -> State { + let state_name = space_or_tab(tokenizer); + tokenizer.attempt_opt(state_name, StateName::CodeIndentedFurtherAfter) } /// After whitespace, not indented enough. 
@@ -172,7 +185,7 @@ fn further_begin(tokenizer: &mut Tokenizer) -> State { /// > | bbb /// ^ /// ``` -fn further_after(tokenizer: &mut Tokenizer) -> State { +pub fn further_after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'\n') => further_start(tokenizer), _ => State::Nok, diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs index 31777f4..5bdefbb 100644 --- a/src/construct/code_text.rs +++ b/src/construct/code_text.rs @@ -84,7 +84,7 @@ //! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element use crate::token::Token; -use crate::tokenizer::{State, Tokenizer}; +use crate::tokenizer::{State, StateName, Tokenizer}; /// Start of code (text). /// @@ -117,11 +117,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// > | `a` /// ^ /// ``` -fn sequence_open(tokenizer: &mut Tokenizer) -> State { +pub fn sequence_open(tokenizer: &mut Tokenizer) -> State { if let Some(b'`') = tokenizer.current { tokenizer.tokenize_state.size += 1; tokenizer.consume(); - State::Fn(Box::new(sequence_open)) + State::Fn(StateName::CodeTextSequenceOpen) } else { tokenizer.exit(Token::CodeTextSequence); between(tokenizer) @@ -134,7 +134,7 @@ fn sequence_open(tokenizer: &mut Tokenizer) -> State { /// > | `a` /// ^^ /// ``` -fn between(tokenizer: &mut Tokenizer) -> State { +pub fn between(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => { tokenizer.tokenize_state.size = 0; @@ -144,7 +144,7 @@ fn between(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(Token::LineEnding); tokenizer.consume(); tokenizer.exit(Token::LineEnding); - State::Fn(Box::new(between)) + State::Fn(StateName::CodeTextBetween) } Some(b'`') => { tokenizer.enter(Token::CodeTextSequence); @@ -163,7 +163,7 @@ fn between(tokenizer: &mut Tokenizer) -> State { /// > | `a` /// ^ /// ``` -fn data(tokenizer: &mut Tokenizer) -> State { +pub fn data(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | 
Some(b'\n' | b'`') => { tokenizer.exit(Token::CodeTextData); @@ -171,7 +171,7 @@ fn data(tokenizer: &mut Tokenizer) -> State { } _ => { tokenizer.consume(); - State::Fn(Box::new(data)) + State::Fn(StateName::CodeTextData) } } } @@ -182,12 +182,12 @@ fn data(tokenizer: &mut Tokenizer) -> State { /// > | `a` /// ^ /// ``` -fn sequence_close(tokenizer: &mut Tokenizer) -> State { +pub fn sequence_close(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'`') => { tokenizer.tokenize_state.size_other += 1; tokenizer.consume(); - State::Fn(Box::new(sequence_close)) + State::Fn(StateName::CodeTextSequenceClose) } _ => { if tokenizer.tokenize_state.size == tokenizer.tokenize_state.size_other { diff --git a/src/construct/definition.rs b/src/construct/definition.rs index a56dab4..fbad99d 100644 --- a/src/construct/definition.rs +++ b/src/construct/definition.rs @@ -93,14 +93,9 @@ //! [html-a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element //! [html-img]: https://html.spec.whatwg.org/multipage/embedded-content.html#the-img-element -use crate::construct::{ - partial_destination::start as destination, - partial_label::start as label, - partial_space_or_tab::{space_or_tab, space_or_tab_eol}, - partial_title::start as title, -}; +use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_eol}; use crate::token::Token; -use crate::tokenizer::{State, Tokenizer}; +use crate::tokenizer::{State, StateName, Tokenizer}; use crate::util::skip::opt_back as skip_opt_back; /// At the start of a definition. @@ -124,7 +119,8 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { if possible && tokenizer.parse_state.constructs.definition { tokenizer.enter(Token::Definition); // Note: arbitrary whitespace allowed even if code (indented) is on. 
- tokenizer.attempt_opt(space_or_tab(), before)(tokenizer) + let state_name = space_or_tab(tokenizer); + tokenizer.attempt_opt(state_name, StateName::DefinitionBefore) } else { State::Nok } @@ -136,13 +132,13 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// > | [a]: b "c" /// ^ /// ``` -fn before(tokenizer: &mut Tokenizer) -> State { +pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'[') => { tokenizer.tokenize_state.token_1 = Token::DefinitionLabel; tokenizer.tokenize_state.token_2 = Token::DefinitionLabelMarker; tokenizer.tokenize_state.token_3 = Token::DefinitionLabelString; - tokenizer.go(label, label_after)(tokenizer) + tokenizer.go(StateName::LabelStart, StateName::DefinitionLabelAfter) } _ => State::Nok, } @@ -154,7 +150,7 @@ fn before(tokenizer: &mut Tokenizer) -> State { /// > | [a]: b "c" /// ^ /// ``` -fn label_after(tokenizer: &mut Tokenizer) -> State { +pub fn label_after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_1 = Token::Data; tokenizer.tokenize_state.token_2 = Token::Data; tokenizer.tokenize_state.token_3 = Token::Data; @@ -164,34 +160,38 @@ fn label_after(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(Token::DefinitionMarker); tokenizer.consume(); tokenizer.exit(Token::DefinitionMarker); - State::Fn(Box::new( - tokenizer.attempt_opt(space_or_tab_eol(), destination_before), - )) + State::Fn(StateName::DefinitionMarkerAfter) } _ => State::Nok, } } +/// To do. +pub fn marker_after(tokenizer: &mut Tokenizer) -> State { + let state_name = space_or_tab_eol(tokenizer); + tokenizer.attempt_opt(state_name, StateName::DefinitionDestinationBefore) +} + /// Before a destination. 
/// /// ```markdown /// > | [a]: b "c" /// ^ /// ``` -fn destination_before(tokenizer: &mut Tokenizer) -> State { +pub fn destination_before(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_1 = Token::DefinitionDestination; tokenizer.tokenize_state.token_2 = Token::DefinitionDestinationLiteral; tokenizer.tokenize_state.token_3 = Token::DefinitionDestinationLiteralMarker; tokenizer.tokenize_state.token_4 = Token::DefinitionDestinationRaw; tokenizer.tokenize_state.token_5 = Token::DefinitionDestinationString; tokenizer.tokenize_state.size_other = usize::MAX; - tokenizer.attempt(destination, |ok| { - Box::new(if ok { - destination_after + tokenizer.attempt(StateName::DestinationStart, |ok| { + State::Fn(if ok { + StateName::DefinitionDestinationAfter } else { - destination_missing + StateName::DefinitionDestinationMissing }) - })(tokenizer) + }) } /// After a destination. @@ -200,18 +200,18 @@ fn destination_before(tokenizer: &mut Tokenizer) -> State { /// > | [a]: b "c" /// ^ /// ``` -fn destination_after(tokenizer: &mut Tokenizer) -> State { +pub fn destination_after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_1 = Token::Data; tokenizer.tokenize_state.token_2 = Token::Data; tokenizer.tokenize_state.token_3 = Token::Data; tokenizer.tokenize_state.token_4 = Token::Data; tokenizer.tokenize_state.token_5 = Token::Data; tokenizer.tokenize_state.size_other = 0; - tokenizer.attempt_opt(title_before, after)(tokenizer) + tokenizer.attempt_opt(StateName::DefinitionTitleBefore, StateName::DefinitionAfter) } /// Without destination. 
-fn destination_missing(tokenizer: &mut Tokenizer) -> State { +pub fn destination_missing(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_1 = Token::Data; tokenizer.tokenize_state.token_2 = Token::Data; tokenizer.tokenize_state.token_3 = Token::Data; @@ -229,8 +229,9 @@ fn destination_missing(tokenizer: &mut Tokenizer) -> State { /// > | [a]: b "c" /// ^ /// ``` -fn after(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt_opt(space_or_tab(), after_whitespace)(tokenizer) +pub fn after(tokenizer: &mut Tokenizer) -> State { + let state_name = space_or_tab(tokenizer); + tokenizer.attempt_opt(state_name, StateName::DefinitionAfterWhitespace) } /// After a definition, after optional whitespace. @@ -241,7 +242,7 @@ fn after(tokenizer: &mut Tokenizer) -> State { /// > | [a]: b "c" /// ^ /// ``` -fn after_whitespace(tokenizer: &mut Tokenizer) -> State { +pub fn after_whitespace(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { tokenizer.exit(Token::Definition); @@ -261,8 +262,9 @@ fn after_whitespace(tokenizer: &mut Tokenizer) -> State { /// > | [a]: b "c" /// ^ /// ``` -fn title_before(tokenizer: &mut Tokenizer) -> State { - tokenizer.go(space_or_tab_eol(), title_before_marker)(tokenizer) +pub fn title_before(tokenizer: &mut Tokenizer) -> State { + let state_name = space_or_tab_eol(tokenizer); + tokenizer.go(state_name, StateName::DefinitionTitleBeforeMarker) } /// Before a title, after a line ending. 
@@ -272,11 +274,11 @@ fn title_before(tokenizer: &mut Tokenizer) -> State { /// > | "c" /// ^ /// ``` -fn title_before_marker(tokenizer: &mut Tokenizer) -> State { +pub fn title_before_marker(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_1 = Token::DefinitionTitle; tokenizer.tokenize_state.token_2 = Token::DefinitionTitleMarker; tokenizer.tokenize_state.token_3 = Token::DefinitionTitleString; - tokenizer.go(title, title_after)(tokenizer) + tokenizer.go(StateName::TitleStart, StateName::DefinitionTitleAfter) } /// After a title. @@ -285,11 +287,15 @@ fn title_before_marker(tokenizer: &mut Tokenizer) -> State { /// > | [a]: b "c" /// ^ /// ``` -fn title_after(tokenizer: &mut Tokenizer) -> State { +pub fn title_after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_1 = Token::Data; tokenizer.tokenize_state.token_2 = Token::Data; tokenizer.tokenize_state.token_3 = Token::Data; - tokenizer.attempt_opt(space_or_tab(), title_after_after_optional_whitespace)(tokenizer) + let state_name = space_or_tab(tokenizer); + tokenizer.attempt_opt( + state_name, + StateName::DefinitionTitleAfterOptionalWhitespace, + ) } /// After a title, after optional whitespace. @@ -298,7 +304,7 @@ fn title_after(tokenizer: &mut Tokenizer) -> State { /// > | [a]: b "c" /// ^ /// ``` -fn title_after_after_optional_whitespace(tokenizer: &mut Tokenizer) -> State { +pub fn title_after_optional_whitespace(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => State::Ok, _ => State::Nok, diff --git a/src/construct/hard_break_escape.rs b/src/construct/hard_break_escape.rs index d09bf54..47b7e94 100644 --- a/src/construct/hard_break_escape.rs +++ b/src/construct/hard_break_escape.rs @@ -40,7 +40,7 @@ //! 
[html]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-br-element use crate::token::Token; -use crate::tokenizer::{State, Tokenizer}; +use crate::tokenizer::{State, StateName, Tokenizer}; /// Start of a hard break (escape). /// @@ -54,7 +54,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { Some(b'\\') if tokenizer.parse_state.constructs.hard_break_escape => { tokenizer.enter(Token::HardBreakEscape); tokenizer.consume(); - State::Fn(Box::new(after)) + State::Fn(StateName::HardBreakEscapeAfter) } _ => State::Nok, } @@ -67,7 +67,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// ^ /// | b /// ``` -fn after(tokenizer: &mut Tokenizer) -> State { +pub fn after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'\n') => { tokenizer.exit(Token::HardBreakEscape); diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs index 6751567..45c4758 100644 --- a/src/construct/heading_atx.rs +++ b/src/construct/heading_atx.rs @@ -57,7 +57,7 @@ use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}; use crate::constant::{HEADING_ATX_OPENING_FENCE_SIZE_MAX, TAB_SIZE}; use crate::token::Token; -use crate::tokenizer::{ContentType, Event, EventType, State, Tokenizer}; +use crate::tokenizer::{ContentType, Event, EventType, State, StateName, Tokenizer}; /// Start of a heading (atx). 
/// @@ -68,17 +68,16 @@ use crate::tokenizer::{ContentType, Event, EventType, State, Tokenizer}; pub fn start(tokenizer: &mut Tokenizer) -> State { if tokenizer.parse_state.constructs.heading_atx { tokenizer.enter(Token::HeadingAtx); - tokenizer.go( - space_or_tab_min_max( - 0, - if tokenizer.parse_state.constructs.code_indented { - TAB_SIZE - 1 - } else { - usize::MAX - }, - ), - before, - )(tokenizer) + let state_name = space_or_tab_min_max( + tokenizer, + 0, + if tokenizer.parse_state.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }, + ); + tokenizer.go(state_name, StateName::HeadingAtxBefore) } else { State::Nok } @@ -90,7 +89,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// > | ## aa /// ^ /// ``` -fn before(tokenizer: &mut Tokenizer) -> State { +pub fn before(tokenizer: &mut Tokenizer) -> State { if Some(b'#') == tokenizer.current { tokenizer.enter(Token::HeadingAtxSequence); sequence_open(tokenizer) @@ -105,7 +104,7 @@ fn before(tokenizer: &mut Tokenizer) -> State { /// > | ## aa /// ^ /// ``` -fn sequence_open(tokenizer: &mut Tokenizer) -> State { +pub fn sequence_open(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') if tokenizer.tokenize_state.size > 0 => { tokenizer.tokenize_state.size = 0; @@ -115,12 +114,13 @@ fn sequence_open(tokenizer: &mut Tokenizer) -> State { Some(b'#') if tokenizer.tokenize_state.size < HEADING_ATX_OPENING_FENCE_SIZE_MAX => { tokenizer.tokenize_state.size += 1; tokenizer.consume(); - State::Fn(Box::new(sequence_open)) + State::Fn(StateName::HeadingAtxSequenceOpen) } _ if tokenizer.tokenize_state.size > 0 => { tokenizer.tokenize_state.size = 0; tokenizer.exit(Token::HeadingAtxSequence); - tokenizer.go(space_or_tab(), at_break)(tokenizer) + let state_name = space_or_tab(tokenizer); + tokenizer.go(state_name, StateName::HeadingAtxAtBreak) } _ => { tokenizer.tokenize_state.size = 0; @@ -135,7 +135,7 @@ fn sequence_open(tokenizer: &mut Tokenizer) -> State { /// > | ## aa 
/// ^ /// ``` -fn at_break(tokenizer: &mut Tokenizer) -> State { +pub fn at_break(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { tokenizer.exit(Token::HeadingAtx); @@ -144,10 +144,13 @@ fn at_break(tokenizer: &mut Tokenizer) -> State { tokenizer.interrupt = false; State::Ok } - Some(b'\t' | b' ') => tokenizer.go(space_or_tab(), at_break)(tokenizer), + Some(b'\t' | b' ') => { + let state_name = space_or_tab(tokenizer); + tokenizer.go(state_name, StateName::HeadingAtxAtBreak) + } Some(b'#') => { tokenizer.enter(Token::HeadingAtxSequence); - further_sequence(tokenizer) + sequence_further(tokenizer) } Some(_) => { tokenizer.enter_with_content(Token::Data, Some(ContentType::Text)); @@ -164,10 +167,10 @@ fn at_break(tokenizer: &mut Tokenizer) -> State { /// > | ## aa ## /// ^ /// ``` -fn further_sequence(tokenizer: &mut Tokenizer) -> State { +pub fn sequence_further(tokenizer: &mut Tokenizer) -> State { if let Some(b'#') = tokenizer.current { tokenizer.consume(); - State::Fn(Box::new(further_sequence)) + State::Fn(StateName::HeadingAtxSequenceFurther) } else { tokenizer.exit(Token::HeadingAtxSequence); at_break(tokenizer) @@ -180,7 +183,7 @@ fn further_sequence(tokenizer: &mut Tokenizer) -> State { /// > | ## aa /// ^ /// ``` -fn data(tokenizer: &mut Tokenizer) -> State { +pub fn data(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // Note: `#` for closing sequence must be preceded by whitespace, otherwise it’s just text. 
None | Some(b'\t' | b'\n' | b' ') => { @@ -189,7 +192,7 @@ fn data(tokenizer: &mut Tokenizer) -> State { } _ => { tokenizer.consume(); - State::Fn(Box::new(data)) + State::Fn(StateName::HeadingAtxData) } } } diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs index 675b2ac..50feba4 100644 --- a/src/construct/heading_setext.rs +++ b/src/construct/heading_setext.rs @@ -60,7 +60,7 @@ use crate::constant::TAB_SIZE; use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}; use crate::token::Token; -use crate::tokenizer::{EventType, State, Tokenizer}; +use crate::tokenizer::{EventType, State, StateName, Tokenizer}; use crate::util::skip::opt_back as skip_opt_back; /// At a line ending, presumably an underline. @@ -83,17 +83,17 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { .token_type == Token::Paragraph) { - tokenizer.go( - space_or_tab_min_max( - 0, - if tokenizer.parse_state.constructs.code_indented { - TAB_SIZE - 1 - } else { - usize::MAX - }, - ), - before, - )(tokenizer) + let state_name = space_or_tab_min_max( + tokenizer, + 0, + if tokenizer.parse_state.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }, + ); + + tokenizer.go(state_name, StateName::HeadingSetextBefore) } else { State::Nok } @@ -106,7 +106,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// > | == /// ^ /// ``` -fn before(tokenizer: &mut Tokenizer) -> State { +pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'-' | b'=') => { tokenizer.tokenize_state.marker = tokenizer.current.unwrap(); @@ -124,16 +124,17 @@ fn before(tokenizer: &mut Tokenizer) -> State { /// > | == /// ^ /// ``` -fn inside(tokenizer: &mut Tokenizer) -> State { +pub fn inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'-' | b'=') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => { tokenizer.consume(); - State::Fn(Box::new(inside)) + 
State::Fn(StateName::HeadingSetextInside) } _ => { tokenizer.tokenize_state.marker = 0; tokenizer.exit(Token::HeadingSetextUnderline); - tokenizer.attempt_opt(space_or_tab(), after)(tokenizer) + let state_name = space_or_tab(tokenizer); + tokenizer.attempt_opt(state_name, StateName::HeadingSetextAfter) } } } @@ -145,7 +146,7 @@ fn inside(tokenizer: &mut Tokenizer) -> State { /// > | == /// ^ /// ``` -fn after(tokenizer: &mut Tokenizer) -> State { +pub fn after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { // Feel free to interrupt. diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs index aaa803d..779146c 100644 --- a/src/construct/html_flow.rs +++ b/src/construct/html_flow.rs @@ -101,13 +101,11 @@ use crate::constant::{ HTML_BLOCK_NAMES, HTML_CDATA_PREFIX, HTML_RAW_NAMES, HTML_RAW_SIZE_MAX, TAB_SIZE, }; -use crate::construct::{ - blank_line::start as blank_line, - partial_non_lazy_continuation::start as partial_non_lazy_continuation, - partial_space_or_tab::{space_or_tab_with_options, Options as SpaceOrTabOptions}, +use crate::construct::partial_space_or_tab::{ + space_or_tab_with_options, Options as SpaceOrTabOptions, }; use crate::token::Token; -use crate::tokenizer::{State, Tokenizer}; +use crate::tokenizer::{State, StateName, Tokenizer}; use crate::util::slice::Slice; /// Symbol for `<script>` (condition 1). 
@@ -134,8 +132,9 @@ const COMPLETE: u8 = 7; pub fn start(tokenizer: &mut Tokenizer) -> State { if tokenizer.parse_state.constructs.html_flow { tokenizer.enter(Token::HtmlFlow); - tokenizer.go( - space_or_tab_with_options(SpaceOrTabOptions { + let state_name = space_or_tab_with_options( + tokenizer, + SpaceOrTabOptions { kind: Token::HtmlFlowData, min: 0, max: if tokenizer.parse_state.constructs.code_indented { @@ -145,9 +144,10 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { }, connect: false, content_type: None, - }), - before, - )(tokenizer) + }, + ); + + tokenizer.go(state_name, StateName::HtmlFlowBefore) } else { State::Nok } @@ -159,11 +159,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// > | <x /> /// ^ /// ``` -fn before(tokenizer: &mut Tokenizer) -> State { +pub fn before(tokenizer: &mut Tokenizer) -> State { if Some(b'<') == tokenizer.current { tokenizer.enter(Token::HtmlFlowData); tokenizer.consume(); - State::Fn(Box::new(open)) + State::Fn(StateName::HtmlFlowOpen) } else { State::Nok } @@ -179,17 +179,17 @@ fn before(tokenizer: &mut Tokenizer) -> State { /// > | <!--xxx--> /// ^ /// ``` -fn open(tokenizer: &mut Tokenizer) -> State { +pub fn open(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'!') => { tokenizer.consume(); - State::Fn(Box::new(declaration_open)) + State::Fn(StateName::HtmlFlowDeclarationOpen) } Some(b'/') => { tokenizer.consume(); tokenizer.tokenize_state.seen = true; tokenizer.tokenize_state.start = tokenizer.point.index; - State::Fn(Box::new(tag_close_start)) + State::Fn(StateName::HtmlFlowTagCloseStart) } Some(b'?') => { tokenizer.tokenize_state.marker = INSTRUCTION; @@ -198,7 +198,7 @@ fn open(tokenizer: &mut Tokenizer) -> State { tokenizer.concrete = true; // While we’re in an instruction instead of a declaration, we’re on a `?` // right now, so we do need to search for `>`, similar to declarations. 
- State::Fn(Box::new(continuation_declaration_inside)) + State::Fn(StateName::HtmlFlowContinuationDeclarationInside) } // ASCII alphabetical. Some(b'A'..=b'Z' | b'a'..=b'z') => { @@ -219,24 +219,24 @@ fn open(tokenizer: &mut Tokenizer) -> State { /// > | <![CDATA[>&<]]> /// ^ /// ``` -fn declaration_open(tokenizer: &mut Tokenizer) -> State { +pub fn declaration_open(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'-') => { tokenizer.consume(); tokenizer.tokenize_state.marker = COMMENT; - State::Fn(Box::new(comment_open_inside)) + State::Fn(StateName::HtmlFlowCommentOpenInside) } Some(b'A'..=b'Z' | b'a'..=b'z') => { tokenizer.consume(); tokenizer.tokenize_state.marker = DECLARATION; // Do not form containers. tokenizer.concrete = true; - State::Fn(Box::new(continuation_declaration_inside)) + State::Fn(StateName::HtmlFlowContinuationDeclarationInside) } Some(b'[') => { tokenizer.consume(); tokenizer.tokenize_state.marker = CDATA; - State::Fn(Box::new(cdata_open_inside)) + State::Fn(StateName::HtmlFlowCdataOpenInside) } _ => State::Nok, } @@ -248,12 +248,12 @@ fn declaration_open(tokenizer: &mut Tokenizer) -> State { /// > | <!--xxx--> /// ^ /// ``` -fn comment_open_inside(tokenizer: &mut Tokenizer) -> State { +pub fn comment_open_inside(tokenizer: &mut Tokenizer) -> State { if let Some(b'-') = tokenizer.current { tokenizer.consume(); // Do not form containers. 
tokenizer.concrete = true; - State::Fn(Box::new(continuation_declaration_inside)) + State::Fn(StateName::HtmlFlowContinuationDeclarationInside) } else { tokenizer.tokenize_state.marker = 0; State::Nok @@ -266,7 +266,7 @@ fn comment_open_inside(tokenizer: &mut Tokenizer) -> State { /// > | <![CDATA[>&<]]> /// ^^^^^^ /// ``` -fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State { +pub fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State { if tokenizer.current == Some(HTML_CDATA_PREFIX[tokenizer.tokenize_state.size]) { tokenizer.tokenize_state.size += 1; tokenizer.consume(); @@ -275,9 +275,9 @@ fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.size = 0; // Do not form containers. tokenizer.concrete = true; - State::Fn(Box::new(continuation)) + State::Fn(StateName::HtmlFlowContinuation) } else { - State::Fn(Box::new(cdata_open_inside)) + State::Fn(StateName::HtmlFlowCdataOpenInside) } } else { tokenizer.tokenize_state.marker = 0; @@ -292,10 +292,10 @@ fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State { /// > | </x> /// ^ /// ``` -fn tag_close_start(tokenizer: &mut Tokenizer) -> State { +pub fn tag_close_start(tokenizer: &mut Tokenizer) -> State { if let Some(b'A'..=b'Z' | b'a'..=b'z') = tokenizer.current { tokenizer.consume(); - State::Fn(Box::new(tag_name)) + State::Fn(StateName::HtmlFlowTagName) } else { tokenizer.tokenize_state.seen = false; tokenizer.tokenize_state.start = 0; @@ -311,7 +311,7 @@ fn tag_close_start(tokenizer: &mut Tokenizer) -> State { /// > | </ab> /// ^^ /// ``` -fn tag_name(tokenizer: &mut Tokenizer) -> State { +pub fn tag_name(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\t' | b'\n' | b' ' | b'/' | b'>') => { let closing_tag = tokenizer.tokenize_state.seen; @@ -340,7 +340,7 @@ fn tag_name(tokenizer: &mut Tokenizer) -> State { if slash { tokenizer.consume(); - State::Fn(Box::new(basic_self_closing)) + State::Fn(StateName::HtmlFlowBasicSelfClosing) } else { // Do not 
form containers. tokenizer.concrete = true; @@ -363,7 +363,7 @@ fn tag_name(tokenizer: &mut Tokenizer) -> State { // ASCII alphanumerical and `-`. Some(b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => { tokenizer.consume(); - State::Fn(Box::new(tag_name)) + State::Fn(StateName::HtmlFlowTagName) } Some(_) => { tokenizer.tokenize_state.seen = false; @@ -378,12 +378,12 @@ fn tag_name(tokenizer: &mut Tokenizer) -> State { /// > | <div/> /// ^ /// ``` -fn basic_self_closing(tokenizer: &mut Tokenizer) -> State { +pub fn basic_self_closing(tokenizer: &mut Tokenizer) -> State { if let Some(b'>') = tokenizer.current { tokenizer.consume(); // Do not form containers. tokenizer.concrete = true; - State::Fn(Box::new(continuation)) + State::Fn(StateName::HtmlFlowContinuation) } else { tokenizer.tokenize_state.marker = 0; State::Nok @@ -396,11 +396,11 @@ fn basic_self_closing(tokenizer: &mut Tokenizer) -> State { /// > | <x/> /// ^ /// ``` -fn complete_closing_tag_after(tokenizer: &mut Tokenizer) -> State { +pub fn complete_closing_tag_after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'\t' | b' ') => { tokenizer.consume(); - State::Fn(Box::new(complete_closing_tag_after)) + State::Fn(StateName::HtmlFlowCompleteClosingTagAfter) } _ => complete_end(tokenizer), } @@ -425,20 +425,20 @@ fn complete_closing_tag_after(tokenizer: &mut Tokenizer) -> State { /// > | <a > /// ^ /// ``` -fn complete_attribute_name_before(tokenizer: &mut Tokenizer) -> State { +pub fn complete_attribute_name_before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'\t' | b' ') => { tokenizer.consume(); - State::Fn(Box::new(complete_attribute_name_before)) + State::Fn(StateName::HtmlFlowCompleteAttributeNameBefore) } Some(b'/') => { tokenizer.consume(); - State::Fn(Box::new(complete_end)) + State::Fn(StateName::HtmlFlowCompleteEnd) } // ASCII alphanumerical and `:` and `_`. 
Some(b'0'..=b'9' | b':' | b'A'..=b'Z' | b'_' | b'a'..=b'z') => { tokenizer.consume(); - State::Fn(Box::new(complete_attribute_name)) + State::Fn(StateName::HtmlFlowCompleteAttributeName) } _ => complete_end(tokenizer), } @@ -454,12 +454,12 @@ fn complete_attribute_name_before(tokenizer: &mut Tokenizer) -> State { /// > | <a b> /// ^ /// ``` -fn complete_attribute_name(tokenizer: &mut Tokenizer) -> State { +pub fn complete_attribute_name(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // ASCII alphanumerical and `-`, `.`, `:`, and `_`. Some(b'-' | b'.' | b'0'..=b'9' | b':' | b'A'..=b'Z' | b'_' | b'a'..=b'z') => { tokenizer.consume(); - State::Fn(Box::new(complete_attribute_name)) + State::Fn(StateName::HtmlFlowCompleteAttributeName) } _ => complete_attribute_name_after(tokenizer), } @@ -474,15 +474,15 @@ fn complete_attribute_name(tokenizer: &mut Tokenizer) -> State { /// > | <a b=c> /// ^ /// ``` -fn complete_attribute_name_after(tokenizer: &mut Tokenizer) -> State { +pub fn complete_attribute_name_after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'\t' | b' ') => { tokenizer.consume(); - State::Fn(Box::new(complete_attribute_name_after)) + State::Fn(StateName::HtmlFlowCompleteAttributeNameAfter) } Some(b'=') => { tokenizer.consume(); - State::Fn(Box::new(complete_attribute_value_before)) + State::Fn(StateName::HtmlFlowCompleteAttributeValueBefore) } _ => complete_attribute_name_before(tokenizer), } @@ -497,7 +497,7 @@ fn complete_attribute_name_after(tokenizer: &mut Tokenizer) -> State { /// > | <a b="c"> /// ^ /// ``` -fn complete_attribute_value_before(tokenizer: &mut Tokenizer) -> State { +pub fn complete_attribute_value_before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'<' | b'=' | b'>' | b'`') => { tokenizer.tokenize_state.marker = 0; @@ -505,12 +505,12 @@ fn complete_attribute_value_before(tokenizer: &mut Tokenizer) -> State { } Some(b'\t' | b' ') => { tokenizer.consume(); - 
State::Fn(Box::new(complete_attribute_value_before)) + State::Fn(StateName::HtmlFlowCompleteAttributeValueBefore) } Some(b'"' | b'\'') => { tokenizer.tokenize_state.marker_other = tokenizer.current.unwrap(); tokenizer.consume(); - State::Fn(Box::new(complete_attribute_value_quoted)) + State::Fn(StateName::HtmlFlowCompleteAttributeValueQuoted) } _ => complete_attribute_value_unquoted(tokenizer), } @@ -524,7 +524,7 @@ fn complete_attribute_value_before(tokenizer: &mut Tokenizer) -> State { /// > | <a b='c'> /// ^ /// ``` -fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State { +pub fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { tokenizer.tokenize_state.marker = 0; @@ -536,11 +536,11 @@ fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State { { tokenizer.tokenize_state.marker_other = 0; tokenizer.consume(); - State::Fn(Box::new(complete_attribute_value_quoted_after)) + State::Fn(StateName::HtmlFlowCompleteAttributeValueQuotedAfter) } _ => { tokenizer.consume(); - State::Fn(Box::new(complete_attribute_value_quoted)) + State::Fn(StateName::HtmlFlowCompleteAttributeValueQuoted) } } } @@ -551,14 +551,14 @@ fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State { /// > | <a b=c> /// ^ /// ``` -fn complete_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State { +pub fn complete_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\t' | b'\n' | b' ' | b'"' | b'\'' | b'/' | b'<' | b'=' | b'>' | b'`') => { complete_attribute_name_after(tokenizer) } Some(_) => { tokenizer.consume(); - State::Fn(Box::new(complete_attribute_value_unquoted)) + State::Fn(StateName::HtmlFlowCompleteAttributeValueUnquoted) } } } @@ -570,7 +570,7 @@ fn complete_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State { /// > | <a b="c"> /// ^ /// ``` -fn complete_attribute_value_quoted_after(tokenizer: &mut Tokenizer) 
-> State { +pub fn complete_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State { if let Some(b'\t' | b' ' | b'/' | b'>') = tokenizer.current { complete_attribute_name_before(tokenizer) } else { @@ -585,10 +585,10 @@ fn complete_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State { /// > | <a b="c"> /// ^ /// ``` -fn complete_end(tokenizer: &mut Tokenizer) -> State { +pub fn complete_end(tokenizer: &mut Tokenizer) -> State { if let Some(b'>') = tokenizer.current { tokenizer.consume(); - State::Fn(Box::new(complete_after)) + State::Fn(StateName::HtmlFlowCompleteAfter) } else { tokenizer.tokenize_state.marker = 0; State::Nok @@ -601,7 +601,7 @@ fn complete_end(tokenizer: &mut Tokenizer) -> State { /// > | <x> /// ^ /// ``` -fn complete_after(tokenizer: &mut Tokenizer) -> State { +pub fn complete_after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { // Do not form containers. @@ -610,7 +610,7 @@ fn complete_after(tokenizer: &mut Tokenizer) -> State { } Some(b'\t' | b' ') => { tokenizer.consume(); - State::Fn(Box::new(complete_after)) + State::Fn(StateName::HtmlFlowCompleteAfter) } Some(_) => { tokenizer.tokenize_state.marker = 0; @@ -625,20 +625,20 @@ fn complete_after(tokenizer: &mut Tokenizer) -> State { /// > | <!--xxx--> /// ^ /// ``` -fn continuation(tokenizer: &mut Tokenizer) -> State { +pub fn continuation(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'\n') if tokenizer.tokenize_state.marker == BASIC || tokenizer.tokenize_state.marker == COMPLETE => { tokenizer.exit(Token::HtmlFlowData); - tokenizer.check(blank_line_before, |ok| { - Box::new(if ok { - continuation_after + tokenizer.check(StateName::HtmlFlowBlankLineBefore, |ok| { + State::Fn(if ok { + StateName::HtmlFlowContinuationAfter } else { - continuation_start + StateName::HtmlFlowContinuationStart }) - })(tokenizer) + }) } // Note: important that this is after the basic/complete case. 
None | Some(b'\n') => { @@ -647,27 +647,27 @@ fn continuation(tokenizer: &mut Tokenizer) -> State { } Some(b'-') if tokenizer.tokenize_state.marker == COMMENT => { tokenizer.consume(); - State::Fn(Box::new(continuation_comment_inside)) + State::Fn(StateName::HtmlFlowContinuationCommentInside) } Some(b'<') if tokenizer.tokenize_state.marker == RAW => { tokenizer.consume(); - State::Fn(Box::new(continuation_raw_tag_open)) + State::Fn(StateName::HtmlFlowContinuationRawTagOpen) } Some(b'>') if tokenizer.tokenize_state.marker == DECLARATION => { tokenizer.consume(); - State::Fn(Box::new(continuation_close)) + State::Fn(StateName::HtmlFlowContinuationClose) } Some(b'?') if tokenizer.tokenize_state.marker == INSTRUCTION => { tokenizer.consume(); - State::Fn(Box::new(continuation_declaration_inside)) + State::Fn(StateName::HtmlFlowContinuationDeclarationInside) } Some(b']') if tokenizer.tokenize_state.marker == CDATA => { tokenizer.consume(); - State::Fn(Box::new(continuation_character_data_inside)) + State::Fn(StateName::HtmlFlowContinuationCdataInside) } _ => { tokenizer.consume(); - State::Fn(Box::new(continuation)) + State::Fn(StateName::HtmlFlowContinuation) } } } @@ -679,14 +679,14 @@ fn continuation(tokenizer: &mut Tokenizer) -> State { /// ^ /// | asd /// ``` -fn continuation_start(tokenizer: &mut Tokenizer) -> State { - tokenizer.check(partial_non_lazy_continuation, |ok| { - Box::new(if ok { - continuation_start_non_lazy +pub fn continuation_start(tokenizer: &mut Tokenizer) -> State { + tokenizer.check(StateName::NonLazyContinuationStart, |ok| { + State::Fn(if ok { + StateName::HtmlFlowContinuationStartNonLazy } else { - continuation_after + StateName::HtmlFlowContinuationAfter }) - })(tokenizer) + }) } /// In continuation, at an eol, before non-lazy content. 
@@ -696,13 +696,13 @@ fn continuation_start(tokenizer: &mut Tokenizer) -> State { /// ^ /// | asd /// ``` -fn continuation_start_non_lazy(tokenizer: &mut Tokenizer) -> State { +pub fn continuation_start_non_lazy(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'\n') => { tokenizer.enter(Token::LineEnding); tokenizer.consume(); tokenizer.exit(Token::LineEnding); - State::Fn(Box::new(continuation_before)) + State::Fn(StateName::HtmlFlowContinuationBefore) } _ => unreachable!("expected eol"), } @@ -715,7 +715,7 @@ fn continuation_start_non_lazy(tokenizer: &mut Tokenizer) -> State { /// > | asd /// ^ /// ``` -fn continuation_before(tokenizer: &mut Tokenizer) -> State { +pub fn continuation_before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => continuation_start(tokenizer), _ => { @@ -731,11 +731,11 @@ fn continuation_before(tokenizer: &mut Tokenizer) -> State { /// > | <!--xxx--> /// ^ /// ``` -fn continuation_comment_inside(tokenizer: &mut Tokenizer) -> State { +pub fn continuation_comment_inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'-') => { tokenizer.consume(); - State::Fn(Box::new(continuation_declaration_inside)) + State::Fn(StateName::HtmlFlowContinuationDeclarationInside) } _ => continuation(tokenizer), } @@ -747,12 +747,12 @@ fn continuation_comment_inside(tokenizer: &mut Tokenizer) -> State { /// > | <script>console.log(1)</script> /// ^ /// ``` -fn continuation_raw_tag_open(tokenizer: &mut Tokenizer) -> State { +pub fn continuation_raw_tag_open(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'/') => { tokenizer.consume(); tokenizer.tokenize_state.start = tokenizer.point.index; - State::Fn(Box::new(continuation_raw_end_tag)) + State::Fn(StateName::HtmlFlowContinuationRawEndTag) } _ => continuation(tokenizer), } @@ -764,7 +764,7 @@ fn continuation_raw_tag_open(tokenizer: &mut Tokenizer) -> State { /// > | <script>console.log(1)</script> /// ^^^^^^ 
/// ``` -fn continuation_raw_end_tag(tokenizer: &mut Tokenizer) -> State { +pub fn continuation_raw_end_tag(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'>') => { // Guaranteed to be valid ASCII bytes. @@ -779,7 +779,7 @@ fn continuation_raw_end_tag(tokenizer: &mut Tokenizer) -> State { if HTML_RAW_NAMES.contains(&name.as_str()) { tokenizer.consume(); - State::Fn(Box::new(continuation_close)) + State::Fn(StateName::HtmlFlowContinuationClose) } else { continuation(tokenizer) } @@ -788,7 +788,7 @@ fn continuation_raw_end_tag(tokenizer: &mut Tokenizer) -> State { if tokenizer.point.index - tokenizer.tokenize_state.start < HTML_RAW_SIZE_MAX => { tokenizer.consume(); - State::Fn(Box::new(continuation_raw_end_tag)) + State::Fn(StateName::HtmlFlowContinuationRawEndTag) } _ => { tokenizer.tokenize_state.start = 0; @@ -803,11 +803,11 @@ fn continuation_raw_end_tag(tokenizer: &mut Tokenizer) -> State { /// > | <![CDATA[>&<]]> /// ^ /// ``` -fn continuation_character_data_inside(tokenizer: &mut Tokenizer) -> State { +pub fn continuation_cdata_inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b']') => { tokenizer.consume(); - State::Fn(Box::new(continuation_declaration_inside)) + State::Fn(StateName::HtmlFlowContinuationDeclarationInside) } _ => continuation(tokenizer), } @@ -827,15 +827,15 @@ fn continuation_character_data_inside(tokenizer: &mut Tokenizer) -> State { /// > | <![CDATA[>&<]]> /// ^ /// ``` -fn continuation_declaration_inside(tokenizer: &mut Tokenizer) -> State { +pub fn continuation_declaration_inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'>') => { tokenizer.consume(); - State::Fn(Box::new(continuation_close)) + State::Fn(StateName::HtmlFlowContinuationClose) } Some(b'-') if tokenizer.tokenize_state.marker == COMMENT => { tokenizer.consume(); - State::Fn(Box::new(continuation_declaration_inside)) + State::Fn(StateName::HtmlFlowContinuationDeclarationInside) } _ => 
continuation(tokenizer), } @@ -847,7 +847,7 @@ fn continuation_declaration_inside(tokenizer: &mut Tokenizer) -> State { /// > | <!doctype> /// ^ /// ``` -fn continuation_close(tokenizer: &mut Tokenizer) -> State { +pub fn continuation_close(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { tokenizer.exit(Token::HtmlFlowData); @@ -855,7 +855,7 @@ fn continuation_close(tokenizer: &mut Tokenizer) -> State { } _ => { tokenizer.consume(); - State::Fn(Box::new(continuation_close)) + State::Fn(StateName::HtmlFlowContinuationClose) } } } @@ -866,7 +866,7 @@ fn continuation_close(tokenizer: &mut Tokenizer) -> State { /// > | <!doctype> /// ^ /// ``` -fn continuation_after(tokenizer: &mut Tokenizer) -> State { +pub fn continuation_after(tokenizer: &mut Tokenizer) -> State { tokenizer.exit(Token::HtmlFlow); tokenizer.tokenize_state.marker = 0; // Feel free to interrupt. @@ -883,9 +883,9 @@ fn continuation_after(tokenizer: &mut Tokenizer) -> State { /// ^ /// | /// ``` -fn blank_line_before(tokenizer: &mut Tokenizer) -> State { +pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(Token::LineEnding); tokenizer.consume(); tokenizer.exit(Token::LineEnding); - State::Fn(Box::new(blank_line)) + State::Fn(StateName::BlankLineStart) } diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs index a4c0349..1c1f9e6 100644 --- a/src/construct/html_text.rs +++ b/src/construct/html_text.rs @@ -57,7 +57,7 @@ use crate::constant::HTML_CDATA_PREFIX; use crate::construct::partial_space_or_tab::space_or_tab; use crate::token::Token; -use crate::tokenizer::{State, Tokenizer}; +use crate::tokenizer::{State, StateName, Tokenizer}; /// Start of HTML (text) /// @@ -70,7 +70,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(Token::HtmlText); tokenizer.enter(Token::HtmlTextData); tokenizer.consume(); - State::Fn(Box::new(open)) + State::Fn(StateName::HtmlTextOpen) } else { State::Nok } @@ -86,24 +86,24 
@@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// > | a <!--b--> c /// ^ /// ``` -fn open(tokenizer: &mut Tokenizer) -> State { +pub fn open(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'!') => { tokenizer.consume(); - State::Fn(Box::new(declaration_open)) + State::Fn(StateName::HtmlTextDeclarationOpen) } Some(b'/') => { tokenizer.consume(); - State::Fn(Box::new(tag_close_start)) + State::Fn(StateName::HtmlTextTagCloseStart) } Some(b'?') => { tokenizer.consume(); - State::Fn(Box::new(instruction)) + State::Fn(StateName::HtmlTextInstruction) } // ASCII alphabetical. Some(b'A'..=b'Z' | b'a'..=b'z') => { tokenizer.consume(); - State::Fn(Box::new(tag_open)) + State::Fn(StateName::HtmlTextTagOpen) } _ => State::Nok, } @@ -119,20 +119,20 @@ fn open(tokenizer: &mut Tokenizer) -> State { /// > | a <![CDATA[>&<]]> c /// ^ /// ``` -fn declaration_open(tokenizer: &mut Tokenizer) -> State { +pub fn declaration_open(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'-') => { tokenizer.consume(); - State::Fn(Box::new(comment_open_inside)) + State::Fn(StateName::HtmlTextCommentOpenInside) } // ASCII alphabetical. 
Some(b'A'..=b'Z' | b'a'..=b'z') => { tokenizer.consume(); - State::Fn(Box::new(declaration)) + State::Fn(StateName::HtmlTextDeclaration) } Some(b'[') => { tokenizer.consume(); - State::Fn(Box::new(cdata_open_inside)) + State::Fn(StateName::HtmlTextCdataOpenInside) } _ => State::Nok, } @@ -144,11 +144,11 @@ fn declaration_open(tokenizer: &mut Tokenizer) -> State { /// > | a <!--b--> c /// ^ /// ``` -fn comment_open_inside(tokenizer: &mut Tokenizer) -> State { +pub fn comment_open_inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'-') => { tokenizer.consume(); - State::Fn(Box::new(comment_start)) + State::Fn(StateName::HtmlTextCommentStart) } _ => State::Nok, } @@ -167,12 +167,12 @@ fn comment_open_inside(tokenizer: &mut Tokenizer) -> State { /// ``` /// /// [html_flow]: crate::construct::html_flow -fn comment_start(tokenizer: &mut Tokenizer) -> State { +pub fn comment_start(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'>') => State::Nok, Some(b'-') => { tokenizer.consume(); - State::Fn(Box::new(comment_start_dash)) + State::Fn(StateName::HtmlTextCommentStartDash) } _ => comment(tokenizer), } @@ -191,7 +191,7 @@ fn comment_start(tokenizer: &mut Tokenizer) -> State { /// ``` /// /// [html_flow]: crate::construct::html_flow -fn comment_start_dash(tokenizer: &mut Tokenizer) -> State { +pub fn comment_start_dash(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'>') => State::Nok, _ => comment(tokenizer), @@ -204,20 +204,20 @@ fn comment_start_dash(tokenizer: &mut Tokenizer) -> State { /// > | a <!--b--> c /// ^ /// ``` -fn comment(tokenizer: &mut Tokenizer) -> State { +pub fn comment(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => State::Nok, Some(b'\n') => { - tokenizer.tokenize_state.return_state = Some(Box::new(comment)); - at_line_ending(tokenizer) + tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextComment); + line_ending_before(tokenizer) } Some(b'-') => { 
tokenizer.consume(); - State::Fn(Box::new(comment_close)) + State::Fn(StateName::HtmlTextCommentClose) } _ => { tokenizer.consume(); - State::Fn(Box::new(comment)) + State::Fn(StateName::HtmlTextComment) } } } @@ -228,11 +228,11 @@ fn comment(tokenizer: &mut Tokenizer) -> State { /// > | a <!--b--> c /// ^ /// ``` -fn comment_close(tokenizer: &mut Tokenizer) -> State { +pub fn comment_close(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'-') => { tokenizer.consume(); - State::Fn(Box::new(end)) + State::Fn(StateName::HtmlTextEnd) } _ => comment(tokenizer), } @@ -244,16 +244,16 @@ fn comment_close(tokenizer: &mut Tokenizer) -> State { /// > | a <![CDATA[>&<]]> b /// ^^^^^^ /// ``` -fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State { +pub fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State { if tokenizer.current == Some(HTML_CDATA_PREFIX[tokenizer.tokenize_state.size]) { tokenizer.tokenize_state.size += 1; tokenizer.consume(); if tokenizer.tokenize_state.size == HTML_CDATA_PREFIX.len() { tokenizer.tokenize_state.size = 0; - State::Fn(Box::new(cdata)) + State::Fn(StateName::HtmlTextCdata) } else { - State::Fn(Box::new(cdata_open_inside)) + State::Fn(StateName::HtmlTextCdataOpenInside) } } else { State::Nok @@ -266,20 +266,20 @@ fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State { /// > | a <![CDATA[>&<]]> b /// ^^^ /// ``` -fn cdata(tokenizer: &mut Tokenizer) -> State { +pub fn cdata(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => State::Nok, Some(b'\n') => { - tokenizer.tokenize_state.return_state = Some(Box::new(cdata)); - at_line_ending(tokenizer) + tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextCdata); + line_ending_before(tokenizer) } Some(b']') => { tokenizer.consume(); - State::Fn(Box::new(cdata_close)) + State::Fn(StateName::HtmlTextCdataClose) } _ => { tokenizer.consume(); - State::Fn(Box::new(cdata)) + State::Fn(StateName::HtmlTextCdata) } } } @@ -290,11 +290,11 @@ fn 
cdata(tokenizer: &mut Tokenizer) -> State { /// > | a <![CDATA[>&<]]> b /// ^ /// ``` -fn cdata_close(tokenizer: &mut Tokenizer) -> State { +pub fn cdata_close(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b']') => { tokenizer.consume(); - State::Fn(Box::new(cdata_end)) + State::Fn(StateName::HtmlTextCdataEnd) } _ => cdata(tokenizer), } @@ -306,7 +306,7 @@ fn cdata_close(tokenizer: &mut Tokenizer) -> State { /// > | a <![CDATA[>&<]]> b /// ^ /// ``` -fn cdata_end(tokenizer: &mut Tokenizer) -> State { +pub fn cdata_end(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'>') => end(tokenizer), Some(b']') => cdata_close(tokenizer), @@ -320,16 +320,16 @@ fn cdata_end(tokenizer: &mut Tokenizer) -> State { /// > | a <!b> c /// ^ /// ``` -fn declaration(tokenizer: &mut Tokenizer) -> State { +pub fn declaration(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'>') => end(tokenizer), Some(b'\n') => { - tokenizer.tokenize_state.return_state = Some(Box::new(declaration)); - at_line_ending(tokenizer) + tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextDeclaration); + line_ending_before(tokenizer) } _ => { tokenizer.consume(); - State::Fn(Box::new(declaration)) + State::Fn(StateName::HtmlTextDeclaration) } } } @@ -340,20 +340,20 @@ fn declaration(tokenizer: &mut Tokenizer) -> State { /// > | a <?b?> c /// ^ /// ``` -fn instruction(tokenizer: &mut Tokenizer) -> State { +pub fn instruction(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => State::Nok, Some(b'\n') => { - tokenizer.tokenize_state.return_state = Some(Box::new(instruction)); - at_line_ending(tokenizer) + tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextInstruction); + line_ending_before(tokenizer) } Some(b'?') => { tokenizer.consume(); - State::Fn(Box::new(instruction_close)) + State::Fn(StateName::HtmlTextInstructionClose) } _ => { tokenizer.consume(); - State::Fn(Box::new(instruction)) + 
State::Fn(StateName::HtmlTextInstruction) } } } @@ -364,7 +364,7 @@ fn instruction(tokenizer: &mut Tokenizer) -> State { /// > | a <?b?> c /// ^ /// ``` -fn instruction_close(tokenizer: &mut Tokenizer) -> State { +pub fn instruction_close(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'>') => end(tokenizer), _ => instruction(tokenizer), @@ -377,12 +377,12 @@ fn instruction_close(tokenizer: &mut Tokenizer) -> State { /// > | a </b> c /// ^ /// ``` -fn tag_close_start(tokenizer: &mut Tokenizer) -> State { +pub fn tag_close_start(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // ASCII alphabetical. Some(b'A'..=b'Z' | b'a'..=b'z') => { tokenizer.consume(); - State::Fn(Box::new(tag_close)) + State::Fn(StateName::HtmlTextTagClose) } _ => State::Nok, } @@ -394,12 +394,12 @@ fn tag_close_start(tokenizer: &mut Tokenizer) -> State { /// > | a </b> c /// ^ /// ``` -fn tag_close(tokenizer: &mut Tokenizer) -> State { +pub fn tag_close(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // ASCII alphanumerical and `-`. 
Some(b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => { tokenizer.consume(); - State::Fn(Box::new(tag_close)) + State::Fn(StateName::HtmlTextTagClose) } _ => tag_close_between(tokenizer), } @@ -411,15 +411,15 @@ fn tag_close(tokenizer: &mut Tokenizer) -> State { /// > | a </b> c /// ^ /// ``` -fn tag_close_between(tokenizer: &mut Tokenizer) -> State { +pub fn tag_close_between(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'\n') => { - tokenizer.tokenize_state.return_state = Some(Box::new(tag_close_between)); - at_line_ending(tokenizer) + tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextTagCloseBetween); + line_ending_before(tokenizer) } Some(b'\t' | b' ') => { tokenizer.consume(); - State::Fn(Box::new(tag_close_between)) + State::Fn(StateName::HtmlTextTagCloseBetween) } _ => end(tokenizer), } @@ -431,12 +431,12 @@ fn tag_close_between(tokenizer: &mut Tokenizer) -> State { /// > | a <b> c /// ^ /// ``` -fn tag_open(tokenizer: &mut Tokenizer) -> State { +pub fn tag_open(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // ASCII alphanumerical and `-`. 
Some(b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => { tokenizer.consume(); - State::Fn(Box::new(tag_open)) + State::Fn(StateName::HtmlTextTagOpen) } Some(b'\t' | b'\n' | b' ' | b'/' | b'>') => tag_open_between(tokenizer), _ => State::Nok, @@ -449,24 +449,24 @@ fn tag_open(tokenizer: &mut Tokenizer) -> State { /// > | a <b> c /// ^ /// ``` -fn tag_open_between(tokenizer: &mut Tokenizer) -> State { +pub fn tag_open_between(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'\n') => { - tokenizer.tokenize_state.return_state = Some(Box::new(tag_open_between)); - at_line_ending(tokenizer) + tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextTagOpenBetween); + line_ending_before(tokenizer) } Some(b'\t' | b' ') => { tokenizer.consume(); - State::Fn(Box::new(tag_open_between)) + State::Fn(StateName::HtmlTextTagOpenBetween) } Some(b'/') => { tokenizer.consume(); - State::Fn(Box::new(end)) + State::Fn(StateName::HtmlTextEnd) } // ASCII alphabetical and `:` and `_`. Some(b':' | b'A'..=b'Z' | b'_' | b'a'..=b'z') => { tokenizer.consume(); - State::Fn(Box::new(tag_open_attribute_name)) + State::Fn(StateName::HtmlTextTagOpenAttributeName) } _ => end(tokenizer), } @@ -478,12 +478,12 @@ fn tag_open_between(tokenizer: &mut Tokenizer) -> State { /// > | a <b c> d /// ^ /// ``` -fn tag_open_attribute_name(tokenizer: &mut Tokenizer) -> State { +pub fn tag_open_attribute_name(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // ASCII alphabetical and `-`, `.`, `:`, and `_`. Some(b'-' | b'.' 
| b'0'..=b'9' | b':' | b'A'..=b'Z' | b'_' | b'a'..=b'z') => { tokenizer.consume(); - State::Fn(Box::new(tag_open_attribute_name)) + State::Fn(StateName::HtmlTextTagOpenAttributeName) } _ => tag_open_attribute_name_after(tokenizer), } @@ -496,19 +496,20 @@ fn tag_open_attribute_name(tokenizer: &mut Tokenizer) -> State { /// > | a <b c> d /// ^ /// ``` -fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer) -> State { +pub fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'\n') => { - tokenizer.tokenize_state.return_state = Some(Box::new(tag_open_attribute_name_after)); - at_line_ending(tokenizer) + tokenizer.tokenize_state.return_state = + Some(StateName::HtmlTextTagOpenAttributeNameAfter); + line_ending_before(tokenizer) } Some(b'\t' | b' ') => { tokenizer.consume(); - State::Fn(Box::new(tag_open_attribute_name_after)) + State::Fn(StateName::HtmlTextTagOpenAttributeNameAfter) } Some(b'=') => { tokenizer.consume(); - State::Fn(Box::new(tag_open_attribute_value_before)) + State::Fn(StateName::HtmlTextTagOpenAttributeValueBefore) } _ => tag_open_between(tokenizer), } @@ -521,25 +522,26 @@ fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer) -> State { /// > | a <b c=d> e /// ^ /// ``` -fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer) -> State { +pub fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'<' | b'=' | b'>' | b'`') => State::Nok, Some(b'\n') => { - tokenizer.tokenize_state.return_state = Some(Box::new(tag_open_attribute_value_before)); - at_line_ending(tokenizer) + tokenizer.tokenize_state.return_state = + Some(StateName::HtmlTextTagOpenAttributeValueBefore); + line_ending_before(tokenizer) } Some(b'\t' | b' ') => { tokenizer.consume(); - State::Fn(Box::new(tag_open_attribute_value_before)) + State::Fn(StateName::HtmlTextTagOpenAttributeValueBefore) } Some(b'"' | b'\'') => { tokenizer.tokenize_state.marker = 
tokenizer.current.unwrap(); tokenizer.consume(); - State::Fn(Box::new(tag_open_attribute_value_quoted)) + State::Fn(StateName::HtmlTextTagOpenAttributeValueQuoted) } Some(_) => { tokenizer.consume(); - State::Fn(Box::new(tag_open_attribute_value_unquoted)) + State::Fn(StateName::HtmlTextTagOpenAttributeValueUnquoted) } } } @@ -550,24 +552,25 @@ fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer) -> State { /// > | a <b c="d"> e /// ^ /// ``` -fn tag_open_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State { +pub fn tag_open_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => { tokenizer.tokenize_state.marker = 0; State::Nok } Some(b'\n') => { - tokenizer.tokenize_state.return_state = Some(Box::new(tag_open_attribute_value_quoted)); - at_line_ending(tokenizer) + tokenizer.tokenize_state.return_state = + Some(StateName::HtmlTextTagOpenAttributeValueQuoted); + line_ending_before(tokenizer) } Some(b'"' | b'\'') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => { tokenizer.tokenize_state.marker = 0; tokenizer.consume(); - State::Fn(Box::new(tag_open_attribute_value_quoted_after)) + State::Fn(StateName::HtmlTextTagOpenAttributeValueQuotedAfter) } _ => { tokenizer.consume(); - State::Fn(Box::new(tag_open_attribute_value_quoted)) + State::Fn(StateName::HtmlTextTagOpenAttributeValueQuoted) } } } @@ -578,13 +581,13 @@ fn tag_open_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State { /// > | a <b c=d> e /// ^ /// ``` -fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State { +pub fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'"' | b'\'' | b'<' | b'=' | b'`') => State::Nok, Some(b'\t' | b'\n' | b' ' | b'/' | b'>') => tag_open_between(tokenizer), Some(_) => { tokenizer.consume(); - State::Fn(Box::new(tag_open_attribute_value_unquoted)) + State::Fn(StateName::HtmlTextTagOpenAttributeValueUnquoted) } } } @@ 
-596,7 +599,7 @@ fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State { /// > | a <b c="d"> e /// ^ /// ``` -fn tag_open_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State { +pub fn tag_open_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'\t' | b'\n' | b' ' | b'>' | b'/') => tag_open_between(tokenizer), _ => State::Nok, @@ -609,7 +612,7 @@ fn tag_open_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State { /// > | a <b c="d"> e /// ^ /// ``` -fn end(tokenizer: &mut Tokenizer) -> State { +pub fn end(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'>') => { tokenizer.consume(); @@ -631,14 +634,14 @@ fn end(tokenizer: &mut Tokenizer) -> State { /// ^ /// | b--> /// ``` -fn at_line_ending(tokenizer: &mut Tokenizer) -> State { +pub fn line_ending_before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'\n') => { tokenizer.exit(Token::HtmlTextData); tokenizer.enter(Token::LineEnding); tokenizer.consume(); tokenizer.exit(Token::LineEnding); - State::Fn(Box::new(after_line_ending)) + State::Fn(StateName::HtmlTextLineEndingAfter) } _ => unreachable!("expected eol"), } @@ -654,8 +657,9 @@ fn at_line_ending(tokenizer: &mut Tokenizer) -> State { /// > | b--> /// ^ /// ``` -fn after_line_ending(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt_opt(space_or_tab(), after_line_ending_prefix)(tokenizer) +pub fn line_ending_after(tokenizer: &mut Tokenizer) -> State { + let state_name = space_or_tab(tokenizer); + tokenizer.attempt_opt(state_name, StateName::HtmlTextLineEndingAfterPrefix) } /// After a line ending, after indent. 
@@ -668,8 +672,9 @@ fn after_line_ending(tokenizer: &mut Tokenizer) -> State { /// > | b--> /// ^ /// ``` -fn after_line_ending_prefix(tokenizer: &mut Tokenizer) -> State { - let return_state = tokenizer.tokenize_state.return_state.take().unwrap(); +pub fn line_ending_after_prefix(tokenizer: &mut Tokenizer) -> State { + let state_name = tokenizer.tokenize_state.return_state.take().unwrap(); + let func = state_name.to_func(); tokenizer.enter(Token::HtmlTextData); - return_state(tokenizer) + func(tokenizer) } diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs index b38e15a..ae9fe77 100644 --- a/src/construct/label_end.rs +++ b/src/construct/label_end.rs @@ -147,12 +147,9 @@ //! [html-img]: https://html.spec.whatwg.org/multipage/embedded-content.html#the-img-element use crate::constant::RESOURCE_DESTINATION_BALANCE_MAX; -use crate::construct::{ - partial_destination::start as destination, partial_label::start as label, - partial_space_or_tab::space_or_tab_eol, partial_title::start as title, -}; +use crate::construct::partial_space_or_tab::space_or_tab_eol; use crate::token::Token; -use crate::tokenizer::{Event, EventType, Media, State, Tokenizer}; +use crate::tokenizer::{Event, EventType, Media, State, StateName, Tokenizer}; use crate::util::{ normalize_identifier::normalize_identifier, skip, @@ -204,7 +201,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.consume(); tokenizer.exit(Token::LabelMarker); tokenizer.exit(Token::LabelEnd); - return State::Fn(Box::new(after)); + return State::Fn(StateName::LabelEndAfter); } } @@ -223,7 +220,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// > | [a] b /// ^ /// ``` -fn after(tokenizer: &mut Tokenizer) -> State { +pub fn after(tokenizer: &mut Tokenizer) -> State { let start = &tokenizer.label_start_stack[tokenizer.tokenize_state.start]; let defined = tokenizer .parse_state @@ -240,19 +237,23 @@ fn after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // Resource 
(`[asd](fgh)`)? - Some(b'(') => tokenizer.attempt(resource, move |is_ok| { - Box::new(if is_ok || defined { ok } else { nok }) - })(tokenizer), + Some(b'(') => tokenizer.attempt(StateName::LabelEndResourceStart, move |is_ok| { + State::Fn(if is_ok || defined { + StateName::LabelEndOk + } else { + StateName::LabelEndNok + }) + }), // Full (`[asd][fgh]`) or collapsed (`[asd][]`) reference? - Some(b'[') => tokenizer.attempt(full_reference, move |is_ok| { - Box::new(if is_ok { - ok + Some(b'[') => tokenizer.attempt(StateName::LabelEndReferenceFull, move |is_ok| { + State::Fn(if is_ok { + StateName::LabelEndOk } else if defined { - reference_not_full + StateName::LabelEndReferenceNotFull } else { - nok + StateName::LabelEndNok }) - })(tokenizer), + }), // Shortcut (`[asd]`) reference? _ => { let func = if defined { ok } else { nok }; @@ -271,10 +272,14 @@ fn after(tokenizer: &mut Tokenizer) -> State { /// > | [a] b /// ^ /// ``` -fn reference_not_full(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt(collapsed_reference, |is_ok| { - Box::new(if is_ok { ok } else { nok }) - })(tokenizer) +pub fn reference_not_full(tokenizer: &mut Tokenizer) -> State { + tokenizer.attempt(StateName::LabelEndReferenceCollapsed, |is_ok| { + State::Fn(if is_ok { + StateName::LabelEndOk + } else { + StateName::LabelEndNok + }) + }) } /// Done, we found something. @@ -289,7 +294,7 @@ fn reference_not_full(tokenizer: &mut Tokenizer) -> State { /// > | [a] b /// ^ /// ``` -fn ok(tokenizer: &mut Tokenizer) -> State { +pub fn ok(tokenizer: &mut Tokenizer) -> State { let label_start_index = tokenizer.tokenize_state.start; // Remove this one and everything after it. 
let mut left = tokenizer.label_start_stack.split_off(label_start_index); @@ -332,7 +337,7 @@ fn ok(tokenizer: &mut Tokenizer) -> State { /// > | [a] b /// ^ /// ``` -fn nok(tokenizer: &mut Tokenizer) -> State { +pub fn nok(tokenizer: &mut Tokenizer) -> State { tokenizer .label_start_stack .get_mut(tokenizer.tokenize_state.start) @@ -349,14 +354,14 @@ fn nok(tokenizer: &mut Tokenizer) -> State { /// > | [a](b) c /// ^ /// ``` -fn resource(tokenizer: &mut Tokenizer) -> State { +pub fn resource_start(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'(') => { tokenizer.enter(Token::Resource); tokenizer.enter(Token::ResourceMarker); tokenizer.consume(); tokenizer.exit(Token::ResourceMarker); - State::Fn(Box::new(resource_start)) + State::Fn(StateName::LabelEndResourceBefore) } _ => unreachable!("expected `(`"), } @@ -368,8 +373,9 @@ fn resource(tokenizer: &mut Tokenizer) -> State { /// > | [a](b) c /// ^ /// ``` -fn resource_start(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt_opt(space_or_tab_eol(), resource_open)(tokenizer) +pub fn resource_before(tokenizer: &mut Tokenizer) -> State { + let state_name = space_or_tab_eol(tokenizer); + tokenizer.attempt_opt(state_name, StateName::LabelEndResourceOpen) } /// At the start of a resource, after optional whitespace. 
@@ -378,7 +384,7 @@ fn resource_start(tokenizer: &mut Tokenizer) -> State { /// > | [a](b) c /// ^ /// ``` -fn resource_open(tokenizer: &mut Tokenizer) -> State { +pub fn resource_open(tokenizer: &mut Tokenizer) -> State { if let Some(b')') = tokenizer.current { resource_end(tokenizer) } else { @@ -389,13 +395,13 @@ fn resource_open(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_5 = Token::ResourceDestinationString; tokenizer.tokenize_state.size_other = RESOURCE_DESTINATION_BALANCE_MAX; - tokenizer.attempt(destination, |ok| { - Box::new(if ok { - destination_after + tokenizer.attempt(StateName::DestinationStart, |ok| { + State::Fn(if ok { + StateName::LabelEndResourceDestinationAfter } else { - destination_missing + StateName::LabelEndResourceDestinationMissing }) - })(tokenizer) + }) } } @@ -405,21 +411,26 @@ fn resource_open(tokenizer: &mut Tokenizer) -> State { /// > | [a](b) c /// ^ /// ``` -fn destination_after(tokenizer: &mut Tokenizer) -> State { +pub fn resource_destination_after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_1 = Token::Data; tokenizer.tokenize_state.token_2 = Token::Data; tokenizer.tokenize_state.token_3 = Token::Data; tokenizer.tokenize_state.token_4 = Token::Data; tokenizer.tokenize_state.token_5 = Token::Data; tokenizer.tokenize_state.size_other = 0; - - tokenizer.attempt(space_or_tab_eol(), |ok| { - Box::new(if ok { resource_between } else { resource_end }) - })(tokenizer) + let state_name = space_or_tab_eol(tokenizer); + + tokenizer.attempt(state_name, |ok| { + State::Fn(if ok { + StateName::LabelEndResourceBetween + } else { + StateName::LabelEndResourceEnd + }) + }) } /// Without destination. 
-fn destination_missing(tokenizer: &mut Tokenizer) -> State { +pub fn resource_destination_missing(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_1 = Token::Data; tokenizer.tokenize_state.token_2 = Token::Data; tokenizer.tokenize_state.token_3 = Token::Data; @@ -435,13 +446,13 @@ fn destination_missing(tokenizer: &mut Tokenizer) -> State { /// > | [a](b ) c /// ^ /// ``` -fn resource_between(tokenizer: &mut Tokenizer) -> State { +pub fn resource_between(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'"' | b'\'' | b'(') => { tokenizer.tokenize_state.token_1 = Token::ResourceTitle; tokenizer.tokenize_state.token_2 = Token::ResourceTitleMarker; tokenizer.tokenize_state.token_3 = Token::ResourceTitleString; - tokenizer.go(title, title_after)(tokenizer) + tokenizer.go(StateName::TitleStart, StateName::LabelEndResourceTitleAfter) } _ => resource_end(tokenizer), } @@ -453,11 +464,12 @@ fn resource_between(tokenizer: &mut Tokenizer) -> State { /// > | [a](b "c") d /// ^ /// ``` -fn title_after(tokenizer: &mut Tokenizer) -> State { +pub fn resource_title_after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_1 = Token::Data; tokenizer.tokenize_state.token_2 = Token::Data; tokenizer.tokenize_state.token_3 = Token::Data; - tokenizer.attempt_opt(space_or_tab_eol(), resource_end)(tokenizer) + let state_name = space_or_tab_eol(tokenizer); + tokenizer.attempt_opt(state_name, StateName::LabelEndResourceEnd) } /// In a resource, at the `)`. 
@@ -466,7 +478,7 @@ fn title_after(tokenizer: &mut Tokenizer) -> State { /// > | [a](b) d /// ^ /// ``` -fn resource_end(tokenizer: &mut Tokenizer) -> State { +pub fn resource_end(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b')') => { tokenizer.enter(Token::ResourceMarker); @@ -485,13 +497,13 @@ fn resource_end(tokenizer: &mut Tokenizer) -> State { /// > | [a][b] d /// ^ /// ``` -fn full_reference(tokenizer: &mut Tokenizer) -> State { +pub fn reference_full(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'[') => { tokenizer.tokenize_state.token_1 = Token::Reference; tokenizer.tokenize_state.token_2 = Token::ReferenceMarker; tokenizer.tokenize_state.token_3 = Token::ReferenceString; - tokenizer.go(label, full_reference_after)(tokenizer) + tokenizer.go(StateName::LabelStart, StateName::LabelEndReferenceFullAfter) } _ => unreachable!("expected `[`"), } @@ -503,7 +515,7 @@ fn full_reference(tokenizer: &mut Tokenizer) -> State { /// > | [a][b] d /// ^ /// ``` -fn full_reference_after(tokenizer: &mut Tokenizer) -> State { +pub fn reference_full_after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_1 = Token::Data; tokenizer.tokenize_state.token_2 = Token::Data; tokenizer.tokenize_state.token_3 = Token::Data; @@ -541,14 +553,14 @@ fn full_reference_after(tokenizer: &mut Tokenizer) -> State { /// > | [a][] d /// ^ /// ``` -fn collapsed_reference(tokenizer: &mut Tokenizer) -> State { +pub fn reference_collapsed(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'[') => { tokenizer.enter(Token::Reference); tokenizer.enter(Token::ReferenceMarker); tokenizer.consume(); tokenizer.exit(Token::ReferenceMarker); - State::Fn(Box::new(collapsed_reference_open)) + State::Fn(StateName::LabelEndReferenceCollapsedOpen) } _ => State::Nok, } @@ -562,7 +574,7 @@ fn collapsed_reference(tokenizer: &mut Tokenizer) -> State { /// > | [a][] d /// ^ /// ``` -fn collapsed_reference_open(tokenizer: &mut 
Tokenizer) -> State { +pub fn reference_collapsed_open(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b']') => { tokenizer.enter(Token::ReferenceMarker); diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs index 4a3508e..4fcf8c2 100644 --- a/src/construct/label_start_image.rs +++ b/src/construct/label_start_image.rs @@ -30,7 +30,7 @@ use super::label_end::resolve_media; use crate::token::Token; -use crate::tokenizer::{LabelStart, State, Tokenizer}; +use crate::tokenizer::{LabelStart, State, StateName, Tokenizer}; /// Start of label (image) start. /// @@ -45,7 +45,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(Token::LabelImageMarker); tokenizer.consume(); tokenizer.exit(Token::LabelImageMarker); - State::Fn(Box::new(open)) + State::Fn(StateName::LabelStartImageOpen) } _ => State::Nok, } diff --git a/src/construct/list.rs b/src/construct/list.rs index 0e12b7c..6ecfb04 100644 --- a/src/construct/list.rs +++ b/src/construct/list.rs @@ -45,12 +45,9 @@ //! 
[commonmark-block]: https://spec.commonmark.org/0.30/#phase-1-block-structure use crate::constant::{LIST_ITEM_VALUE_SIZE_MAX, TAB_SIZE}; -use crate::construct::{ - blank_line::start as blank_line, partial_space_or_tab::space_or_tab_min_max, - thematic_break::start as thematic_break, -}; +use crate::construct::partial_space_or_tab::space_or_tab_min_max; use crate::token::Token; -use crate::tokenizer::{EventType, State, Tokenizer}; +use crate::tokenizer::{EventType, State, StateName, Tokenizer}; use crate::util::{ skip, slice::{Position, Slice}, @@ -65,17 +62,16 @@ use crate::util::{ pub fn start(tokenizer: &mut Tokenizer) -> State { if tokenizer.parse_state.constructs.list { tokenizer.enter(Token::ListItem); - tokenizer.go( - space_or_tab_min_max( - 0, - if tokenizer.parse_state.constructs.code_indented { - TAB_SIZE - 1 - } else { - usize::MAX - }, - ), - before, - )(tokenizer) + let state_name = space_or_tab_min_max( + tokenizer, + 0, + if tokenizer.parse_state.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }, + ); + tokenizer.go(state_name, StateName::ListBefore) } else { State::Nok } @@ -87,12 +83,16 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// > | * a /// ^ /// ``` -fn before(tokenizer: &mut Tokenizer) -> State { +pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // Unordered. - Some(b'*' | b'-') => tokenizer.check(thematic_break, |ok| { - Box::new(if ok { nok } else { before_unordered }) - })(tokenizer), + Some(b'*' | b'-') => tokenizer.check(StateName::ThematicBreakStart, |ok| { + State::Fn(if ok { + StateName::ListNok + } else { + StateName::ListBeforeUnordered + }) + }), Some(b'+') => before_unordered(tokenizer), // Ordered. 
Some(b'0'..=b'9') if !tokenizer.interrupt => before_ordered(tokenizer), @@ -109,7 +109,7 @@ fn before(tokenizer: &mut Tokenizer) -> State { /// > | * a /// ^ /// ``` -fn before_unordered(tokenizer: &mut Tokenizer) -> State { +pub fn before_unordered(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(Token::ListItemPrefix); marker(tokenizer) } @@ -120,10 +120,10 @@ fn before_unordered(tokenizer: &mut Tokenizer) -> State { /// > | * a /// ^ /// ``` -fn before_ordered(tokenizer: &mut Tokenizer) -> State { +pub fn before_ordered(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(Token::ListItemPrefix); tokenizer.enter(Token::ListItemValue); - inside(tokenizer) + value(tokenizer) } /// In an ordered list item value. @@ -132,7 +132,7 @@ fn before_ordered(tokenizer: &mut Tokenizer) -> State { /// > | 1. a /// ^ /// ``` -fn inside(tokenizer: &mut Tokenizer) -> State { +pub fn value(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'.' | b')') if !tokenizer.interrupt || tokenizer.tokenize_state.size < 2 => { tokenizer.exit(Token::ListItemValue); @@ -141,7 +141,7 @@ fn inside(tokenizer: &mut Tokenizer) -> State { Some(b'0'..=b'9') if tokenizer.tokenize_state.size + 1 < LIST_ITEM_VALUE_SIZE_MAX => { tokenizer.tokenize_state.size += 1; tokenizer.consume(); - State::Fn(Box::new(inside)) + State::Fn(StateName::ListValue) } _ => { tokenizer.tokenize_state.size = 0; @@ -158,11 +158,11 @@ fn inside(tokenizer: &mut Tokenizer) -> State { /// > | 1. b /// ^ /// ``` -fn marker(tokenizer: &mut Tokenizer) -> State { +pub fn marker(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(Token::ListItemMarker); tokenizer.consume(); tokenizer.exit(Token::ListItemMarker); - State::Fn(Box::new(marker_after)) + State::Fn(StateName::ListMarkerAfter) } /// After a list item marker. @@ -173,11 +173,15 @@ fn marker(tokenizer: &mut Tokenizer) -> State { /// > | 1. 
b /// ^ /// ``` -fn marker_after(tokenizer: &mut Tokenizer) -> State { +pub fn marker_after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.size = 1; - tokenizer.check(blank_line, |ok| { - Box::new(if ok { after } else { marker_after_not_blank }) - })(tokenizer) + tokenizer.check(StateName::BlankLineStart, |ok| { + State::Fn(if ok { + StateName::ListAfter + } else { + StateName::ListMarkerAfterFilled + }) + }) } /// After a list item marker, not followed by a blank line. @@ -186,13 +190,17 @@ fn marker_after(tokenizer: &mut Tokenizer) -> State { /// > | * a /// ^ /// ``` -fn marker_after_not_blank(tokenizer: &mut Tokenizer) -> State { +pub fn marker_after_filled(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.size = 0; // Attempt to parse up to the largest allowed indent, `nok` if there is more whitespace. - tokenizer.attempt(whitespace, |ok| { - Box::new(if ok { after } else { prefix_other }) - })(tokenizer) + tokenizer.attempt(StateName::ListWhitespace, |ok| { + State::Fn(if ok { + StateName::ListAfter + } else { + StateName::ListPrefixOther + }) + }) } /// In whitespace after a marker. @@ -201,8 +209,9 @@ fn marker_after_not_blank(tokenizer: &mut Tokenizer) -> State { /// > | * a /// ^ /// ``` -fn whitespace(tokenizer: &mut Tokenizer) -> State { - tokenizer.go(space_or_tab_min_max(1, TAB_SIZE), whitespace_after)(tokenizer) +pub fn whitespace(tokenizer: &mut Tokenizer) -> State { + let state_name = space_or_tab_min_max(tokenizer, 1, TAB_SIZE); + tokenizer.go(state_name, StateName::ListWhitespaceAfter) } /// After acceptable whitespace. 
@@ -211,7 +220,7 @@ fn whitespace(tokenizer: &mut Tokenizer) -> State { /// > | * a /// ^ /// ``` -fn whitespace_after(tokenizer: &mut Tokenizer) -> State { +pub fn whitespace_after(tokenizer: &mut Tokenizer) -> State { if let Some(b'\t' | b' ') = tokenizer.current { State::Nok } else { @@ -225,13 +234,13 @@ fn whitespace_after(tokenizer: &mut Tokenizer) -> State { /// > | * a /// ^ /// ``` -fn prefix_other(tokenizer: &mut Tokenizer) -> State { +pub fn prefix_other(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'\t' | b' ') => { tokenizer.enter(Token::SpaceOrTab); tokenizer.consume(); tokenizer.exit(Token::SpaceOrTab); - State::Fn(Box::new(after)) + State::Fn(StateName::ListAfter) } _ => State::Nok, } @@ -243,7 +252,7 @@ fn prefix_other(tokenizer: &mut Tokenizer) -> State { /// > | * a /// ^ /// ``` -fn after(tokenizer: &mut Tokenizer) -> State { +pub fn after(tokenizer: &mut Tokenizer) -> State { let blank = tokenizer.tokenize_state.size == 1; tokenizer.tokenize_state.size = 0; @@ -285,10 +294,14 @@ fn after(tokenizer: &mut Tokenizer) -> State { /// > | b /// ^ /// ``` -pub fn cont(tokenizer: &mut Tokenizer) -> State { - tokenizer.check(blank_line, |ok| { - Box::new(if ok { blank_cont } else { not_blank_cont }) - })(tokenizer) +pub fn cont_start(tokenizer: &mut Tokenizer) -> State { + tokenizer.check(StateName::BlankLineStart, |ok| { + State::Fn(if ok { + StateName::ListContBlank + } else { + StateName::ListContFilled + }) + }) } /// Start of blank list item continuation. @@ -299,15 +312,16 @@ pub fn cont(tokenizer: &mut Tokenizer) -> State { /// ^ /// | b /// ``` -pub fn blank_cont(tokenizer: &mut Tokenizer) -> State { +pub fn cont_blank(tokenizer: &mut Tokenizer) -> State { let container = tokenizer.container.as_ref().unwrap(); let size = container.size; if container.blank_initial { State::Nok } else { + let state_name = space_or_tab_min_max(tokenizer, 0, size); // Consume, optionally, at most `size`. 
- tokenizer.go(space_or_tab_min_max(0, size), ok)(tokenizer) + tokenizer.go(state_name, StateName::ListOk) } } @@ -318,14 +332,15 @@ pub fn blank_cont(tokenizer: &mut Tokenizer) -> State { /// > | b /// ^ /// ``` -pub fn not_blank_cont(tokenizer: &mut Tokenizer) -> State { +pub fn cont_filled(tokenizer: &mut Tokenizer) -> State { let container = tokenizer.container.as_mut().unwrap(); let size = container.size; container.blank_initial = false; // Consume exactly `size`. - tokenizer.go(space_or_tab_min_max(size, size), ok)(tokenizer) + let state_name = space_or_tab_min_max(tokenizer, size, size); + tokenizer.go(state_name, StateName::ListOk) } /// A state fn to yield [`State::Ok`]. @@ -334,16 +349,16 @@ pub fn ok(_tokenizer: &mut Tokenizer) -> State { } /// A state fn to yield [`State::Nok`]. -fn nok(_tokenizer: &mut Tokenizer) -> State { +pub fn nok(_tokenizer: &mut Tokenizer) -> State { State::Nok } /// Find adjacent list items with the same marker. pub fn resolve_list_item(tokenizer: &mut Tokenizer) { - let mut index = 0; - let mut balance = 0; let mut lists_wip: Vec<(u8, usize, usize, usize)> = vec![]; let mut lists: Vec<(u8, usize, usize, usize)> = vec![]; + let mut index = 0; + let mut balance = 0; // Merge list items. while index < tokenizer.events.len() { diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs index 7fdaa66..de750f4 100644 --- a/src/construct/paragraph.rs +++ b/src/construct/paragraph.rs @@ -33,7 +33,7 @@ //! [html]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-p-element use crate::token::Token; -use crate::tokenizer::{ContentType, EventType, State, Tokenizer}; +use crate::tokenizer::{ContentType, EventType, State, StateName, Tokenizer}; use crate::util::skip::opt as skip_opt; /// Before a paragraph. 
@@ -59,7 +59,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// > | abc /// ^^^ /// ``` -fn inside(tokenizer: &mut Tokenizer) -> State { +pub fn inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { tokenizer.exit(Token::Data); @@ -71,7 +71,7 @@ fn inside(tokenizer: &mut Tokenizer) -> State { } _ => { tokenizer.consume(); - State::Fn(Box::new(inside)) + State::Fn(StateName::ParagraphInside) } } } diff --git a/src/construct/partial_bom.rs b/src/construct/partial_bom.rs index 2257bfd..b32b7f9 100644 --- a/src/construct/partial_bom.rs +++ b/src/construct/partial_bom.rs @@ -11,7 +11,7 @@ //! * [`micromark/lib/preprocess.js` in `micromark`](https://github.com/micromark/micromark/blob/ed23453/packages/micromark/dev/lib/preprocess.js#L54-L60) use crate::token::Token; -use crate::tokenizer::{State, Tokenizer}; +use crate::tokenizer::{State, StateName, Tokenizer}; const BOM: [u8; 3] = [0xEF, 0xBB, 0xBF]; @@ -36,7 +36,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// > | 0xEF 0xBB 0xBF /// ^^^^ ^^^^ ^^^^ /// ``` -fn inside(tokenizer: &mut Tokenizer) -> State { +pub fn inside(tokenizer: &mut Tokenizer) -> State { if tokenizer.current == Some(BOM[tokenizer.tokenize_state.size]) { tokenizer.tokenize_state.size += 1; tokenizer.consume(); @@ -45,7 +45,7 @@ fn inside(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.size = 0; State::Ok } else { - State::Fn(Box::new(inside)) + State::Fn(StateName::BomInside) } } else { tokenizer.tokenize_state.size = 0; diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs index 0365489..1cb5e61 100644 --- a/src/construct/partial_data.rs +++ b/src/construct/partial_data.rs @@ -7,7 +7,7 @@ //! [text]: crate::content::text use crate::token::Token; -use crate::tokenizer::{EventType, State, Tokenizer}; +use crate::tokenizer::{EventType, State, StateName, Tokenizer}; /// At the beginning of data. 
/// @@ -17,10 +17,11 @@ use crate::tokenizer::{EventType, State, Tokenizer}; /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { + // Make sure to eat the first `stop`. Some(byte) if tokenizer.tokenize_state.stop.contains(&byte) => { tokenizer.enter(Token::Data); tokenizer.consume(); - State::Fn(Box::new(data)) + State::Fn(StateName::DataInside) } _ => at_break(tokenizer), } @@ -32,14 +33,14 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// > | abc /// ^ /// ``` -fn at_break(tokenizer: &mut Tokenizer) -> State { +pub fn at_break(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => State::Ok, Some(b'\n') => { tokenizer.enter(Token::LineEnding); tokenizer.consume(); tokenizer.exit(Token::LineEnding); - State::Fn(Box::new(at_break)) + State::Fn(StateName::DataAtBreak) } Some(byte) if tokenizer.tokenize_state.stop.contains(&byte) => { tokenizer.register_resolver_before("data".to_string(), Box::new(resolve_data)); @@ -47,7 +48,7 @@ fn at_break(tokenizer: &mut Tokenizer) -> State { } _ => { tokenizer.enter(Token::Data); - data(tokenizer) + inside(tokenizer) } } } @@ -58,7 +59,7 @@ fn at_break(tokenizer: &mut Tokenizer) -> State { /// > | abc /// ^^^ /// ``` -fn data(tokenizer: &mut Tokenizer) -> State { +pub fn inside(tokenizer: &mut Tokenizer) -> State { let done = match tokenizer.current { None | Some(b'\n') => true, Some(byte) if tokenizer.tokenize_state.stop.contains(&byte) => true, @@ -70,7 +71,7 @@ fn data(tokenizer: &mut Tokenizer) -> State { at_break(tokenizer) } else { tokenizer.consume(); - State::Fn(Box::new(data)) + State::Fn(StateName::DataInside) } } diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs index f1cfc7d..e8818a0 100644 --- a/src/construct/partial_destination.rs +++ b/src/construct/partial_destination.rs @@ -72,7 +72,7 @@ //! 
[sanitize_uri]: crate::util::sanitize_uri use crate::token::Token; -use crate::tokenizer::{ContentType, State, Tokenizer}; +use crate::tokenizer::{ContentType, State, StateName, Tokenizer}; /// Before a destination. /// @@ -90,7 +90,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(tokenizer.tokenize_state.token_3.clone()); tokenizer.consume(); tokenizer.exit(tokenizer.tokenize_state.token_3.clone()); - State::Fn(Box::new(enclosed_before)) + State::Fn(StateName::DestinationEnclosedBefore) } // ASCII control, space, closing paren, but *not* `\0`. None | Some(0x01..=0x1F | b' ' | b')' | 0x7F) => State::Nok, @@ -110,7 +110,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// > | <aa> /// ^ /// ``` -fn enclosed_before(tokenizer: &mut Tokenizer) -> State { +pub fn enclosed_before(tokenizer: &mut Tokenizer) -> State { if let Some(b'>') = tokenizer.current { tokenizer.enter(tokenizer.tokenize_state.token_3.clone()); tokenizer.consume(); @@ -131,7 +131,7 @@ fn enclosed_before(tokenizer: &mut Tokenizer) -> State { /// > | <aa> /// ^ /// ``` -fn enclosed(tokenizer: &mut Tokenizer) -> State { +pub fn enclosed(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n' | b'<') => State::Nok, Some(b'>') => { @@ -141,11 +141,11 @@ fn enclosed(tokenizer: &mut Tokenizer) -> State { } Some(b'\\') => { tokenizer.consume(); - State::Fn(Box::new(enclosed_escape)) + State::Fn(StateName::DestinationEnclosedEscape) } _ => { tokenizer.consume(); - State::Fn(Box::new(enclosed)) + State::Fn(StateName::DestinationEnclosed) } } } @@ -156,11 +156,11 @@ fn enclosed(tokenizer: &mut Tokenizer) -> State { /// > | <a\*a> /// ^ /// ``` -fn enclosed_escape(tokenizer: &mut Tokenizer) -> State { +pub fn enclosed_escape(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'<' | b'>' | b'\\') => { tokenizer.consume(); - State::Fn(Box::new(enclosed)) + State::Fn(StateName::DestinationEnclosed) } _ => enclosed(tokenizer), } @@ -172,7 
+172,7 @@ fn enclosed_escape(tokenizer: &mut Tokenizer) -> State { /// > | aa /// ^ /// ``` -fn raw(tokenizer: &mut Tokenizer) -> State { +pub fn raw(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\t' | b'\n' | b' ' | b')') if tokenizer.tokenize_state.size == 0 => { tokenizer.exit(Token::Data); @@ -185,7 +185,7 @@ fn raw(tokenizer: &mut Tokenizer) -> State { Some(b'(') if tokenizer.tokenize_state.size < tokenizer.tokenize_state.size_other => { tokenizer.consume(); tokenizer.tokenize_state.size += 1; - State::Fn(Box::new(raw)) + State::Fn(StateName::DestinationRaw) } // ASCII control (but *not* `\0`) and space and `(`. None | Some(0x01..=0x1F | b' ' | b'(' | 0x7F) => { @@ -195,15 +195,15 @@ fn raw(tokenizer: &mut Tokenizer) -> State { Some(b')') => { tokenizer.consume(); tokenizer.tokenize_state.size -= 1; - State::Fn(Box::new(raw)) + State::Fn(StateName::DestinationRaw) } Some(b'\\') => { tokenizer.consume(); - State::Fn(Box::new(raw_escape)) + State::Fn(StateName::DestinationRawEscape) } Some(_) => { tokenizer.consume(); - State::Fn(Box::new(raw)) + State::Fn(StateName::DestinationRaw) } } } @@ -214,11 +214,11 @@ fn raw(tokenizer: &mut Tokenizer) -> State { /// > | a\*a /// ^ /// ``` -fn raw_escape(tokenizer: &mut Tokenizer) -> State { +pub fn raw_escape(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'(' | b')' | b'\\') => { tokenizer.consume(); - State::Fn(Box::new(raw)) + State::Fn(StateName::DestinationRaw) } _ => raw(tokenizer), } diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs index 0e1c2ec..0c8366e 100644 --- a/src/construct/partial_label.rs +++ b/src/construct/partial_label.rs @@ -62,7 +62,7 @@ use super::partial_space_or_tab::{space_or_tab_eol_with_options, EolOptions}; use crate::constant::LINK_REFERENCE_SIZE_MAX; use crate::subtokenize::link; use crate::token::Token; -use crate::tokenizer::{ContentType, State, Tokenizer}; +use crate::tokenizer::{ContentType, State, 
StateName, Tokenizer}; /// Before a label. /// @@ -78,7 +78,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.consume(); tokenizer.exit(tokenizer.tokenize_state.token_2.clone()); tokenizer.enter(tokenizer.tokenize_state.token_3.clone()); - State::Fn(Box::new(at_break)) + State::Fn(StateName::LabelAtBreak) } _ => State::Nok, } @@ -90,7 +90,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// > | [a] /// ^ /// ``` -fn at_break(tokenizer: &mut Tokenizer) -> State { +pub fn at_break(tokenizer: &mut Tokenizer) -> State { if tokenizer.tokenize_state.size > LINK_REFERENCE_SIZE_MAX || matches!(tokenizer.current, None | Some(b'[')) || (matches!(tokenizer.current, Some(b']')) && !tokenizer.tokenize_state.seen) @@ -101,13 +101,22 @@ fn at_break(tokenizer: &mut Tokenizer) -> State { State::Nok } else { match tokenizer.current { - Some(b'\n') => tokenizer.attempt( - space_or_tab_eol_with_options(EolOptions { - content_type: Some(ContentType::String), - connect: tokenizer.tokenize_state.connect, - }), - |ok| Box::new(if ok { after_eol } else { at_blank_line }), - )(tokenizer), + Some(b'\n') => { + let state_name = space_or_tab_eol_with_options( + tokenizer, + EolOptions { + content_type: Some(ContentType::String), + connect: tokenizer.tokenize_state.connect, + }, + ); + tokenizer.attempt(state_name, |ok| { + State::Fn(if ok { + StateName::LabelEolAfter + } else { + StateName::LabelAtBlankLine + }) + }) + } Some(b']') => { tokenizer.exit(tokenizer.tokenize_state.token_3.clone()); tokenizer.enter(tokenizer.tokenize_state.token_2.clone()); @@ -129,20 +138,20 @@ fn at_break(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.connect = true; } - label(tokenizer) + inside(tokenizer) } } } } /// To do. -fn after_eol(tokenizer: &mut Tokenizer) -> State { +pub fn eol_after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.connect = true; at_break(tokenizer) } /// To do. 
-fn at_blank_line(tokenizer: &mut Tokenizer) -> State { +pub fn at_blank_line(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.marker = 0; tokenizer.tokenize_state.connect = false; State::Nok @@ -154,7 +163,7 @@ fn at_blank_line(tokenizer: &mut Tokenizer) -> State { /// > | [a] /// ^ /// ``` -fn label(tokenizer: &mut Tokenizer) -> State { +pub fn inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n' | b'[' | b']') => { tokenizer.exit(Token::Data); @@ -165,13 +174,16 @@ fn label(tokenizer: &mut Tokenizer) -> State { tokenizer.exit(Token::Data); at_break(tokenizer) } else { - let func = if matches!(byte, b'\\') { escape } else { label }; tokenizer.consume(); tokenizer.tokenize_state.size += 1; if !tokenizer.tokenize_state.seen && !matches!(byte, b'\t' | b' ') { tokenizer.tokenize_state.seen = true; } - State::Fn(Box::new(func)) + State::Fn(if matches!(byte, b'\\') { + StateName::LabelEscape + } else { + StateName::LabelInside + }) } } } @@ -183,13 +195,13 @@ fn label(tokenizer: &mut Tokenizer) -> State { /// > | [a\*a] /// ^ /// ``` -fn escape(tokenizer: &mut Tokenizer) -> State { +pub fn escape(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'[' | b'\\' | b']') => { tokenizer.consume(); tokenizer.tokenize_state.size += 1; - State::Fn(Box::new(label)) + State::Fn(StateName::LabelInside) } - _ => label(tokenizer), + _ => inside(tokenizer), } } diff --git a/src/construct/partial_non_lazy_continuation.rs b/src/construct/partial_non_lazy_continuation.rs index 6005a6c..6d5cd7a 100644 --- a/src/construct/partial_non_lazy_continuation.rs +++ b/src/construct/partial_non_lazy_continuation.rs @@ -11,7 +11,7 @@ //! [html_flow]: crate::construct::html_flow use crate::token::Token; -use crate::tokenizer::{State, Tokenizer}; +use crate::tokenizer::{State, StateName, Tokenizer}; /// Start of continuation. 
/// @@ -26,7 +26,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(Token::LineEnding); tokenizer.consume(); tokenizer.exit(Token::LineEnding); - State::Fn(Box::new(after)) + State::Fn(StateName::NonLazyContinuationAfter) } _ => State::Nok, } @@ -39,7 +39,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// > | b /// ^ /// ``` -fn after(tokenizer: &mut Tokenizer) -> State { +pub fn after(tokenizer: &mut Tokenizer) -> State { if tokenizer.lazy { State::Nok } else { diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs index e3eac45..b0b35a6 100644 --- a/src/construct/partial_space_or_tab.rs +++ b/src/construct/partial_space_or_tab.rs @@ -6,7 +6,7 @@ use crate::subtokenize::link; use crate::token::Token; -use crate::tokenizer::{ContentType, State, StateFn, Tokenizer}; +use crate::tokenizer::{ContentType, State, StateName, Tokenizer}; /// Options to parse `space_or_tab`. #[derive(Debug)] @@ -37,8 +37,8 @@ pub struct EolOptions { /// ```bnf /// space_or_tab ::= 1*( ' ' '\t' ) /// ``` -pub fn space_or_tab() -> Box<StateFn> { - space_or_tab_min_max(1, usize::MAX) +pub fn space_or_tab(tokenizer: &mut Tokenizer) -> StateName { + space_or_tab_min_max(tokenizer, 1, usize::MAX) } /// Between `x` and `y` `space_or_tab`. @@ -46,26 +46,27 @@ pub fn space_or_tab() -> Box<StateFn> { /// ```bnf /// space_or_tab_min_max ::= x*y( ' ' '\t' ) /// ``` -pub fn space_or_tab_min_max(min: usize, max: usize) -> Box<StateFn> { - space_or_tab_with_options(Options { - kind: Token::SpaceOrTab, - min, - max, - content_type: None, - connect: false, - }) +pub fn space_or_tab_min_max(tokenizer: &mut Tokenizer, min: usize, max: usize) -> StateName { + space_or_tab_with_options( + tokenizer, + Options { + kind: Token::SpaceOrTab, + min, + max, + content_type: None, + connect: false, + }, + ) } /// `space_or_tab`, with the given options. 
-pub fn space_or_tab_with_options(options: Options) -> Box<StateFn> { - Box::new(|tokenizer| { - tokenizer.tokenize_state.space_or_tab_connect = options.connect; - tokenizer.tokenize_state.space_or_tab_content_type = options.content_type; - tokenizer.tokenize_state.space_or_tab_min = options.min; - tokenizer.tokenize_state.space_or_tab_max = options.max; - tokenizer.tokenize_state.space_or_tab_token = options.kind; - start(tokenizer) - }) +pub fn space_or_tab_with_options(tokenizer: &mut Tokenizer, options: Options) -> StateName { + tokenizer.tokenize_state.space_or_tab_connect = options.connect; + tokenizer.tokenize_state.space_or_tab_content_type = options.content_type; + tokenizer.tokenize_state.space_or_tab_min = options.min; + tokenizer.tokenize_state.space_or_tab_max = options.max; + tokenizer.tokenize_state.space_or_tab_token = options.kind; + StateName::SpaceOrTabStart } /// `space_or_tab`, or optionally `space_or_tab`, one `eol`, and @@ -74,41 +75,21 @@ pub fn space_or_tab_with_options(options: Options) -> Box<StateFn> { /// ```bnf /// space_or_tab_eol ::= 1*( ' ' '\t' ) | 0*( ' ' '\t' ) eol 0*( ' ' '\t' ) /// ``` -pub fn space_or_tab_eol() -> Box<StateFn> { - space_or_tab_eol_with_options(EolOptions { - content_type: None, - connect: false, - }) +pub fn space_or_tab_eol(tokenizer: &mut Tokenizer) -> StateName { + space_or_tab_eol_with_options( + tokenizer, + EolOptions { + content_type: None, + connect: false, + }, + ) } /// `space_or_tab_eol`, with the given options. 
-pub fn space_or_tab_eol_with_options(options: EolOptions) -> Box<StateFn> { - Box::new(move |tokenizer| { - tokenizer.tokenize_state.space_or_tab_eol_content_type = options.content_type; - tokenizer.tokenize_state.space_or_tab_eol_connect = options.connect; - - tokenizer.attempt( - space_or_tab_with_options(Options { - kind: Token::SpaceOrTab, - min: 1, - max: usize::MAX, - content_type: tokenizer - .tokenize_state - .space_or_tab_eol_content_type - .clone(), - connect: tokenizer.tokenize_state.space_or_tab_eol_connect, - }), - move |ok| { - Box::new(move |tokenizer| { - if ok { - tokenizer.tokenize_state.space_or_tab_eol_ok = ok; - } - - after_space_or_tab(tokenizer) - }) - }, - )(tokenizer) - }) +pub fn space_or_tab_eol_with_options(tokenizer: &mut Tokenizer, options: EolOptions) -> StateName { + tokenizer.tokenize_state.space_or_tab_eol_content_type = options.content_type; + tokenizer.tokenize_state.space_or_tab_eol_connect = options.connect; + StateName::SpaceOrTabEolStart } /// Before `space_or_tab`. 
@@ -117,7 +98,7 @@ pub fn space_or_tab_eol_with_options(options: EolOptions) -> Box<StateFn> { /// > | a␠␠b /// ^ /// ``` -fn start(tokenizer: &mut Tokenizer) -> State { +pub fn start(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'\t' | b' ') if tokenizer.tokenize_state.space_or_tab_max > 0 => { tokenizer.enter_with_content( @@ -144,7 +125,7 @@ fn start(tokenizer: &mut Tokenizer) -> State { /// > | a␠␠b /// ^ /// ``` -fn inside(tokenizer: &mut Tokenizer) -> State { +pub fn inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'\t' | b' ') if tokenizer.tokenize_state.space_or_tab_size @@ -152,7 +133,7 @@ fn inside(tokenizer: &mut Tokenizer) -> State { { tokenizer.consume(); tokenizer.tokenize_state.space_or_tab_size += 1; - State::Fn(Box::new(inside)) + State::Fn(StateName::SpaceOrTabInside) } _ => { tokenizer.exit(tokenizer.tokenize_state.space_or_tab_token.clone()); @@ -167,7 +148,7 @@ fn inside(tokenizer: &mut Tokenizer) -> State { /// > | a␠␠b /// ^ /// ``` -fn after(tokenizer: &mut Tokenizer) -> State { +pub fn after(tokenizer: &mut Tokenizer) -> State { let state = if tokenizer.tokenize_state.space_or_tab_size >= tokenizer.tokenize_state.space_or_tab_min { @@ -184,6 +165,44 @@ fn after(tokenizer: &mut Tokenizer) -> State { state } +pub fn eol_start(tokenizer: &mut Tokenizer) -> State { + let state_name = space_or_tab_with_options( + tokenizer, + Options { + kind: Token::SpaceOrTab, + min: 1, + max: usize::MAX, + content_type: tokenizer + .tokenize_state + .space_or_tab_eol_content_type + .clone(), + connect: tokenizer.tokenize_state.space_or_tab_eol_connect, + }, + ); + + tokenizer.attempt(state_name, move |ok| { + State::Fn(if ok { + StateName::SpaceOrTabEolAfterFirst + } else { + StateName::SpaceOrTabEolAtEol + }) + }) +} + +pub fn eol_after_first(tokenizer: &mut Tokenizer) -> State { + tokenizer.tokenize_state.space_or_tab_eol_ok = true; + + if tokenizer + .tokenize_state + .space_or_tab_eol_content_type + 
.is_some() + { + tokenizer.tokenize_state.space_or_tab_eol_connect = true; + } + + eol_at_eol(tokenizer) +} + /// `space_or_tab_eol`: after optionally first `space_or_tab`. /// /// ```markdown @@ -191,16 +210,7 @@ fn after(tokenizer: &mut Tokenizer) -> State { /// ^ /// | b /// ``` -fn after_space_or_tab(tokenizer: &mut Tokenizer) -> State { - if tokenizer.tokenize_state.space_or_tab_eol_ok - && tokenizer - .tokenize_state - .space_or_tab_eol_content_type - .is_some() - { - tokenizer.tokenize_state.space_or_tab_eol_connect = true; - } - +pub fn eol_at_eol(tokenizer: &mut Tokenizer) -> State { if let Some(b'\n') = tokenizer.current { tokenizer.enter_with_content( Token::LineEnding, @@ -223,17 +233,17 @@ fn after_space_or_tab(tokenizer: &mut Tokenizer) -> State { tokenizer.consume(); tokenizer.exit(Token::LineEnding); - State::Fn(Box::new(after_eol)) + State::Fn(StateName::SpaceOrTabEolAfterEol) } else { - let state = if tokenizer.tokenize_state.space_or_tab_eol_ok { - State::Ok - } else { - State::Nok - }; + let ok = tokenizer.tokenize_state.space_or_tab_eol_ok; tokenizer.tokenize_state.space_or_tab_eol_content_type = None; tokenizer.tokenize_state.space_or_tab_eol_connect = false; tokenizer.tokenize_state.space_or_tab_eol_ok = false; - state + if ok { + State::Ok + } else { + State::Nok + } } } @@ -245,9 +255,10 @@ fn after_space_or_tab(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` #[allow(clippy::needless_pass_by_value)] -fn after_eol(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt_opt( - space_or_tab_with_options(Options { +pub fn eol_after_eol(tokenizer: &mut Tokenizer) -> State { + let state_name = space_or_tab_with_options( + tokenizer, + Options { kind: Token::SpaceOrTab, min: 1, max: usize::MAX, @@ -256,9 +267,9 @@ fn after_eol(tokenizer: &mut Tokenizer) -> State { .space_or_tab_eol_content_type .clone(), connect: tokenizer.tokenize_state.space_or_tab_eol_connect, - }), - after_more_space_or_tab, - )(tokenizer) + }, + ); + 
tokenizer.attempt_opt(state_name, StateName::SpaceOrTabEolAfterMore) } /// `space_or_tab_eol`: after more (optional) `space_or_tab`. @@ -268,7 +279,7 @@ fn after_eol(tokenizer: &mut Tokenizer) -> State { /// > | b /// ^ /// ``` -fn after_more_space_or_tab(tokenizer: &mut Tokenizer) -> State { +pub fn eol_after_more(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.space_or_tab_eol_content_type = None; tokenizer.tokenize_state.space_or_tab_eol_connect = false; tokenizer.tokenize_state.space_or_tab_eol_ok = false; diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs index 6bf9099..8b72608 100644 --- a/src/construct/partial_title.rs +++ b/src/construct/partial_title.rs @@ -30,10 +30,10 @@ //! [character_reference]: crate::construct::character_reference //! [label_end]: crate::construct::label_end -use super::partial_space_or_tab::{space_or_tab_eol_with_options, EolOptions}; +use crate::construct::partial_space_or_tab::{space_or_tab_eol_with_options, EolOptions}; use crate::subtokenize::link; use crate::token::Token; -use crate::tokenizer::{ContentType, State, Tokenizer}; +use crate::tokenizer::{ContentType, State, StateName, Tokenizer}; /// Before a title. 
/// @@ -50,7 +50,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(tokenizer.tokenize_state.token_2.clone()); tokenizer.consume(); tokenizer.exit(tokenizer.tokenize_state.token_2.clone()); - State::Fn(Box::new(begin)) + State::Fn(StateName::TitleBegin) } _ => State::Nok, } @@ -64,7 +64,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// > | "a" /// ^ /// ``` -fn begin(tokenizer: &mut Tokenizer) -> State { +pub fn begin(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'"' | b'\'' | b')') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => @@ -90,20 +90,30 @@ fn begin(tokenizer: &mut Tokenizer) -> State { /// > | "a" /// ^ /// ``` -fn at_break(tokenizer: &mut Tokenizer) -> State { +pub fn at_break(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => { tokenizer.tokenize_state.marker = 0; tokenizer.tokenize_state.connect = false; State::Nok } - Some(b'\n') => tokenizer.attempt( - space_or_tab_eol_with_options(EolOptions { - content_type: Some(ContentType::String), - connect: tokenizer.tokenize_state.connect, - }), - |ok| Box::new(if ok { after_eol } else { at_blank_line }), - )(tokenizer), + Some(b'\n') => { + let state_name = space_or_tab_eol_with_options( + tokenizer, + EolOptions { + content_type: Some(ContentType::String), + connect: tokenizer.tokenize_state.connect, + }, + ); + + tokenizer.attempt(state_name, |ok| { + State::Fn(if ok { + StateName::TitleAfterEol + } else { + StateName::TitleAtBlankLine + }) + }) + } Some(b'"' | b'\'' | b')') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => { @@ -120,19 +130,19 @@ fn at_break(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.connect = true; } - title(tokenizer) + inside(tokenizer) } } } /// To do. -fn after_eol(tokenizer: &mut Tokenizer) -> State { +pub fn after_eol(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.connect = true; at_break(tokenizer) } /// To do. 
-fn at_blank_line(tokenizer: &mut Tokenizer) -> State { +pub fn at_blank_line(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.marker = 0; tokenizer.tokenize_state.connect = false; State::Nok @@ -144,7 +154,7 @@ fn at_blank_line(tokenizer: &mut Tokenizer) -> State { /// > | "a" /// ^ /// ``` -fn title(tokenizer: &mut Tokenizer) -> State { +pub fn inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { tokenizer.exit(Token::Data); @@ -157,9 +167,12 @@ fn title(tokenizer: &mut Tokenizer) -> State { at_break(tokenizer) } Some(byte) => { - let func = if matches!(byte, b'\\') { escape } else { title }; tokenizer.consume(); - State::Fn(Box::new(func)) + State::Fn(if matches!(byte, b'\\') { + StateName::TitleEscape + } else { + StateName::TitleInside + }) } } } @@ -170,12 +183,12 @@ fn title(tokenizer: &mut Tokenizer) -> State { /// > | "a\*b" /// ^ /// ``` -fn escape(tokenizer: &mut Tokenizer) -> State { +pub fn escape(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'"' | b'\'' | b')') => { tokenizer.consume(); - State::Fn(Box::new(title)) + State::Fn(StateName::TitleInside) } - _ => title(tokenizer), + _ => inside(tokenizer), } } diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs index 2ed2046..4ed25b6 100644 --- a/src/construct/thematic_break.rs +++ b/src/construct/thematic_break.rs @@ -51,7 +51,7 @@ use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}; use crate::constant::{TAB_SIZE, THEMATIC_BREAK_MARKER_COUNT_MIN}; use crate::token::Token; -use crate::tokenizer::{State, Tokenizer}; +use crate::tokenizer::{State, StateName, Tokenizer}; /// Start of a thematic break. 
/// @@ -62,17 +62,17 @@ use crate::tokenizer::{State, Tokenizer}; pub fn start(tokenizer: &mut Tokenizer) -> State { if tokenizer.parse_state.constructs.thematic_break { tokenizer.enter(Token::ThematicBreak); - tokenizer.go( - space_or_tab_min_max( - 0, - if tokenizer.parse_state.constructs.code_indented { - TAB_SIZE - 1 - } else { - usize::MAX - }, - ), - before, - )(tokenizer) + let state_name = space_or_tab_min_max( + tokenizer, + 0, + if tokenizer.parse_state.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }, + ); + + tokenizer.go(state_name, StateName::ThematicBreakBefore) } else { State::Nok } @@ -84,7 +84,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// > | *** /// ^ /// ``` -fn before(tokenizer: &mut Tokenizer) -> State { +pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'*' | b'-' | b'_') => { tokenizer.tokenize_state.marker = tokenizer.current.unwrap(); @@ -100,7 +100,7 @@ fn before(tokenizer: &mut Tokenizer) -> State { /// > | *** /// ^ /// ``` -fn at_break(tokenizer: &mut Tokenizer) -> State { +pub fn at_break(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') if tokenizer.tokenize_state.size >= THEMATIC_BREAK_MARKER_COUNT_MIN => { tokenizer.tokenize_state.marker = 0; @@ -130,18 +130,19 @@ fn at_break(tokenizer: &mut Tokenizer) -> State { /// > | *** /// ^ /// ``` -fn sequence(tokenizer: &mut Tokenizer) -> State { +pub fn sequence(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'*' | b'-' | b'_') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => { tokenizer.consume(); tokenizer.tokenize_state.size += 1; - State::Fn(Box::new(sequence)) + State::Fn(StateName::ThematicBreakSequence) } _ => { tokenizer.exit(Token::ThematicBreakSequence); - tokenizer.attempt_opt(space_or_tab(), at_break)(tokenizer) + let state_name = space_or_tab(tokenizer); + tokenizer.attempt_opt(state_name, StateName::ThematicBreakAtBreak) } } } |