| author    | 2022-08-09 10:45:15 +0200 |
|-----------|---------------------------|
| committer | 2022-08-09 10:45:15 +0200 |
| commit    | 4ce1ac9e41cafa9051377470e8a246063f7d9b1a (patch) |
| tree      | d678d9583764b2706fe7ea4876e91e40609f15b0 |
| parent    | 8ffed1822bcbc1b6ce6647b840fb03996b0635ea (diff) |
| download  | markdown-rs-4ce1ac9e41cafa9051377470e8a246063f7d9b1a.tar.gz markdown-rs-4ce1ac9e41cafa9051377470e8a246063f7d9b1a.tar.bz2 markdown-rs-4ce1ac9e41cafa9051377470e8a246063f7d9b1a.zip |
Rewrite algorithm to not pass around boxed functions
*   Pass state names from an enum around instead of boxed functions
*   Refactor to simplify attempts a lot
*   Use a subtokenizer for the `document` content type
34 files changed, 2065 insertions, 1194 deletions
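The core change is easiest to see in miniature before reading the diff. Below is a minimal, self-contained sketch of the pattern this commit moves to; the names `Tokenizer`, `State`, and `StateName` mirror the crate, but the sketch is illustrative, not the crate's actual code. Instead of each state function returning the next state as a boxed closure (`State::Fn(Box::new(next))`, one heap allocation per transition), a state function returns a `Copy` enum variant, and a central dispatcher maps that name back to a plain `fn` pointer:

```rust
// Sketch only: markdown-rs's real State/StateName/Tokenizer carry much
// more (events, tokenize_state, attempts, ...). This shows the dispatch
// pattern the commit introduces.

struct Tokenizer {
    current: Option<u8>, // the byte under the cursor, `None` at EOF
}

enum State {
    Ok,                // state machine finished successfully
    Fn(StateName),     // "call this named state next" -- no Box needed
}

#[derive(Clone, Copy)]
enum StateName {
    AttentionInside,
    // ...in the crate, one variant per state function
}

/// A state function. Returning `State::Fn(StateName::AttentionInside)`
/// replaces the old `State::Fn(Box::new(inside))`.
fn attention_inside(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        Some(b'*' | b'_') => State::Fn(StateName::AttentionInside),
        _ => State::Ok,
    }
}

/// Central dispatcher: resolve a name to its `fn` pointer and run it.
/// Names are `Copy`, so they are trivially cheap to store and pass around.
fn call(tokenizer: &mut Tokenizer, name: StateName) -> State {
    let func = match name {
        StateName::AttentionInside => attention_inside,
    };
    func(tokenizer)
}

fn main() {
    let mut tokenizer = Tokenizer { current: Some(b'*') };
    let mut state = State::Fn(StateName::AttentionInside);
    // Drive the machine until a terminal state is returned.
    while let State::Fn(name) = state {
        state = call(&mut tokenizer, name);
        tokenizer.current = None; // pretend the byte was consumed
    }
}
```

The same name-based indirection is what lets `attempt`, `attempt_opt`, `go`, and `check` take `StateName` values instead of boxed continuations throughout the diff below.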
```diff
diff --git a/src/compiler.rs b/src/compiler.rs
index b86fd82..57ab40a 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -482,28 +482,45 @@ fn on_enter_list(context: &mut CompileContext) {
             // Blank line directly in list or directly in list item,
             // but not a blank line after an empty list item.
             if balance < 3 && event.token_type == Token::BlankLineEnding {
-                let at_marker = balance == 2
-                    && events[skip::opt_back(
-                        events,
-                        index - 2,
-                        &[Token::BlankLineEnding, Token::SpaceOrTab],
-                    )]
-                    .token_type
-                        == Token::ListItemPrefix;
-                let at_list_item = balance == 1 && events[index - 2].token_type == Token::ListItem;
-                let at_empty_list_item = if at_list_item {
-                    let before_item = skip::opt_back(events, index - 2, &[Token::ListItem]);
-                    let before_prefix = skip::opt_back(
-                        events,
-                        index - 3,
-                        &[Token::ListItemPrefix, Token::SpaceOrTab],
-                    );
-                    before_item + 1 == before_prefix
-                } else {
-                    false
-                };
+                let mut at_marker = false;
+
+                if balance == 2 {
+                    let mut before = index - 2;
+
+                    if events[before].token_type == Token::SpaceOrTab {
+                        before -= 2;
+                    }
+
+                    if events[before].token_type == Token::ListItemPrefix {
+                        at_marker = true;
+                    }
+                }
+
+                let mut at_empty_list_item = false;
+                let mut at_empty_block_quote = false;
+
+                if balance == 1 {
+                    let mut before = index - 2;
+
+                    if events[before].token_type == Token::SpaceOrTab {
+                        before -= 2;
+                    }
+
+                    if events[before].token_type == Token::ListItem
+                        && events[before - 1].token_type == Token::ListItemPrefix
+                    {
+                        at_empty_list_item = true;
+                    }
+
+                    if events[before].token_type == Token::ListItem
+                        && events[before - 1].token_type == Token::BlockQuote
+                        && events[before - 2].token_type == Token::BlockQuotePrefix
+                    {
+                        at_empty_block_quote = true;
+                    }
+                }
-                if !at_marker && !at_list_item && !at_empty_list_item {
+                if !at_marker && !at_empty_list_item && !at_empty_block_quote {
                     loose = true;
                     break;
                 }
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
index fc2acfb..5a98a89 100644
--- a/src/construct/attention.rs
+++ b/src/construct/attention.rs
@@ -52,7 +52,7 @@
 //! [html-strong]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-strong-element

 use crate::token::Token;
-use crate::tokenizer::{Event, EventType, Point, State, Tokenizer};
+use crate::tokenizer::{Event, EventType, Point, State, StateName, Tokenizer};
 use crate::unicode::PUNCTUATION;
 use crate::util::slice::Slice;
@@ -132,11 +132,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 /// > | **
 ///     ^^
 /// ```
-fn inside(tokenizer: &mut Tokenizer) -> State {
+pub fn inside(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'*' | b'_') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
             tokenizer.consume();
-            State::Fn(Box::new(inside))
+            State::Fn(StateName::AttentionInside)
         }
         _ => {
             tokenizer.exit(Token::AttentionSequence);
diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs
index 1444c61..15bfac1 100644
--- a/src/construct/autolink.rs
+++ b/src/construct/autolink.rs
@@ -103,7 +103,7 @@
 use crate::constant::{AUTOLINK_DOMAIN_SIZE_MAX, AUTOLINK_SCHEME_SIZE_MAX};
 use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};

 /// Start of an autolink.
 ///
@@ -121,7 +121,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
             tokenizer.consume();
             tokenizer.exit(Token::AutolinkMarker);
             tokenizer.enter(Token::AutolinkProtocol);
-            State::Fn(Box::new(open))
+            State::Fn(StateName::AutolinkOpen)
         }
         _ => State::Nok,
     }
@@ -135,12 +135,12 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 /// > | a<user@example.com>b
 ///       ^
 /// ```
-fn open(tokenizer: &mut Tokenizer) -> State {
+pub fn open(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         // ASCII alphabetic.
         Some(b'A'..=b'Z' | b'a'..=b'z') => {
             tokenizer.consume();
-            State::Fn(Box::new(scheme_or_email_atext))
+            State::Fn(StateName::AutolinkSchemeOrEmailAtext)
         }
         _ => email_atext(tokenizer),
     }
@@ -154,7 +154,7 @@ fn open(tokenizer: &mut Tokenizer) -> State {
 /// > | a<user@example.com>b
 ///        ^
 /// ```
-fn scheme_or_email_atext(tokenizer: &mut Tokenizer) -> State {
+pub fn scheme_or_email_atext(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         // ASCII alphanumeric and `+`, `-`, and `.`.
         Some(b'+' | b'-' | b'.' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => {
@@ -174,12 +174,12 @@ fn scheme_or_email_atext(tokenizer: &mut Tokenizer) -> State {
 /// > | a<user@example.com>b
 ///        ^
 /// ```
-fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer) -> State {
+pub fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b':') => {
             tokenizer.consume();
             tokenizer.tokenize_state.size = 0;
-            State::Fn(Box::new(url_inside))
+            State::Fn(StateName::AutolinkUrlInside)
         }
         // ASCII alphanumeric and `+`, `-`, and `.`.
         Some(b'+' | b'-' | b'.' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z')
@@ -187,7 +187,7 @@ fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer) -> State {
         {
             tokenizer.tokenize_state.size += 1;
             tokenizer.consume();
-            State::Fn(Box::new(scheme_inside_or_email_atext))
+            State::Fn(StateName::AutolinkSchemeInsideOrEmailAtext)
         }
         _ => {
             tokenizer.tokenize_state.size = 0;
@@ -202,7 +202,7 @@ fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer) -> State {
 /// > | a<https://example.com>b
 ///             ^
 /// ```
-fn url_inside(tokenizer: &mut Tokenizer) -> State {
+pub fn url_inside(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'>') => {
             tokenizer.exit(Token::AutolinkProtocol);
@@ -212,7 +212,7 @@ fn url_inside(tokenizer: &mut Tokenizer) -> State {
         None | Some(b'\0'..=0x1F | b' ' | b'<' | 0x7F) => State::Nok,
         Some(_) => {
             tokenizer.consume();
-            State::Fn(Box::new(url_inside))
+            State::Fn(StateName::AutolinkUrlInside)
         }
     }
 }
@@ -223,11 +223,11 @@ fn url_inside(tokenizer: &mut Tokenizer) -> State {
 /// > | a<user.name@example.com>b
 ///              ^
 /// ```
-fn email_atext(tokenizer: &mut Tokenizer) -> State {
+pub fn email_atext(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'@') => {
             tokenizer.consume();
-            State::Fn(Box::new(email_at_sign_or_dot))
+            State::Fn(StateName::AutolinkEmailAtSignOrDot)
         }
         // ASCII atext.
         //
@@ -250,7 +250,7 @@ fn email_atext(tokenizer: &mut Tokenizer) -> State {
             b'#'..=b'\'' | b'*' | b'+' | b'-'..=b'9' | b'=' | b'?' | b'A'..=b'Z' | b'^'..=b'~',
         ) => {
             tokenizer.consume();
-            State::Fn(Box::new(email_atext))
+            State::Fn(StateName::AutolinkEmailAtext)
         }
         _ => State::Nok,
     }
@@ -262,7 +262,7 @@ fn email_atext(tokenizer: &mut Tokenizer) -> State {
 /// > | a<user.name@example.com>b
 ///                 ^       ^
 /// ```
-fn email_at_sign_or_dot(tokenizer: &mut Tokenizer) -> State {
+pub fn email_at_sign_or_dot(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         // ASCII alphanumeric.
         Some(b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => email_value(tokenizer),
@@ -276,12 +276,12 @@ fn email_at_sign_or_dot(tokenizer: &mut Tokenizer) -> State {
 /// > | a<user.name@example.com>b
 ///                   ^
 /// ```
-fn email_label(tokenizer: &mut Tokenizer) -> State {
+pub fn email_label(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'.') => {
             tokenizer.tokenize_state.size = 0;
             tokenizer.consume();
-            State::Fn(Box::new(email_at_sign_or_dot))
+            State::Fn(StateName::AutolinkEmailAtSignOrDot)
         }
         Some(b'>') => {
             tokenizer.tokenize_state.size = 0;
@@ -304,20 +304,20 @@ fn email_label(tokenizer: &mut Tokenizer) -> State {
 /// > | a<user.name@ex-ample.com>b
 ///                    ^
 /// ```
-fn email_value(tokenizer: &mut Tokenizer) -> State {
+pub fn email_value(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         // ASCII alphanumeric or `-`.
         Some(b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z')
             if tokenizer.tokenize_state.size < AUTOLINK_DOMAIN_SIZE_MAX =>
         {
-            let func = if matches!(tokenizer.current, Some(b'-')) {
-                email_value
+            let state_name = if matches!(tokenizer.current, Some(b'-')) {
+                StateName::AutolinkEmailValue
             } else {
-                email_label
+                StateName::AutolinkEmailLabel
             };
             tokenizer.tokenize_state.size += 1;
             tokenizer.consume();
-            State::Fn(Box::new(func))
+            State::Fn(state_name)
         }
         _ => {
             tokenizer.tokenize_state.size = 0;
@@ -334,7 +334,7 @@ fn email_value(tokenizer: &mut Tokenizer) -> State {
 /// > | a<user@example.com>b
 ///                       ^
 /// ```
-fn end(tokenizer: &mut Tokenizer) -> State {
+pub fn end(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'>') => {
             tokenizer.enter(Token::AutolinkMarker);
diff --git a/src/construct/blank_line.rs b/src/construct/blank_line.rs
index c4eacf5..b12c2c4 100644
--- a/src/construct/blank_line.rs
+++ b/src/construct/blank_line.rs
@@ -33,7 +33,7 @@
 //! [flow]: crate::content::flow

 use crate::construct::partial_space_or_tab::space_or_tab;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};

 /// Start of a blank line.
 ///
@@ -46,7 +46,8 @@ use crate::tokenizer::{State, Tokenizer};
 ///     ^
 /// ```
 pub fn start(tokenizer: &mut Tokenizer) -> State {
-    tokenizer.attempt_opt(space_or_tab(), after)(tokenizer)
+    let state_name = space_or_tab(tokenizer);
+    tokenizer.attempt_opt(state_name, StateName::BlankLineAfter)
 }

 /// After zero or more spaces or tabs, before a line ending or EOF.
@@ -57,7 +58,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 /// > | ␊
 ///     ^
 /// ```
-fn after(tokenizer: &mut Tokenizer) -> State {
+pub fn after(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None | Some(b'\n') => State::Ok,
         _ => State::Nok,
diff --git a/src/construct/block_quote.rs b/src/construct/block_quote.rs
index 7e4753d..df58d62 100644
--- a/src/construct/block_quote.rs
+++ b/src/construct/block_quote.rs
@@ -36,7 +36,7 @@
 use crate::constant::TAB_SIZE;
 use crate::construct::partial_space_or_tab::space_or_tab_min_max;
 use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};

 /// Start of block quote.
 ///
@@ -45,13 +45,17 @@ use crate::tokenizer::{State, Tokenizer};
 ///     ^
 /// ```
 pub fn start(tokenizer: &mut Tokenizer) -> State {
-    let max = if tokenizer.parse_state.constructs.code_indented {
-        TAB_SIZE - 1
-    } else {
-        usize::MAX
-    };
     if tokenizer.parse_state.constructs.block_quote {
-        tokenizer.go(space_or_tab_min_max(0, max), before)(tokenizer)
+        let state_name = space_or_tab_min_max(
+            tokenizer,
+            0,
+            if tokenizer.parse_state.constructs.code_indented {
+                TAB_SIZE - 1
+            } else {
+                usize::MAX
+            },
+        );
+        tokenizer.go(state_name, StateName::BlockQuoteBefore)
     } else {
         State::Nok
     }
@@ -63,7 +67,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 /// > | > a
 ///     ^
 /// ```
-fn before(tokenizer: &mut Tokenizer) -> State {
+pub fn before(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'>') => {
             tokenizer.enter(Token::BlockQuote);
@@ -80,13 +84,17 @@ fn before(tokenizer: &mut Tokenizer) -> State {
 /// > | > b
 ///     ^
 /// ```
-pub fn cont(tokenizer: &mut Tokenizer) -> State {
-    let max = if tokenizer.parse_state.constructs.code_indented {
-        TAB_SIZE - 1
-    } else {
-        usize::MAX
-    };
-    tokenizer.go(space_or_tab_min_max(0, max), cont_before)(tokenizer)
+pub fn cont_start(tokenizer: &mut Tokenizer) -> State {
+    let state_name = space_or_tab_min_max(
+        tokenizer,
+        0,
+        if tokenizer.parse_state.constructs.code_indented {
+            TAB_SIZE - 1
+        } else {
+            usize::MAX
+        },
+    );
+    tokenizer.go(state_name, StateName::BlockQuoteContBefore)
 }

 /// After whitespace, before `>`.
@@ -96,14 +104,14 @@ pub fn cont(tokenizer: &mut Tokenizer) -> State {
 /// > | > b
 ///     ^
 /// ```
-fn cont_before(tokenizer: &mut Tokenizer) -> State {
+pub fn cont_before(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'>') => {
             tokenizer.enter(Token::BlockQuotePrefix);
             tokenizer.enter(Token::BlockQuoteMarker);
             tokenizer.consume();
             tokenizer.exit(Token::BlockQuoteMarker);
-            State::Fn(Box::new(cont_after))
+            State::Fn(StateName::BlockQuoteContAfter)
         }
         _ => State::Nok,
     }
@@ -117,15 +125,13 @@ fn cont_before(tokenizer: &mut Tokenizer) -> State {
 /// > | >b
 ///      ^
 /// ```
-fn cont_after(tokenizer: &mut Tokenizer) -> State {
+pub fn cont_after(tokenizer: &mut Tokenizer) -> State {
     if let Some(b'\t' | b' ') = tokenizer.current {
         tokenizer.enter(Token::SpaceOrTab);
         tokenizer.consume();
         tokenizer.exit(Token::SpaceOrTab);
-        tokenizer.exit(Token::BlockQuotePrefix);
-        State::Ok
-    } else {
-        tokenizer.exit(Token::BlockQuotePrefix);
-        State::Ok
     }
+
+    tokenizer.exit(Token::BlockQuotePrefix);
+    State::Ok
 }
diff --git a/src/construct/character_escape.rs b/src/construct/character_escape.rs
index 4419d7a..de09f17 100644
--- a/src/construct/character_escape.rs
+++ b/src/construct/character_escape.rs
@@ -34,7 +34,7 @@
 //! [hard_break_escape]: crate::construct::hard_break_escape

 use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};

 /// Start of a character escape.
 ///
@@ -49,7 +49,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
             tokenizer.enter(Token::CharacterEscapeMarker);
             tokenizer.consume();
             tokenizer.exit(Token::CharacterEscapeMarker);
-            State::Fn(Box::new(inside))
+            State::Fn(StateName::CharacterEscapeInside)
         }
         _ => State::Nok,
     }
@@ -61,7 +61,8 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 /// > | a\*b
 ///       ^
 /// ```
-fn inside(tokenizer: &mut Tokenizer) -> State {
+// StateName::CharacterEscapeInside
+pub fn inside(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         // ASCII punctuation.
         Some(b'!'..=b'/' | b':'..=b'@' | b'['..=b'`' | b'{'..=b'~') => {
diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs
index 7cc74ba..ba05fab 100644
--- a/src/construct/character_reference.rs
+++ b/src/construct/character_reference.rs
@@ -66,7 +66,7 @@ use crate::constant::{
     CHARACTER_REFERENCE_HEXADECIMAL_SIZE_MAX, CHARACTER_REFERENCE_NAMED_SIZE_MAX,
 };
 use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};
 use crate::util::slice::Slice;

 /// Start of a character reference.
@@ -86,7 +86,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
             tokenizer.enter(Token::CharacterReferenceMarker);
             tokenizer.consume();
             tokenizer.exit(Token::CharacterReferenceMarker);
-            State::Fn(Box::new(open))
+            State::Fn(StateName::CharacterReferenceOpen)
         }
         _ => State::Nok,
     }
@@ -103,12 +103,13 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 /// > | a	b
 ///       ^
 /// ```
-fn open(tokenizer: &mut Tokenizer) -> State {
+// StateName::CharacterReferenceOpen
+pub fn open(tokenizer: &mut Tokenizer) -> State {
     if let Some(b'#') = tokenizer.current {
         tokenizer.enter(Token::CharacterReferenceMarkerNumeric);
         tokenizer.consume();
         tokenizer.exit(Token::CharacterReferenceMarkerNumeric);
-        State::Fn(Box::new(numeric))
+        State::Fn(StateName::CharacterReferenceNumeric)
     } else {
         tokenizer.tokenize_state.marker = b'&';
         tokenizer.enter(Token::CharacterReferenceValue);
@@ -125,14 +126,15 @@ fn open(tokenizer: &mut Tokenizer) -> State {
 /// > | a	b
 ///        ^
 /// ```
-fn numeric(tokenizer: &mut Tokenizer) -> State {
+// StateName::CharacterReferenceNumeric
+pub fn numeric(tokenizer: &mut Tokenizer) -> State {
     if let Some(b'x' | b'X') = tokenizer.current {
         tokenizer.enter(Token::CharacterReferenceMarkerHexadecimal);
         tokenizer.consume();
         tokenizer.exit(Token::CharacterReferenceMarkerHexadecimal);
         tokenizer.enter(Token::CharacterReferenceValue);
         tokenizer.tokenize_state.marker = b'x';
-        State::Fn(Box::new(value))
+        State::Fn(StateName::CharacterReferenceValue)
     } else {
         tokenizer.enter(Token::CharacterReferenceValue);
         tokenizer.tokenize_state.marker = b'#';
@@ -154,7 +156,7 @@ fn numeric(tokenizer: &mut Tokenizer) -> State {
 /// > | a	b
 ///         ^
 /// ```
-fn value(tokenizer: &mut Tokenizer) -> State {
+pub fn value(tokenizer: &mut Tokenizer) -> State {
     if matches!(tokenizer.current, Some(b';')) && tokenizer.tokenize_state.size > 0 {
         // Named.
         if tokenizer.tokenize_state.marker == b'&' {
@@ -200,7 +202,7 @@ fn value(tokenizer: &mut Tokenizer) -> State {
         if tokenizer.tokenize_state.size < max && test(&byte) {
             tokenizer.tokenize_state.size += 1;
             tokenizer.consume();
-            return State::Fn(Box::new(value));
+            return State::Fn(StateName::CharacterReferenceValue);
         }
     }
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index a22a0f9..46c5f9f 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -102,12 +102,9 @@
 //! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element

 use crate::constant::{CODE_FENCED_SEQUENCE_SIZE_MIN, TAB_SIZE};
-use crate::construct::{
-    partial_non_lazy_continuation::start as partial_non_lazy_continuation,
-    partial_space_or_tab::{space_or_tab, space_or_tab_min_max},
-};
+use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
 use crate::token::Token;
-use crate::tokenizer::{ContentType, State, Tokenizer};
+use crate::tokenizer::{ContentType, State, StateName, Tokenizer};
 use crate::util::slice::{Position, Slice};

 /// Start of fenced code.
@@ -122,17 +119,16 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
     if tokenizer.parse_state.constructs.code_fenced {
         tokenizer.enter(Token::CodeFenced);
         tokenizer.enter(Token::CodeFencedFence);
-        tokenizer.go(
-            space_or_tab_min_max(
-                0,
-                if tokenizer.parse_state.constructs.code_indented {
-                    TAB_SIZE - 1
-                } else {
-                    usize::MAX
-                },
-            ),
-            before_sequence_open,
-        )(tokenizer)
+        let state_name = space_or_tab_min_max(
+            tokenizer,
+            0,
+            if tokenizer.parse_state.constructs.code_indented {
+                TAB_SIZE - 1
+            } else {
+                usize::MAX
+            },
+        );
+        tokenizer.go(state_name, StateName::CodeFencedBeforeSequenceOpen)
     } else {
         State::Nok
     }
@@ -146,7 +142,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 ///   | console.log(1)
 ///   | ~~~
 /// ```
-fn before_sequence_open(tokenizer: &mut Tokenizer) -> State {
+pub fn before_sequence_open(tokenizer: &mut Tokenizer) -> State {
     let tail = tokenizer.events.last();
     let mut prefix = 0;
@@ -178,16 +174,17 @@ fn before_sequence_open(tokenizer: &mut Tokenizer) -> State {
 ///   | console.log(1)
 ///   | ~~~
 /// ```
-fn sequence_open(tokenizer: &mut Tokenizer) -> State {
+pub fn sequence_open(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'`' | b'~') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
             tokenizer.tokenize_state.size += 1;
             tokenizer.consume();
-            State::Fn(Box::new(sequence_open))
+            State::Fn(StateName::CodeFencedSequenceOpen)
         }
         _ if tokenizer.tokenize_state.size >= CODE_FENCED_SEQUENCE_SIZE_MIN => {
             tokenizer.exit(Token::CodeFencedFenceSequence);
-            tokenizer.attempt_opt(space_or_tab(), info_before)(tokenizer)
+            let state_name = space_or_tab(tokenizer);
+            tokenizer.attempt_opt(state_name, StateName::CodeFencedInfoBefore)
         }
         _ => {
             tokenizer.tokenize_state.marker = 0;
@@ -206,7 +203,7 @@ fn sequence_open(tokenizer: &mut Tokenizer) -> State {
 ///   | console.log(1)
 ///   | ~~~
 /// ```
-fn info_before(tokenizer: &mut Tokenizer) -> State {
+pub fn info_before(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None | Some(b'\n') => {
             tokenizer.exit(Token::CodeFencedFence);
@@ -217,7 +214,7 @@ fn info_before(tokenizer: &mut Tokenizer) -> State {
         _ => {
             tokenizer.enter(Token::CodeFencedFenceInfo);
             tokenizer.enter_with_content(Token::Data, Some(ContentType::String));
-            info_inside(tokenizer)
+            info(tokenizer)
         }
     }
 }
@@ -230,7 +227,7 @@ fn info_before(tokenizer: &mut Tokenizer) -> State {
 ///   | console.log(1)
 ///   | ~~~
 /// ```
-fn info_inside(tokenizer: &mut Tokenizer) -> State {
+pub fn info(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None | Some(b'\n') => {
             tokenizer.exit(Token::Data);
@@ -243,7 +240,8 @@ fn info_inside(tokenizer: &mut Tokenizer) -> State {
         Some(b'\t' | b' ') => {
             tokenizer.exit(Token::Data);
             tokenizer.exit(Token::CodeFencedFenceInfo);
-            tokenizer.attempt_opt(space_or_tab(), meta_before)(tokenizer)
+            let state_name = space_or_tab(tokenizer);
+            tokenizer.attempt_opt(state_name, StateName::CodeFencedMetaBefore)
         }
         Some(b'`') if tokenizer.tokenize_state.marker == b'`' => {
             tokenizer.concrete = false;
@@ -254,7 +252,7 @@ fn info_inside(tokenizer: &mut Tokenizer) -> State {
         }
         Some(_) => {
             tokenizer.consume();
-            State::Fn(Box::new(info_inside))
+            State::Fn(StateName::CodeFencedInfo)
         }
     }
 }
@@ -267,7 +265,7 @@ fn info_inside(tokenizer: &mut Tokenizer) -> State {
 ///   | console.log(1)
 ///   | ~~~
 /// ```
-fn meta_before(tokenizer: &mut Tokenizer) -> State {
+pub fn meta_before(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None | Some(b'\n') => {
             tokenizer.exit(Token::CodeFencedFence);
@@ -291,7 +289,7 @@ fn meta_before(tokenizer: &mut Tokenizer) -> State {
 ///   | console.log(1)
 ///   | ~~~
 /// ```
-fn meta(tokenizer: &mut Tokenizer) -> State {
+pub fn meta(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None | Some(b'\n') => {
             tokenizer.exit(Token::Data);
@@ -310,7 +308,7 @@ fn meta(tokenizer: &mut Tokenizer) -> State {
         }
         _ => {
             tokenizer.consume();
-            State::Fn(Box::new(meta))
+            State::Fn(StateName::CodeFencedMeta)
         }
     }
 }
@@ -324,10 +322,14 @@ fn meta(tokenizer: &mut Tokenizer) -> State {
 ///                   ^
 ///   | ~~~
 /// ```
-fn at_break(tokenizer: &mut Tokenizer) -> State {
-    tokenizer.check(partial_non_lazy_continuation, |ok| {
-        Box::new(if ok { at_non_lazy_break } else { after })
-    })(tokenizer)
+pub fn at_break(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.check(StateName::NonLazyContinuationStart, |ok| {
+        State::Fn(if ok {
+            StateName::CodeFencedAtNonLazyBreak
+        } else {
+            StateName::CodeFencedAfter
+        })
+    })
 }

 /// At an eol/eof in code, before a non-lazy closing fence or content.
@@ -339,10 +341,14 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {
 ///                   ^
 ///   | ~~~
 /// ```
-fn at_non_lazy_break(tokenizer: &mut Tokenizer) -> State {
-    tokenizer.attempt(close_begin, |ok| {
-        Box::new(if ok { after } else { content_before })
-    })(tokenizer)
+pub fn at_non_lazy_break(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.attempt(StateName::CodeFencedCloseBefore, |ok| {
+        State::Fn(if ok {
+            StateName::CodeFencedAfter
+        } else {
+            StateName::CodeFencedContentBefore
+        })
+    })
 }

 /// Before a closing fence, at the line ending.
@@ -353,13 +359,13 @@ fn at_non_lazy_break(tokenizer: &mut Tokenizer) -> State {
 ///                   ^
 ///   | ~~~
 /// ```
-fn close_begin(tokenizer: &mut Tokenizer) -> State {
+pub fn close_before(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'\n') => {
             tokenizer.enter(Token::LineEnding);
             tokenizer.consume();
             tokenizer.exit(Token::LineEnding);
-            State::Fn(Box::new(close_start))
+            State::Fn(StateName::CodeFencedCloseStart)
         }
         _ => unreachable!("expected eol"),
     }
@@ -373,19 +379,18 @@ fn close_begin(tokenizer: &mut Tokenizer) -> State {
 /// > | ~~~
 ///     ^
 /// ```
-fn close_start(tokenizer: &mut Tokenizer) -> State {
+pub fn close_start(tokenizer: &mut Tokenizer) -> State {
     tokenizer.enter(Token::CodeFencedFence);
-    tokenizer.go(
-        space_or_tab_min_max(
-            0,
-            if tokenizer.parse_state.constructs.code_indented {
-                TAB_SIZE - 1
-            } else {
-                usize::MAX
-            },
-        ),
-        close_before,
-    )(tokenizer)
+    let state_name = space_or_tab_min_max(
+        tokenizer,
+        0,
+        if tokenizer.parse_state.constructs.code_indented {
+            TAB_SIZE - 1
+        } else {
+            usize::MAX
+        },
+    );
+    tokenizer.go(state_name, StateName::CodeFencedBeforeSequenceClose)
 }

 /// In a closing fence, after optional whitespace, before sequence.
@@ -396,11 +401,11 @@ fn close_start(tokenizer: &mut Tokenizer) -> State {
 /// > | ~~~
 ///     ^
 /// ```
-fn close_before(tokenizer: &mut Tokenizer) -> State {
+pub fn before_sequence_close(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'`' | b'~') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
             tokenizer.enter(Token::CodeFencedFenceSequence);
-            close_sequence(tokenizer)
+            sequence_close(tokenizer)
         }
         _ => State::Nok,
     }
@@ -414,19 +419,20 @@ fn close_before(tokenizer: &mut Tokenizer) -> State {
 /// > | ~~~
 ///     ^
 /// ```
-fn close_sequence(tokenizer: &mut Tokenizer) -> State {
+pub fn sequence_close(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'`' | b'~') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
             tokenizer.tokenize_state.size_other += 1;
             tokenizer.consume();
-            State::Fn(Box::new(close_sequence))
+            State::Fn(StateName::CodeFencedSequenceClose)
         }
         _ if tokenizer.tokenize_state.size_other >= CODE_FENCED_SEQUENCE_SIZE_MIN
             && tokenizer.tokenize_state.size_other >= tokenizer.tokenize_state.size =>
         {
             tokenizer.tokenize_state.size_other = 0;
             tokenizer.exit(Token::CodeFencedFenceSequence);
-            tokenizer.attempt_opt(space_or_tab(), close_sequence_after)(tokenizer)
+            let state_name = space_or_tab(tokenizer);
+            tokenizer.attempt_opt(state_name, StateName::CodeFencedAfterSequenceClose)
         }
         _ => {
             tokenizer.tokenize_state.size_other = 0;
@@ -443,7 +449,7 @@ fn close_sequence(tokenizer: &mut Tokenizer) -> State {
 /// > | ~~~
 ///        ^
 /// ```
-fn close_sequence_after(tokenizer: &mut Tokenizer) -> State {
+pub fn sequence_close_after(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None | Some(b'\n') => {
             tokenizer.exit(Token::CodeFencedFence);
@@ -461,11 +467,11 @@ fn close_sequence_after(tokenizer: &mut Tokenizer) -> State {
 ///                   ^
 ///   | ~~~
 /// ```
-fn content_before(tokenizer: &mut Tokenizer) -> State {
+pub fn content_before(tokenizer: &mut Tokenizer) -> State {
     tokenizer.enter(Token::LineEnding);
     tokenizer.consume();
     tokenizer.exit(Token::LineEnding);
-    State::Fn(Box::new(content_start))
+    State::Fn(StateName::CodeFencedContentStart)
 }

 /// Before code content, definitely not before a closing fence.
 ///
@@ -475,11 +481,9 @@ fn content_before(tokenizer: &mut Tokenizer) -> State {
 ///     ^
 ///   | ~~~
 /// ```
-fn content_start(tokenizer: &mut Tokenizer) -> State {
-    tokenizer.go(
-        space_or_tab_min_max(0, tokenizer.tokenize_state.prefix),
-        content_begin,
-    )(tokenizer)
+pub fn content_start(tokenizer: &mut Tokenizer) -> State {
+    let state_name = space_or_tab_min_max(tokenizer, 0, tokenizer.tokenize_state.prefix);
+    tokenizer.go(state_name, StateName::CodeFencedBeforeContentChunk)
 }

 /// Before code content, after a prefix.
@@ -490,12 +494,12 @@ fn content_start(tokenizer: &mut Tokenizer) -> State {
 ///     ^
 ///   | ~~~
 /// ```
-fn content_begin(tokenizer: &mut Tokenizer) -> State {
+pub fn before_content_chunk(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None | Some(b'\n') => at_break(tokenizer),
         _ => {
             tokenizer.enter(Token::CodeFlowChunk);
-            content_continue(tokenizer)
+            content_chunk(tokenizer)
         }
     }
 }
@@ -508,7 +512,7 @@ fn content_begin(tokenizer: &mut Tokenizer) -> State {
 ///     ^^^^^^^^^^^^^^
 ///   | ~~~
 /// ```
-fn content_continue(tokenizer: &mut Tokenizer) -> State {
+pub fn content_chunk(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None | Some(b'\n') => {
             tokenizer.exit(Token::CodeFlowChunk);
@@ -516,7 +520,7 @@ fn content_continue(tokenizer: &mut Tokenizer) -> State {
         }
         _ => {
             tokenizer.consume();
-            State::Fn(Box::new(content_continue))
+            State::Fn(StateName::CodeFencedContentChunk)
         }
     }
 }
@@ -529,7 +533,7 @@ fn content_continue(tokenizer: &mut Tokenizer) -> State {
 /// > | ~~~
 ///        ^
 /// ```
-fn after(tokenizer: &mut Tokenizer) -> State {
+pub fn after(tokenizer: &mut Tokenizer) -> State {
     tokenizer.exit(Token::CodeFenced);
     tokenizer.tokenize_state.marker = 0;
     tokenizer.tokenize_state.prefix = 0;
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs
index 81a3080..516b493 100644
--- a/src/construct/code_indented.rs
+++ b/src/construct/code_indented.rs
@@ -48,7 +48,7 @@
 use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
 use crate::constant::TAB_SIZE;
 use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};

 /// Start of code (indented).
 ///
@@ -64,7 +64,8 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
     // Do not interrupt paragraphs.
     if !tokenizer.interrupt && tokenizer.parse_state.constructs.code_indented {
         tokenizer.enter(Token::CodeIndented);
-        tokenizer.go(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), at_break)(tokenizer)
+        let state_name = space_or_tab_min_max(tokenizer, TAB_SIZE, TAB_SIZE);
+        tokenizer.go(state_name, StateName::CodeIndentedAtBreak)
     } else {
         State::Nok
     }
@@ -76,15 +77,19 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 /// > |     aaa
 ///         ^  ^
 /// ```
-fn at_break(tokenizer: &mut Tokenizer) -> State {
+pub fn at_break(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None => after(tokenizer),
-        Some(b'\n') => tokenizer.attempt(further_start, |ok| {
-            Box::new(if ok { at_break } else { after })
-        })(tokenizer),
+        Some(b'\n') => tokenizer.attempt(StateName::CodeIndentedFurtherStart, |ok| {
+            State::Fn(if ok {
+                StateName::CodeIndentedAtBreak
+            } else {
+                StateName::CodeIndentedAfter
+            })
+        }),
         _ => {
             tokenizer.enter(Token::CodeFlowChunk);
-            content(tokenizer)
+            inside(tokenizer)
         }
     }
 }
@@ -95,7 +100,7 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {
 /// > |     aaa
 ///         ^^^^
 /// ```
-fn content(tokenizer: &mut Tokenizer) -> State {
+pub fn inside(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None | Some(b'\n') => {
             tokenizer.exit(Token::CodeFlowChunk);
@@ -103,7 +108,7 @@ fn content(tokenizer: &mut Tokenizer) -> State {
         }
         _ => {
             tokenizer.consume();
-            State::Fn(Box::new(content))
+            State::Fn(StateName::CodeIndentedInside)
         }
     }
 }
@@ -114,7 +119,7 @@ fn content(tokenizer: &mut Tokenizer) -> State {
 /// > |     aaa
 ///            ^
 /// ```
-fn after(tokenizer: &mut Tokenizer) -> State {
+pub fn after(tokenizer: &mut Tokenizer) -> State {
     tokenizer.exit(Token::CodeIndented);
     // Feel free to interrupt.
     tokenizer.interrupt = false;
@@ -128,17 +133,24 @@ fn after(tokenizer: &mut Tokenizer) -> State {
 ///            ^
 ///   |     bbb
 /// ```
-fn further_start(tokenizer: &mut Tokenizer) -> State {
+pub fn further_start(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'\n') if !tokenizer.lazy => {
             tokenizer.enter(Token::LineEnding);
             tokenizer.consume();
             tokenizer.exit(Token::LineEnding);
-            State::Fn(Box::new(further_start))
+            State::Fn(StateName::CodeIndentedFurtherStart)
+        }
+        _ if !tokenizer.lazy => {
+            let state_name = space_or_tab_min_max(tokenizer, TAB_SIZE, TAB_SIZE);
+            tokenizer.attempt(state_name, |ok| {
+                State::Fn(if ok {
+                    StateName::CodeIndentedFurtherEnd
+                } else {
+                    StateName::CodeIndentedFurtherBegin
+                })
+            })
         }
-        _ if !tokenizer.lazy => tokenizer.attempt(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), |ok| {
-            Box::new(if ok { further_end } else { further_begin })
-        })(tokenizer),
         _ => State::Nok,
     }
 }
@@ -150,7 +162,7 @@ fn further_start(tokenizer: &mut Tokenizer) -> State {
 ///             ^
 ///    |     bbb
 /// ```
-fn further_end(_tokenizer: &mut Tokenizer) -> State {
+pub fn further_end(_tokenizer: &mut Tokenizer) -> State {
     State::Ok
 }
@@ -161,8 +173,9 @@ fn further_end(_tokenizer: &mut Tokenizer) -> State {
 /// > |   bbb
 ///     ^
 /// ```
-fn further_begin(tokenizer: &mut Tokenizer) -> State {
-    tokenizer.attempt_opt(space_or_tab(), further_after)(tokenizer)
+pub fn further_begin(tokenizer: &mut Tokenizer) -> State {
+    let state_name = space_or_tab(tokenizer);
+    tokenizer.attempt_opt(state_name, StateName::CodeIndentedFurtherAfter)
 }

 /// After whitespace, not indented enough.
@@ -172,7 +185,7 @@ fn further_begin(tokenizer: &mut Tokenizer) -> State {
 /// > |   bbb
 ///       ^
 /// ```
-fn further_after(tokenizer: &mut Tokenizer) -> State {
+pub fn further_after(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'\n') => further_start(tokenizer),
         _ => State::Nok,
diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs
index 31777f4..5bdefbb 100644
--- a/src/construct/code_text.rs
+++ b/src/construct/code_text.rs
@@ -84,7 +84,7 @@
 //! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element

 use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};

 /// Start of code (text).
 ///
@@ -117,11 +117,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 /// > | `a`
 ///     ^
 /// ```
-fn sequence_open(tokenizer: &mut Tokenizer) -> State {
+pub fn sequence_open(tokenizer: &mut Tokenizer) -> State {
     if let Some(b'`') = tokenizer.current {
         tokenizer.tokenize_state.size += 1;
         tokenizer.consume();
-        State::Fn(Box::new(sequence_open))
+        State::Fn(StateName::CodeTextSequenceOpen)
     } else {
         tokenizer.exit(Token::CodeTextSequence);
         between(tokenizer)
@@ -134,7 +134,7 @@ fn sequence_open(tokenizer: &mut Tokenizer) -> State {
 /// > | `a`
 ///      ^^
 /// ```
-fn between(tokenizer: &mut Tokenizer) -> State {
+pub fn between(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None => {
             tokenizer.tokenize_state.size = 0;
@@ -144,7 +144,7 @@ fn between(tokenizer: &mut Tokenizer) -> State {
             tokenizer.enter(Token::LineEnding);
             tokenizer.consume();
             tokenizer.exit(Token::LineEnding);
-            State::Fn(Box::new(between))
+            State::Fn(StateName::CodeTextBetween)
         }
         Some(b'`') => {
             tokenizer.enter(Token::CodeTextSequence);
@@ -163,7 +163,7 @@ fn between(tokenizer: &mut Tokenizer) -> State {
 /// > | `a`
 ///      ^
 /// ```
-fn data(tokenizer: &mut Tokenizer) -> State {
+pub fn data(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None | Some(b'\n' | b'`') => {
             tokenizer.exit(Token::CodeTextData);
@@ -171,7 +171,7 @@ fn data(tokenizer: &mut Tokenizer) -> State {
         }
         _ => {
             tokenizer.consume();
-            State::Fn(Box::new(data))
+            State::Fn(StateName::CodeTextData)
         }
     }
 }
@@ -182,12 +182,12 @@ fn data(tokenizer: &mut Tokenizer) -> State {
 /// > | `a`
 ///       ^
 /// ```
-fn sequence_close(tokenizer: &mut Tokenizer) -> State {
+pub fn sequence_close(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'`') => {
             tokenizer.tokenize_state.size_other += 1;
             tokenizer.consume();
-            State::Fn(Box::new(sequence_close))
+            State::Fn(StateName::CodeTextSequenceClose)
         }
         _ => {
             if tokenizer.tokenize_state.size == tokenizer.tokenize_state.size_other {
diff --git a/src/construct/definition.rs b/src/construct/definition.rs
index a56dab4..fbad99d 100644
--- a/src/construct/definition.rs
+++ b/src/construct/definition.rs
@@ -93,14 +93,9 @@
 //! [html-a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element
 //! [html-img]: https://html.spec.whatwg.org/multipage/embedded-content.html#the-img-element

-use crate::construct::{
-    partial_destination::start as destination,
-    partial_label::start as label,
-    partial_space_or_tab::{space_or_tab, space_or_tab_eol},
-    partial_title::start as title,
-};
+use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_eol};
 use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};
 use crate::util::skip::opt_back as skip_opt_back;

 /// At the start of a definition.
@@ -124,7 +119,8 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
     if possible && tokenizer.parse_state.constructs.definition {
         tokenizer.enter(Token::Definition);
         // Note: arbitrary whitespace allowed even if code (indented) is on.
-        tokenizer.attempt_opt(space_or_tab(), before)(tokenizer)
+        let state_name = space_or_tab(tokenizer);
+        tokenizer.attempt_opt(state_name, StateName::DefinitionBefore)
     } else {
         State::Nok
     }
@@ -136,13 +132,13 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 /// > | [a]: b "c"
 ///     ^
 /// ```
-fn before(tokenizer: &mut Tokenizer) -> State {
+pub fn before(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'[') => {
             tokenizer.tokenize_state.token_1 = Token::DefinitionLabel;
             tokenizer.tokenize_state.token_2 = Token::DefinitionLabelMarker;
             tokenizer.tokenize_state.token_3 = Token::DefinitionLabelString;
-            tokenizer.go(label, label_after)(tokenizer)
+            tokenizer.go(StateName::LabelStart, StateName::DefinitionLabelAfter)
         }
         _ => State::Nok,
     }
@@ -154,7 +150,7 @@ fn before(tokenizer: &mut Tokenizer) -> State {
 /// > | [a]: b "c"
 ///        ^
 /// ```
-fn label_after(tokenizer: &mut Tokenizer) -> State {
+pub fn label_after(tokenizer: &mut Tokenizer) -> State {
     tokenizer.tokenize_state.token_1 = Token::Data;
     tokenizer.tokenize_state.token_2 = Token::Data;
     tokenizer.tokenize_state.token_3 = Token::Data;
@@ -164,34 +160,38 @@ fn label_after(tokenizer: &mut Tokenizer) -> State {
             tokenizer.enter(Token::DefinitionMarker);
             tokenizer.consume();
             tokenizer.exit(Token::DefinitionMarker);
-            State::Fn(Box::new(
-                tokenizer.attempt_opt(space_or_tab_eol(), destination_before),
-            ))
+            State::Fn(StateName::DefinitionMarkerAfter)
         }
         _ => State::Nok,
     }
 }

+/// To do.
+pub fn marker_after(tokenizer: &mut Tokenizer) -> State {
+    let state_name = space_or_tab_eol(tokenizer);
+    tokenizer.attempt_opt(state_name, StateName::DefinitionDestinationBefore)
+}
+
 /// Before a destination.
 ///
 /// ```markdown
 /// > | [a]: b "c"
 ///          ^
 /// ```
-fn destination_before(tokenizer: &mut Tokenizer) -> State {
+pub fn destination_before(tokenizer: &mut Tokenizer) -> State {
     tokenizer.tokenize_state.token_1 = Token::DefinitionDestination;
     tokenizer.tokenize_state.token_2 = Token::DefinitionDestinationLiteral;
     tokenizer.tokenize_state.token_3 = Token::DefinitionDestinationLiteralMarker;
     tokenizer.tokenize_state.token_4 = Token::DefinitionDestinationRaw;
     tokenizer.tokenize_state.token_5 = Token::DefinitionDestinationString;
     tokenizer.tokenize_state.size_other = usize::MAX;
-    tokenizer.attempt(destination, |ok| {
-        Box::new(if ok {
-            destination_after
+    tokenizer.attempt(StateName::DestinationStart, |ok| {
+        State::Fn(if ok {
+            StateName::DefinitionDestinationAfter
         } else {
-            destination_missing
+            StateName::DefinitionDestinationMissing
         })
-    })(tokenizer)
+    })
 }

 /// After a destination.
@@ -200,18 +200,18 @@ fn destination_before(tokenizer: &mut Tokenizer) -> State {
 /// > | [a]: b "c"
 ///           ^
 /// ```
-fn destination_after(tokenizer: &mut Tokenizer) -> State {
+pub fn destination_after(tokenizer: &mut Tokenizer) -> State {
     tokenizer.tokenize_state.token_1 = Token::Data;
     tokenizer.tokenize_state.token_2 = Token::Data;
     tokenizer.tokenize_state.token_3 = Token::Data;
     tokenizer.tokenize_state.token_4 = Token::Data;
     tokenizer.tokenize_state.token_5 = Token::Data;
     tokenizer.tokenize_state.size_other = 0;
-    tokenizer.attempt_opt(title_before, after)(tokenizer)
+    tokenizer.attempt_opt(StateName::DefinitionTitleBefore, StateName::DefinitionAfter)
 }

 /// Without destination.
-fn destination_missing(tokenizer: &mut Tokenizer) -> State {
+pub fn destination_missing(tokenizer: &mut Tokenizer) -> State {
     tokenizer.tokenize_state.token_1 = Token::Data;
     tokenizer.tokenize_state.token_2 = Token::Data;
     tokenizer.tokenize_state.token_3 = Token::Data;
@@ -229,8 +229,9 @@ fn destination_missing(tokenizer: &mut Tokenizer) -> State {
 /// > | [a]: b "c"
 ///               ^
 /// ```
-fn after(tokenizer: &mut Tokenizer) -> State {
-    tokenizer.attempt_opt(space_or_tab(), after_whitespace)(tokenizer)
+pub fn after(tokenizer: &mut Tokenizer) -> State {
+    let state_name = space_or_tab(tokenizer);
+    tokenizer.attempt_opt(state_name, StateName::DefinitionAfterWhitespace)
 }

 /// After a definition, after optional whitespace.
@@ -241,7 +242,7 @@ fn after(tokenizer: &mut Tokenizer) -> State {
 /// > | [a]: b "c"
 ///               ^
 /// ```
-fn after_whitespace(tokenizer: &mut Tokenizer) -> State {
+pub fn after_whitespace(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None | Some(b'\n') => {
             tokenizer.exit(Token::Definition);
@@ -261,8 +262,9 @@ fn after_whitespace(tokenizer: &mut Tokenizer) -> State {
 /// > | [a]: b "c"
 ///           ^
 /// ```
-fn title_before(tokenizer: &mut Tokenizer) -> State {
-    tokenizer.go(space_or_tab_eol(), title_before_marker)(tokenizer)
+pub fn title_before(tokenizer: &mut Tokenizer) -> State {
+    let state_name = space_or_tab_eol(tokenizer);
+    tokenizer.go(state_name, StateName::DefinitionTitleBeforeMarker)
 }

 /// Before a title, after a line ending.
@@ -272,11 +274,11 @@ fn title_before(tokenizer: &mut Tokenizer) -> State {
 /// > | "c"
 ///     ^
 /// ```
-fn title_before_marker(tokenizer: &mut Tokenizer) -> State {
+pub fn title_before_marker(tokenizer: &mut Tokenizer) -> State {
     tokenizer.tokenize_state.token_1 = Token::DefinitionTitle;
     tokenizer.tokenize_state.token_2 = Token::DefinitionTitleMarker;
     tokenizer.tokenize_state.token_3 = Token::DefinitionTitleString;
-    tokenizer.go(title, title_after)(tokenizer)
+    tokenizer.go(StateName::TitleStart, StateName::DefinitionTitleAfter)
 }

 /// After a title.
@@ -285,11 +287,15 @@ fn title_before_marker(tokenizer: &mut Tokenizer) -> State {
 /// > | [a]: b "c"
 ///               ^
 /// ```
-fn title_after(tokenizer: &mut Tokenizer) -> State {
+pub fn title_after(tokenizer: &mut Tokenizer) -> State {
     tokenizer.tokenize_state.token_1 = Token::Data;
     tokenizer.tokenize_state.token_2 = Token::Data;
     tokenizer.tokenize_state.token_3 = Token::Data;
-    tokenizer.attempt_opt(space_or_tab(), title_after_after_optional_whitespace)(tokenizer)
+    let state_name = space_or_tab(tokenizer);
+    tokenizer.attempt_opt(
+        state_name,
+        StateName::DefinitionTitleAfterOptionalWhitespace,
+    )
 }

 /// After a title, after optional whitespace.
@@ -298,7 +304,7 @@ fn title_after(tokenizer: &mut Tokenizer) -> State {
 /// > | [a]: b "c"
 ///               ^
 /// ```
-fn title_after_after_optional_whitespace(tokenizer: &mut Tokenizer) -> State {
+pub fn title_after_optional_whitespace(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None | Some(b'\n') => State::Ok,
         _ => State::Nok,
diff --git a/src/construct/hard_break_escape.rs b/src/construct/hard_break_escape.rs
index d09bf54..47b7e94 100644
--- a/src/construct/hard_break_escape.rs
+++ b/src/construct/hard_break_escape.rs
@@ -40,7 +40,7 @@
 //! [html]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-br-element

 use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};

 /// Start of a hard break (escape).
 ///
@@ -54,7 +54,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
         Some(b'\\') if tokenizer.parse_state.constructs.hard_break_escape => {
             tokenizer.enter(Token::HardBreakEscape);
             tokenizer.consume();
-            State::Fn(Box::new(after))
+            State::Fn(StateName::HardBreakEscapeAfter)
         }
         _ => State::Nok,
     }
@@ -67,7 +67,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 ///       ^
 ///   | b
 /// ```
-fn after(tokenizer: &mut Tokenizer) -> State {
+pub fn after(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'\n') => {
             tokenizer.exit(Token::HardBreakEscape);
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index 6751567..45c4758 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -57,7 +57,7 @@
 use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
 use crate::constant::{HEADING_ATX_OPENING_FENCE_SIZE_MAX, TAB_SIZE};
 use crate::token::Token;
-use crate::tokenizer::{ContentType, Event, EventType, State, Tokenizer};
+use crate::tokenizer::{ContentType, Event, EventType, State, StateName, Tokenizer};

 /// Start of a heading (atx).
 ///
@@ -68,17 +68,16 @@ use crate::tokenizer::{ContentType, Event, EventType, State, Tokenizer};
 pub fn start(tokenizer: &mut Tokenizer) -> State {
     if tokenizer.parse_state.constructs.heading_atx {
         tokenizer.enter(Token::HeadingAtx);
-        tokenizer.go(
-            space_or_tab_min_max(
-                0,
-                if tokenizer.parse_state.constructs.code_indented {
-                    TAB_SIZE - 1
-                } else {
-                    usize::MAX
-                },
-            ),
-            before,
-        )(tokenizer)
+        let state_name = space_or_tab_min_max(
+            tokenizer,
+            0,
+            if tokenizer.parse_state.constructs.code_indented {
+                TAB_SIZE - 1
+            } else {
+                usize::MAX
+            },
+        );
+        tokenizer.go(state_name, StateName::HeadingAtxBefore)
     } else {
         State::Nok
     }
@@ -90,7 +89,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 /// > | ## aa
 ///     ^
 /// ```
-fn before(tokenizer: &mut Tokenizer) -> State {
+pub fn before(tokenizer: &mut Tokenizer) -> State {
     if Some(b'#') == tokenizer.current {
         tokenizer.enter(Token::HeadingAtxSequence);
         sequence_open(tokenizer)
@@ -105,7 +104,7 @@ fn before(tokenizer: &mut Tokenizer) -> State {
 /// > | ## aa
 ///     ^
 /// ```
-fn sequence_open(tokenizer: &mut Tokenizer) -> State {
+pub fn sequence_open(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None | Some(b'\n') if tokenizer.tokenize_state.size > 0 => {
             tokenizer.tokenize_state.size = 0;
@@ -115,12 +114,13 @@ fn sequence_open(tokenizer: &mut Tokenizer) -> State {
         Some(b'#') if tokenizer.tokenize_state.size < HEADING_ATX_OPENING_FENCE_SIZE_MAX => {
             tokenizer.tokenize_state.size += 1;
             tokenizer.consume();
-            State::Fn(Box::new(sequence_open))
+            State::Fn(StateName::HeadingAtxSequenceOpen)
         }
         _ if tokenizer.tokenize_state.size > 0 => {
             tokenizer.tokenize_state.size = 0;
             tokenizer.exit(Token::HeadingAtxSequence);
-            tokenizer.go(space_or_tab(), at_break)(tokenizer)
+            let state_name = space_or_tab(tokenizer);
+            tokenizer.go(state_name, StateName::HeadingAtxAtBreak)
         }
         _ => {
             tokenizer.tokenize_state.size = 0;
@@ -135,7 +135,7 @@ fn sequence_open(tokenizer: &mut Tokenizer) -> State {
 /// > | ## aa
 ///       ^
 /// ```
-fn at_break(tokenizer: &mut Tokenizer) -> State {
+pub fn at_break(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None | Some(b'\n') => {
             tokenizer.exit(Token::HeadingAtx);
@@ -144,10 +144,13 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {
             tokenizer.interrupt = false;
             State::Ok
         }
-        Some(b'\t' | b' ') => tokenizer.go(space_or_tab(), at_break)(tokenizer),
+        Some(b'\t' | b' ') => {
+            let state_name = space_or_tab(tokenizer);
+            tokenizer.go(state_name, StateName::HeadingAtxAtBreak)
+        }
         Some(b'#') => {
             tokenizer.enter(Token::HeadingAtxSequence);
-            further_sequence(tokenizer)
+            sequence_further(tokenizer)
         }
         Some(_) => {
             tokenizer.enter_with_content(Token::Data, Some(ContentType::Text));
@@ -164,10 +167,10 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {
 /// > | ## aa ##
 ///           ^
 /// ```
-fn further_sequence(tokenizer: &mut Tokenizer) -> State {
+pub fn sequence_further(tokenizer: &mut Tokenizer) -> State {
     if let Some(b'#') = tokenizer.current {
         tokenizer.consume();
-        State::Fn(Box::new(further_sequence))
+        State::Fn(StateName::HeadingAtxSequenceFurther)
     } else {
         tokenizer.exit(Token::HeadingAtxSequence);
         at_break(tokenizer)
@@ -180,7 +183,7 @@ fn further_sequence(tokenizer: &mut Tokenizer) -> State {
 /// > | ## aa
 ///        ^
 /// ```
-fn data(tokenizer: &mut Tokenizer) -> State {
+pub fn data(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         // Note: `#` for closing sequence must be preceded by whitespace, otherwise it’s just text.
         None | Some(b'\t' | b'\n' | b' ') => {
@@ -189,7 +192,7 @@ fn data(tokenizer: &mut Tokenizer) -> State {
         }
         _ => {
             tokenizer.consume();
-            State::Fn(Box::new(data))
+            State::Fn(StateName::HeadingAtxData)
         }
     }
 }
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index 675b2ac..50feba4 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -60,7 +60,7 @@
 use crate::constant::TAB_SIZE;
 use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
 use crate::token::Token;
-use crate::tokenizer::{EventType, State, Tokenizer};
+use crate::tokenizer::{EventType, State, StateName, Tokenizer};
 use crate::util::skip::opt_back as skip_opt_back;

 /// At a line ending, presumably an underline.
@@ -83,17 +83,17 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
             .token_type
                 == Token::Paragraph)
     {
-        tokenizer.go(
-            space_or_tab_min_max(
-                0,
-                if tokenizer.parse_state.constructs.code_indented {
-                    TAB_SIZE - 1
-                } else {
-                    usize::MAX
-                },
-            ),
-            before,
-        )(tokenizer)
+        let state_name = space_or_tab_min_max(
+            tokenizer,
+            0,
+            if tokenizer.parse_state.constructs.code_indented {
+                TAB_SIZE - 1
+            } else {
+                usize::MAX
+            },
+        );
+
+        tokenizer.go(state_name, StateName::HeadingSetextBefore)
     } else {
         State::Nok
     }
@@ -106,7 +106,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 /// > | ==
 ///     ^
 /// ```
-fn before(tokenizer: &mut Tokenizer) -> State {
+pub fn before(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'-' | b'=') => {
             tokenizer.tokenize_state.marker = tokenizer.current.unwrap();
@@ -124,16 +124,17 @@ fn before(tokenizer: &mut Tokenizer) -> State {
 /// > | ==
 ///     ^
 /// ```
-fn inside(tokenizer: &mut Tokenizer) -> State {
+pub fn inside(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'-' | b'=') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
             tokenizer.consume();
-            State::Fn(Box::new(inside))
+            State::Fn(StateName::HeadingSetextInside)
         }
         _ => {
             tokenizer.tokenize_state.marker = 0;
             tokenizer.exit(Token::HeadingSetextUnderline);
-            tokenizer.attempt_opt(space_or_tab(), after)(tokenizer)
+            let state_name = space_or_tab(tokenizer);
+            tokenizer.attempt_opt(state_name, StateName::HeadingSetextAfter)
         }
     }
 }
@@ -145,7 +146,7 @@ fn inside(tokenizer: &mut Tokenizer) -> State {
 /// > | ==
 ///       ^
 /// ```
-fn after(tokenizer: &mut Tokenizer) -> State {
+pub fn after(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None | Some(b'\n') => {
             // Feel free to interrupt.
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index aaa803d..779146c 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -101,13 +101,11 @@
 use crate::constant::{
     HTML_BLOCK_NAMES, HTML_CDATA_PREFIX, HTML_RAW_NAMES, HTML_RAW_SIZE_MAX, TAB_SIZE,
 };
-use crate::construct::{
-    blank_line::start as blank_line,
-    partial_non_lazy_continuation::start as partial_non_lazy_continuation,
-    partial_space_or_tab::{space_or_tab_with_options, Options as SpaceOrTabOptions},
+use crate::construct::partial_space_or_tab::{
+    space_or_tab_with_options, Options as SpaceOrTabOptions,
 };
 use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};
 use crate::util::slice::Slice;

 /// Symbol for `<script>` (condition 1).
@@ -134,8 +132,9 @@ const COMPLETE: u8 = 7;
 pub fn start(tokenizer: &mut Tokenizer) -> State {
     if tokenizer.parse_state.constructs.html_flow {
         tokenizer.enter(Token::HtmlFlow);
-        tokenizer.go(
-            space_or_tab_with_options(SpaceOrTabOptions {
+        let state_name = space_or_tab_with_options(
+            tokenizer,
+            SpaceOrTabOptions {
                 kind: Token::HtmlFlowData,
                 min: 0,
                 max: if tokenizer.parse_state.constructs.code_indented {
@@ -145,9 +144,10 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
                 },
                 connect: false,
                 content_type: None,
-            }),
-            before,
-        )(tokenizer)
+            },
+        );
+
+        tokenizer.go(state_name, StateName::HtmlFlowBefore)
     } else {
         State::Nok
     }
@@ -159,11 +159,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 /// > | <x />
 ///     ^
 /// ```
-fn before(tokenizer: &mut Tokenizer) -> State {
+pub fn before(tokenizer: &mut Tokenizer) -> State {
     if Some(b'<') == tokenizer.current {
         tokenizer.enter(Token::HtmlFlowData);
         tokenizer.consume();
-        State::Fn(Box::new(open))
+        State::Fn(StateName::HtmlFlowOpen)
     } else {
         State::Nok
     }
@@ -179,17 +179,17 @@ fn before(tokenizer: &mut Tokenizer) -> State {
 /// > | <!--xxx-->
 ///      ^
 /// ```
-fn open(tokenizer: &mut Tokenizer) -> State {
+pub fn open(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'!') => {
             tokenizer.consume();
-            State::Fn(Box::new(declaration_open))
+            State::Fn(StateName::HtmlFlowDeclarationOpen)
         }
         Some(b'/') => {
             tokenizer.consume();
             tokenizer.tokenize_state.seen = true;
             tokenizer.tokenize_state.start = tokenizer.point.index;
-            State::Fn(Box::new(tag_close_start))
+            State::Fn(StateName::HtmlFlowTagCloseStart)
         }
         Some(b'?') => {
             tokenizer.tokenize_state.marker = INSTRUCTION;
@@ -198,7 +198,7 @@ fn open(tokenizer: &mut Tokenizer) -> State {
             tokenizer.concrete = true;
             // While we’re in an instruction instead of a declaration, we’re on a `?`
             // right now, so we do need to search for `>`, similar to declarations.
-            State::Fn(Box::new(continuation_declaration_inside))
+            State::Fn(StateName::HtmlFlowContinuationDeclarationInside)
         }
         // ASCII alphabetical.
         Some(b'A'..=b'Z' | b'a'..=b'z') => {
@@ -219,24 +219,24 @@ fn open(tokenizer: &mut Tokenizer) -> State {
 /// > | <![CDATA[>&<]]>
 ///       ^
 /// ```
-fn declaration_open(tokenizer: &mut Tokenizer) -> State {
+pub fn declaration_open(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'-') => {
             tokenizer.consume();
             tokenizer.tokenize_state.marker = COMMENT;
-            State::Fn(Box::new(comment_open_inside))
+            State::Fn(StateName::HtmlFlowCommentOpenInside)
         }
         Some(b'A'..=b'Z' | b'a'..=b'z') => {
             tokenizer.consume();
             tokenizer.tokenize_state.marker = DECLARATION;
             // Do not form containers.
             tokenizer.concrete = true;
-            State::Fn(Box::new(continuation_declaration_inside))
+            State::Fn(StateName::HtmlFlowContinuationDeclarationInside)
         }
         Some(b'[') => {
             tokenizer.consume();
             tokenizer.tokenize_state.marker = CDATA;
-            State::Fn(Box::new(cdata_open_inside))
+            State::Fn(StateName::HtmlFlowCdataOpenInside)
         }
         _ => State::Nok,
     }
@@ -248,12 +248,12 @@ fn declaration_open(tokenizer: &mut Tokenizer) -> State {
 /// > | <!--xxx-->
 ///        ^
 /// ```
-fn comment_open_inside(tokenizer: &mut Tokenizer) -> State {
+pub fn comment_open_inside(tokenizer: &mut Tokenizer) -> State {
     if let Some(b'-') = tokenizer.current {
         tokenizer.consume();
         // Do not form containers.
         tokenizer.concrete = true;
-        State::Fn(Box::new(continuation_declaration_inside))
+        State::Fn(StateName::HtmlFlowContinuationDeclarationInside)
     } else {
         tokenizer.tokenize_state.marker = 0;
         State::Nok
@@ -266,7 +266,7 @@ fn comment_open_inside(tokenizer: &mut Tokenizer) -> State {
 /// > | <![CDATA[>&<]]>
 ///        ^^^^^^
 /// ```
-fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State {
+pub fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State {
     if tokenizer.current == Some(HTML_CDATA_PREFIX[tokenizer.tokenize_state.size]) {
         tokenizer.tokenize_state.size += 1;
         tokenizer.consume();
@@ -275,9 +275,9 @@ fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State {
             tokenizer.tokenize_state.size = 0;
             // Do not form containers.
```
tokenizer.concrete = true; -            State::Fn(Box::new(continuation)) +            State::Fn(StateName::HtmlFlowContinuation)          } else { -            State::Fn(Box::new(cdata_open_inside)) +            State::Fn(StateName::HtmlFlowCdataOpenInside)          }      } else {          tokenizer.tokenize_state.marker = 0; @@ -292,10 +292,10 @@ fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State {  /// > | </x>  ///       ^  /// ``` -fn tag_close_start(tokenizer: &mut Tokenizer) -> State { +pub fn tag_close_start(tokenizer: &mut Tokenizer) -> State {      if let Some(b'A'..=b'Z' | b'a'..=b'z') = tokenizer.current {          tokenizer.consume(); -        State::Fn(Box::new(tag_name)) +        State::Fn(StateName::HtmlFlowTagName)      } else {          tokenizer.tokenize_state.seen = false;          tokenizer.tokenize_state.start = 0; @@ -311,7 +311,7 @@ fn tag_close_start(tokenizer: &mut Tokenizer) -> State {  /// > | </ab>  ///       ^^  /// ``` -fn tag_name(tokenizer: &mut Tokenizer) -> State { +pub fn tag_name(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          None | Some(b'\t' | b'\n' | b' ' | b'/' | b'>') => {              let closing_tag = tokenizer.tokenize_state.seen; @@ -340,7 +340,7 @@ fn tag_name(tokenizer: &mut Tokenizer) -> State {                  if slash {                      tokenizer.consume(); -                    State::Fn(Box::new(basic_self_closing)) +                    State::Fn(StateName::HtmlFlowBasicSelfClosing)                  } else {                      // Do not form containers.                      tokenizer.concrete = true; @@ -363,7 +363,7 @@ fn tag_name(tokenizer: &mut Tokenizer) -> State {          // ASCII alphanumerical and `-`.          Some(b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => {              tokenizer.consume(); -            State::Fn(Box::new(tag_name)) +            State::Fn(StateName::HtmlFlowTagName)          }          Some(_) => {              tokenizer.tokenize_state.seen = false; @@ -378,12 +378,12 @@ fn tag_name(tokenizer: &mut Tokenizer) -> State {  /// > | <div/>  ///          ^  /// ``` -fn basic_self_closing(tokenizer: &mut Tokenizer) -> State { +pub fn basic_self_closing(tokenizer: &mut Tokenizer) -> State {      if let Some(b'>') = tokenizer.current {          tokenizer.consume();          // Do not form containers.          
tokenizer.concrete = true; -        State::Fn(Box::new(continuation)) +        State::Fn(StateName::HtmlFlowContinuation)      } else {          tokenizer.tokenize_state.marker = 0;          State::Nok @@ -396,11 +396,11 @@ fn basic_self_closing(tokenizer: &mut Tokenizer) -> State {  /// > | <x/>  ///        ^  /// ``` -fn complete_closing_tag_after(tokenizer: &mut Tokenizer) -> State { +pub fn complete_closing_tag_after(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'\t' | b' ') => {              tokenizer.consume(); -            State::Fn(Box::new(complete_closing_tag_after)) +            State::Fn(StateName::HtmlFlowCompleteClosingTagAfter)          }          _ => complete_end(tokenizer),      } @@ -425,20 +425,20 @@ fn complete_closing_tag_after(tokenizer: &mut Tokenizer) -> State {  /// > | <a >  ///        ^  /// ``` -fn complete_attribute_name_before(tokenizer: &mut Tokenizer) -> State { +pub fn complete_attribute_name_before(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'\t' | b' ') => {              tokenizer.consume(); -            State::Fn(Box::new(complete_attribute_name_before)) +            State::Fn(StateName::HtmlFlowCompleteAttributeNameBefore)          }          Some(b'/') => {              tokenizer.consume(); -            State::Fn(Box::new(complete_end)) +            State::Fn(StateName::HtmlFlowCompleteEnd)          }          // ASCII alphanumerical and `:` and `_`.          Some(b'0'..=b'9' | b':' | b'A'..=b'Z' | b'_' | b'a'..=b'z') => {              tokenizer.consume(); -            State::Fn(Box::new(complete_attribute_name)) +            State::Fn(StateName::HtmlFlowCompleteAttributeName)          }          _ => complete_end(tokenizer),      } @@ -454,12 +454,12 @@ fn complete_attribute_name_before(tokenizer: &mut Tokenizer) -> State {  /// > | <a b>  ///         ^  /// ``` -fn complete_attribute_name(tokenizer: &mut Tokenizer) -> State { +pub fn complete_attribute_name(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          // ASCII alphanumerical and `-`, `.`, `:`, and `_`.          Some(b'-' | b'.' 
| b'0'..=b'9' | b':' | b'A'..=b'Z' | b'_' | b'a'..=b'z') => {              tokenizer.consume(); -            State::Fn(Box::new(complete_attribute_name)) +            State::Fn(StateName::HtmlFlowCompleteAttributeName)          }          _ => complete_attribute_name_after(tokenizer),      } @@ -474,15 +474,15 @@ fn complete_attribute_name(tokenizer: &mut Tokenizer) -> State {  /// > | <a b=c>  ///         ^  /// ``` -fn complete_attribute_name_after(tokenizer: &mut Tokenizer) -> State { +pub fn complete_attribute_name_after(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'\t' | b' ') => {              tokenizer.consume(); -            State::Fn(Box::new(complete_attribute_name_after)) +            State::Fn(StateName::HtmlFlowCompleteAttributeNameAfter)          }          Some(b'=') => {              tokenizer.consume(); -            State::Fn(Box::new(complete_attribute_value_before)) +            State::Fn(StateName::HtmlFlowCompleteAttributeValueBefore)          }          _ => complete_attribute_name_before(tokenizer),      } @@ -497,7 +497,7 @@ fn complete_attribute_name_after(tokenizer: &mut Tokenizer) -> State {  /// > | <a b="c">  ///          ^  /// ``` -fn complete_attribute_value_before(tokenizer: &mut Tokenizer) -> State { +pub fn complete_attribute_value_before(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          None | Some(b'<' | b'=' | b'>' | b'`') => {              tokenizer.tokenize_state.marker = 0; @@ -505,12 +505,12 @@ fn complete_attribute_value_before(tokenizer: &mut Tokenizer) -> State {          }          Some(b'\t' | b' ') => {              tokenizer.consume(); -            State::Fn(Box::new(complete_attribute_value_before)) +            State::Fn(StateName::HtmlFlowCompleteAttributeValueBefore)          }          Some(b'"' | b'\'') => {              tokenizer.tokenize_state.marker_other = tokenizer.current.unwrap();              tokenizer.consume(); -            State::Fn(Box::new(complete_attribute_value_quoted)) +            State::Fn(StateName::HtmlFlowCompleteAttributeValueQuoted)          }          _ => complete_attribute_value_unquoted(tokenizer),      } @@ -524,7 +524,7 @@ fn complete_attribute_value_before(tokenizer: &mut Tokenizer) -> State {  /// > | <a b='c'>  ///           ^  /// ``` -fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State { +pub fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          None | Some(b'\n') => {              tokenizer.tokenize_state.marker = 0; @@ -536,11 +536,11 @@ fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {          {              tokenizer.tokenize_state.marker_other = 0;              tokenizer.consume(); -            State::Fn(Box::new(complete_attribute_value_quoted_after)) +            State::Fn(StateName::HtmlFlowCompleteAttributeValueQuotedAfter)          }          _ => {              tokenizer.consume(); -            State::Fn(Box::new(complete_attribute_value_quoted)) +            State::Fn(StateName::HtmlFlowCompleteAttributeValueQuoted)          }      }  } @@ -551,14 +551,14 @@ fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {  /// > | <a b=c>  ///          ^  /// ``` -fn complete_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State { +pub fn complete_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          None | Some(b'\t' | b'\n' | b' ' | b'"' | b'\'' | b'/' | b'<' 
| b'=' | b'>' | b'`') => {              complete_attribute_name_after(tokenizer)          }          Some(_) => {              tokenizer.consume(); -            State::Fn(Box::new(complete_attribute_value_unquoted)) +            State::Fn(StateName::HtmlFlowCompleteAttributeValueUnquoted)          }      }  } @@ -570,7 +570,7 @@ fn complete_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State {  /// > | <a b="c">  ///            ^  /// ``` -fn complete_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State { +pub fn complete_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State {      if let Some(b'\t' | b' ' | b'/' | b'>') = tokenizer.current {          complete_attribute_name_before(tokenizer)      } else { @@ -585,10 +585,10 @@ fn complete_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State {  /// > | <a b="c">  ///             ^  /// ``` -fn complete_end(tokenizer: &mut Tokenizer) -> State { +pub fn complete_end(tokenizer: &mut Tokenizer) -> State {      if let Some(b'>') = tokenizer.current {          tokenizer.consume(); -        State::Fn(Box::new(complete_after)) +        State::Fn(StateName::HtmlFlowCompleteAfter)      } else {          tokenizer.tokenize_state.marker = 0;          State::Nok @@ -601,7 +601,7 @@ fn complete_end(tokenizer: &mut Tokenizer) -> State {  /// > | <x>  ///        ^  /// ``` -fn complete_after(tokenizer: &mut Tokenizer) -> State { +pub fn complete_after(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          None | Some(b'\n') => {              // Do not form containers. @@ -610,7 +610,7 @@ fn complete_after(tokenizer: &mut Tokenizer) -> State {          }          Some(b'\t' | b' ') => {              tokenizer.consume(); -            State::Fn(Box::new(complete_after)) +            State::Fn(StateName::HtmlFlowCompleteAfter)          }          Some(_) => {              tokenizer.tokenize_state.marker = 0; @@ -625,20 +625,20 @@ fn complete_after(tokenizer: &mut Tokenizer) -> State {  /// > | <!--xxx-->  ///          ^  /// ``` -fn continuation(tokenizer: &mut Tokenizer) -> State { +pub fn continuation(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'\n')              if tokenizer.tokenize_state.marker == BASIC                  || tokenizer.tokenize_state.marker == COMPLETE =>          {              tokenizer.exit(Token::HtmlFlowData); -            tokenizer.check(blank_line_before, |ok| { -                Box::new(if ok { -                    continuation_after +            tokenizer.check(StateName::HtmlFlowBlankLineBefore, |ok| { +                State::Fn(if ok { +                    StateName::HtmlFlowContinuationAfter                  } else { -                    continuation_start +                    StateName::HtmlFlowContinuationStart                  }) -            })(tokenizer) +            })          }          // Note: important that this is after the basic/complete case.          
None | Some(b'\n') => { @@ -647,27 +647,27 @@ fn continuation(tokenizer: &mut Tokenizer) -> State {          }          Some(b'-') if tokenizer.tokenize_state.marker == COMMENT => {              tokenizer.consume(); -            State::Fn(Box::new(continuation_comment_inside)) +            State::Fn(StateName::HtmlFlowContinuationCommentInside)          }          Some(b'<') if tokenizer.tokenize_state.marker == RAW => {              tokenizer.consume(); -            State::Fn(Box::new(continuation_raw_tag_open)) +            State::Fn(StateName::HtmlFlowContinuationRawTagOpen)          }          Some(b'>') if tokenizer.tokenize_state.marker == DECLARATION => {              tokenizer.consume(); -            State::Fn(Box::new(continuation_close)) +            State::Fn(StateName::HtmlFlowContinuationClose)          }          Some(b'?') if tokenizer.tokenize_state.marker == INSTRUCTION => {              tokenizer.consume(); -            State::Fn(Box::new(continuation_declaration_inside)) +            State::Fn(StateName::HtmlFlowContinuationDeclarationInside)          }          Some(b']') if tokenizer.tokenize_state.marker == CDATA => {              tokenizer.consume(); -            State::Fn(Box::new(continuation_character_data_inside)) +            State::Fn(StateName::HtmlFlowContinuationCdataInside)          }          _ => {              tokenizer.consume(); -            State::Fn(Box::new(continuation)) +            State::Fn(StateName::HtmlFlowContinuation)          }      }  } @@ -679,14 +679,14 @@ fn continuation(tokenizer: &mut Tokenizer) -> State {  ///        ^  ///   | asd  /// ``` -fn continuation_start(tokenizer: &mut Tokenizer) -> State { -    tokenizer.check(partial_non_lazy_continuation, |ok| { -        Box::new(if ok { -            continuation_start_non_lazy +pub fn continuation_start(tokenizer: &mut Tokenizer) -> State { +    tokenizer.check(StateName::NonLazyContinuationStart, |ok| { +        State::Fn(if ok { +            StateName::HtmlFlowContinuationStartNonLazy          } else { -            continuation_after +            StateName::HtmlFlowContinuationAfter          }) -    })(tokenizer) +    })  }  /// In continuation, at an eol, before non-lazy content. 
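The change repeated across all of these hunks is mechanical: every `State::Fn(Box::new(some_state))` becomes `State::Fn(StateName::SomeState)`, and callbacks hand back a `State` wrapping a name rather than another boxed function. Here is a minimal sketch of the resulting shape, with simplified stand-in types rather than the crate's real ones:

```rust
// Minimal sketch, not the crate's real types: a tokenizer loop driven by
// a small `Copy` enum of state names instead of `Box<dyn FnOnce(..)>`.
#[derive(Clone, Copy, Debug)]
enum StateName {
    Start,
    Inside,
}

#[derive(Debug)]
enum State {
    Fn(StateName), // the next state, by name: no allocation per step
    Ok,
    Nok,
}

struct Tokenizer<'a> {
    bytes: &'a [u8],
    index: usize,
}

impl<'a> Tokenizer<'a> {
    fn current(&self) -> Option<u8> {
        self.bytes.get(self.index).copied()
    }

    fn consume(&mut self) {
        self.index += 1;
    }
}

// A `#` sequence at the start of an ATX-style heading, as in the hunks above.
fn start(t: &mut Tokenizer) -> State {
    match t.current() {
        Some(b'#') => {
            t.consume();
            State::Fn(StateName::Inside)
        }
        _ => State::Nok,
    }
}

fn inside(t: &mut Tokenizer) -> State {
    match t.current() {
        Some(b'#') => {
            t.consume();
            State::Fn(StateName::Inside)
        }
        _ => State::Ok,
    }
}

// The driver resolves names to plain `fn` pointers, so "which state comes
// next" is data that can be stored, compared, and copied freely.
fn run(t: &mut Tokenizer, mut name: StateName) -> State {
    loop {
        let func: fn(&mut Tokenizer) -> State = match name {
            StateName::Start => start,
            StateName::Inside => inside,
        };
        match func(t) {
            State::Fn(next) => name = next,
            done => return done,
        }
    }
}

fn main() {
    let mut t = Tokenizer { bytes: b"## x", index: 0 };
    assert!(matches!(run(&mut t, StateName::Start), State::Ok));
}
```

Nothing is allocated per transition any more, and because `StateName` is a small `Copy` value it can sit in ordinary struct fields, which is what the simpler `attempt`/`check` plumbing in the surrounding hunks builds on.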
@@ -696,13 +696,13 @@ fn continuation_start(tokenizer: &mut Tokenizer) -> State {  ///        ^  ///   | asd  /// ``` -fn continuation_start_non_lazy(tokenizer: &mut Tokenizer) -> State { +pub fn continuation_start_non_lazy(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'\n') => {              tokenizer.enter(Token::LineEnding);              tokenizer.consume();              tokenizer.exit(Token::LineEnding); -            State::Fn(Box::new(continuation_before)) +            State::Fn(StateName::HtmlFlowContinuationBefore)          }          _ => unreachable!("expected eol"),      } @@ -715,7 +715,7 @@ fn continuation_start_non_lazy(tokenizer: &mut Tokenizer) -> State {  /// > | asd  ///     ^  /// ``` -fn continuation_before(tokenizer: &mut Tokenizer) -> State { +pub fn continuation_before(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          None | Some(b'\n') => continuation_start(tokenizer),          _ => { @@ -731,11 +731,11 @@ fn continuation_before(tokenizer: &mut Tokenizer) -> State {  /// > | <!--xxx-->  ///             ^  /// ``` -fn continuation_comment_inside(tokenizer: &mut Tokenizer) -> State { +pub fn continuation_comment_inside(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'-') => {              tokenizer.consume(); -            State::Fn(Box::new(continuation_declaration_inside)) +            State::Fn(StateName::HtmlFlowContinuationDeclarationInside)          }          _ => continuation(tokenizer),      } @@ -747,12 +747,12 @@ fn continuation_comment_inside(tokenizer: &mut Tokenizer) -> State {  /// > | <script>console.log(1)</script>  ///                            ^  /// ``` -fn continuation_raw_tag_open(tokenizer: &mut Tokenizer) -> State { +pub fn continuation_raw_tag_open(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'/') => {              tokenizer.consume();              tokenizer.tokenize_state.start = tokenizer.point.index; -            State::Fn(Box::new(continuation_raw_end_tag)) +            State::Fn(StateName::HtmlFlowContinuationRawEndTag)          }          _ => continuation(tokenizer),      } @@ -764,7 +764,7 @@ fn continuation_raw_tag_open(tokenizer: &mut Tokenizer) -> State {  /// > | <script>console.log(1)</script>  ///                             ^^^^^^  /// ``` -fn continuation_raw_end_tag(tokenizer: &mut Tokenizer) -> State { +pub fn continuation_raw_end_tag(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'>') => {              // Guaranteed to be valid ASCII bytes. 
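The `continuation_start` hunk above calls `tokenizer.check(StateName::NonLazyContinuationStart, |ok| …)`: the sub-parser is referenced by name, and the `done` closure maps its boolean outcome onto the next named state. A hedged sketch of how such a `check` can be wired up (assumed shapes, not the crate's internals):

```rust
// Sketch of `check` with named states: drive the named sub-state machine
// to completion, rewind the input, and let the `done` closure translate
// the boolean outcome into the next *named* state.
#[derive(Clone, Copy, Debug)]
enum StateName {
    BlankLine,
    ContinuationAfter,
    ContinuationStart,
}

#[derive(Debug)]
enum State {
    Fn(StateName),
    Ok,
    Nok,
}

struct Tokenizer<'a> {
    bytes: &'a [u8],
    index: usize,
}

// Example sub-parser: succeeds when the rest of the line is whitespace only.
fn blank_line(t: &mut Tokenizer) -> State {
    match t.bytes.get(t.index) {
        None | Some(b'\n') => State::Ok,
        Some(b'\t' | b' ') => {
            t.index += 1;
            State::Fn(StateName::BlankLine)
        }
        Some(_) => State::Nok,
    }
}

// Stand-in for states not fleshed out in this sketch.
fn stub(_: &mut Tokenizer) -> State {
    State::Ok
}

impl<'a> Tokenizer<'a> {
    fn run(&mut self, mut name: StateName) -> State {
        loop {
            let func: fn(&mut Tokenizer) -> State = match name {
                StateName::BlankLine => blank_line,
                // The real enum has one variant per state function.
                StateName::ContinuationAfter | StateName::ContinuationStart => stub,
            };
            match func(self) {
                State::Fn(next) => name = next,
                done => return done,
            }
        }
    }

    /// The shape of `tokenizer.check(StateName::…, |ok| State::Fn(…))` above.
    fn check(&mut self, name: StateName, done: impl FnOnce(bool) -> State) -> State {
        let saved = self.index; // a check never consumes input
        let ok = matches!(self.run(name), State::Ok);
        self.index = saved;
        done(ok)
    }
}

fn main() {
    let mut t = Tokenizer { bytes: b"  \nrest", index: 0 };
    let next = t.check(StateName::BlankLine, |ok| {
        State::Fn(if ok {
            StateName::ContinuationAfter
        } else {
            StateName::ContinuationStart
        })
    });
    assert!(matches!(next, State::Fn(StateName::ContinuationAfter)));
    assert_eq!(t.index, 0); // input position restored
}
```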
@@ -779,7 +779,7 @@ fn continuation_raw_end_tag(tokenizer: &mut Tokenizer) -> State {              if HTML_RAW_NAMES.contains(&name.as_str()) {                  tokenizer.consume(); -                State::Fn(Box::new(continuation_close)) +                State::Fn(StateName::HtmlFlowContinuationClose)              } else {                  continuation(tokenizer)              } @@ -788,7 +788,7 @@ fn continuation_raw_end_tag(tokenizer: &mut Tokenizer) -> State {              if tokenizer.point.index - tokenizer.tokenize_state.start < HTML_RAW_SIZE_MAX =>          {              tokenizer.consume(); -            State::Fn(Box::new(continuation_raw_end_tag)) +            State::Fn(StateName::HtmlFlowContinuationRawEndTag)          }          _ => {              tokenizer.tokenize_state.start = 0; @@ -803,11 +803,11 @@ fn continuation_raw_end_tag(tokenizer: &mut Tokenizer) -> State {  /// > | <![CDATA[>&<]]>  ///                  ^  /// ``` -fn continuation_character_data_inside(tokenizer: &mut Tokenizer) -> State { +pub fn continuation_cdata_inside(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b']') => {              tokenizer.consume(); -            State::Fn(Box::new(continuation_declaration_inside)) +            State::Fn(StateName::HtmlFlowContinuationDeclarationInside)          }          _ => continuation(tokenizer),      } @@ -827,15 +827,15 @@ fn continuation_character_data_inside(tokenizer: &mut Tokenizer) -> State {  /// > | <![CDATA[>&<]]>  ///                   ^  /// ``` -fn continuation_declaration_inside(tokenizer: &mut Tokenizer) -> State { +pub fn continuation_declaration_inside(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'>') => {              tokenizer.consume(); -            State::Fn(Box::new(continuation_close)) +            State::Fn(StateName::HtmlFlowContinuationClose)          }          Some(b'-') if tokenizer.tokenize_state.marker == COMMENT => {              tokenizer.consume(); -            State::Fn(Box::new(continuation_declaration_inside)) +            State::Fn(StateName::HtmlFlowContinuationDeclarationInside)          }          _ => continuation(tokenizer),      } @@ -847,7 +847,7 @@ fn continuation_declaration_inside(tokenizer: &mut Tokenizer) -> State {  /// > | <!doctype>  ///               ^  /// ``` -fn continuation_close(tokenizer: &mut Tokenizer) -> State { +pub fn continuation_close(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          None | Some(b'\n') => {              tokenizer.exit(Token::HtmlFlowData); @@ -855,7 +855,7 @@ fn continuation_close(tokenizer: &mut Tokenizer) -> State {          }          _ => {              tokenizer.consume(); -            State::Fn(Box::new(continuation_close)) +            State::Fn(StateName::HtmlFlowContinuationClose)          }      }  } @@ -866,7 +866,7 @@ fn continuation_close(tokenizer: &mut Tokenizer) -> State {  /// > | <!doctype>  ///               ^  /// ``` -fn continuation_after(tokenizer: &mut Tokenizer) -> State { +pub fn continuation_after(tokenizer: &mut Tokenizer) -> State {      tokenizer.exit(Token::HtmlFlow);      tokenizer.tokenize_state.marker = 0;      // Feel free to interrupt. 
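For reference, the raw end-tag logic in the hunk above amounts to: collect at most `HTML_RAW_SIZE_MAX` ASCII letters after `</`, then compare the name case-insensitively (as CommonMark requires) against `HTML_RAW_NAMES`. A standalone restatement; the constant values here are assumptions matching CommonMark's condition-1 raw tags:

```rust
// Standalone sketch of the raw end-tag check. `RAW_NAMES`/`RAW_SIZE_MAX`
// mirror what `HTML_RAW_NAMES` and `HTML_RAW_SIZE_MAX` are expected to
// hold; treat the exact values as assumptions.
const RAW_NAMES: [&str; 4] = ["pre", "script", "style", "textarea"];
const RAW_SIZE_MAX: usize = 8; // length of the longest raw name

/// Is `bytes[start..]` a raw tag name directly followed by `>`?
fn is_raw_end_tag(bytes: &[u8], start: usize) -> bool {
    let mut index = start;
    while index < bytes.len()
        && index - start < RAW_SIZE_MAX
        && bytes[index].is_ascii_alphabetic()
    {
        index += 1;
    }
    if bytes.get(index) != Some(&b'>') {
        return false; // too long, not a letter run, or no closing `>`
    }
    // Guaranteed to be valid ASCII bytes, as the hunk above notes.
    let name = std::str::from_utf8(&bytes[start..index])
        .unwrap()
        .to_ascii_lowercase();
    RAW_NAMES.contains(&name.as_str())
}

fn main() {
    assert!(is_raw_end_tag(b"script>...", 0));
    assert!(is_raw_end_tag(b"STYLE>", 0));
    assert!(!is_raw_end_tag(b"div>", 0));
    assert!(!is_raw_end_tag(b"scriptsomethinglong>", 0));
}
```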
@@ -883,9 +883,9 @@ fn continuation_after(tokenizer: &mut Tokenizer) -> State {  ///          ^  ///   |  /// ``` -fn blank_line_before(tokenizer: &mut Tokenizer) -> State { +pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State {      tokenizer.enter(Token::LineEnding);      tokenizer.consume();      tokenizer.exit(Token::LineEnding); -    State::Fn(Box::new(blank_line)) +    State::Fn(StateName::BlankLineStart)  } diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs index a4c0349..1c1f9e6 100644 --- a/src/construct/html_text.rs +++ b/src/construct/html_text.rs @@ -57,7 +57,7 @@  use crate::constant::HTML_CDATA_PREFIX;  use crate::construct::partial_space_or_tab::space_or_tab;  use crate::token::Token; -use crate::tokenizer::{State, Tokenizer}; +use crate::tokenizer::{State, StateName, Tokenizer};  /// Start of HTML (text)  /// @@ -70,7 +70,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {          tokenizer.enter(Token::HtmlText);          tokenizer.enter(Token::HtmlTextData);          tokenizer.consume(); -        State::Fn(Box::new(open)) +        State::Fn(StateName::HtmlTextOpen)      } else {          State::Nok      } @@ -86,24 +86,24 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {  /// > | a <!--b--> c  ///        ^  /// ``` -fn open(tokenizer: &mut Tokenizer) -> State { +pub fn open(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'!') => {              tokenizer.consume(); -            State::Fn(Box::new(declaration_open)) +            State::Fn(StateName::HtmlTextDeclarationOpen)          }          Some(b'/') => {              tokenizer.consume(); -            State::Fn(Box::new(tag_close_start)) +            State::Fn(StateName::HtmlTextTagCloseStart)          }          Some(b'?') => {              tokenizer.consume(); -            State::Fn(Box::new(instruction)) +            State::Fn(StateName::HtmlTextInstruction)          }          // ASCII alphabetical.          Some(b'A'..=b'Z' | b'a'..=b'z') => {              tokenizer.consume(); -            State::Fn(Box::new(tag_open)) +            State::Fn(StateName::HtmlTextTagOpen)          }          _ => State::Nok,      } @@ -119,20 +119,20 @@ fn open(tokenizer: &mut Tokenizer) -> State {  /// > | a <![CDATA[>&<]]> c  ///         ^  /// ``` -fn declaration_open(tokenizer: &mut Tokenizer) -> State { +pub fn declaration_open(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'-') => {              tokenizer.consume(); -            State::Fn(Box::new(comment_open_inside)) +            State::Fn(StateName::HtmlTextCommentOpenInside)          }          // ASCII alphabetical.          
Some(b'A'..=b'Z' | b'a'..=b'z') => {              tokenizer.consume(); -            State::Fn(Box::new(declaration)) +            State::Fn(StateName::HtmlTextDeclaration)          }          Some(b'[') => {              tokenizer.consume(); -            State::Fn(Box::new(cdata_open_inside)) +            State::Fn(StateName::HtmlTextCdataOpenInside)          }          _ => State::Nok,      } @@ -144,11 +144,11 @@ fn declaration_open(tokenizer: &mut Tokenizer) -> State {  /// > | a <!--b--> c  ///          ^  /// ``` -fn comment_open_inside(tokenizer: &mut Tokenizer) -> State { +pub fn comment_open_inside(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'-') => {              tokenizer.consume(); -            State::Fn(Box::new(comment_start)) +            State::Fn(StateName::HtmlTextCommentStart)          }          _ => State::Nok,      } @@ -167,12 +167,12 @@ fn comment_open_inside(tokenizer: &mut Tokenizer) -> State {  /// ```  ///  /// [html_flow]: crate::construct::html_flow -fn comment_start(tokenizer: &mut Tokenizer) -> State { +pub fn comment_start(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'>') => State::Nok,          Some(b'-') => {              tokenizer.consume(); -            State::Fn(Box::new(comment_start_dash)) +            State::Fn(StateName::HtmlTextCommentStartDash)          }          _ => comment(tokenizer),      } @@ -191,7 +191,7 @@ fn comment_start(tokenizer: &mut Tokenizer) -> State {  /// ```  ///  /// [html_flow]: crate::construct::html_flow -fn comment_start_dash(tokenizer: &mut Tokenizer) -> State { +pub fn comment_start_dash(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'>') => State::Nok,          _ => comment(tokenizer), @@ -204,20 +204,20 @@ fn comment_start_dash(tokenizer: &mut Tokenizer) -> State {  /// > | a <!--b--> c  ///           ^  /// ``` -fn comment(tokenizer: &mut Tokenizer) -> State { +pub fn comment(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          None => State::Nok,          Some(b'\n') => { -            tokenizer.tokenize_state.return_state = Some(Box::new(comment)); -            at_line_ending(tokenizer) +            tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextComment); +            line_ending_before(tokenizer)          }          Some(b'-') => {              tokenizer.consume(); -            State::Fn(Box::new(comment_close)) +            State::Fn(StateName::HtmlTextCommentClose)          }          _ => {              tokenizer.consume(); -            State::Fn(Box::new(comment)) +            State::Fn(StateName::HtmlTextComment)          }      }  } @@ -228,11 +228,11 @@ fn comment(tokenizer: &mut Tokenizer) -> State {  /// > | a <!--b--> c  ///             ^  /// ``` -fn comment_close(tokenizer: &mut Tokenizer) -> State { +pub fn comment_close(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'-') => {              tokenizer.consume(); -            State::Fn(Box::new(end)) +            State::Fn(StateName::HtmlTextEnd)          }          _ => comment(tokenizer),      } @@ -244,16 +244,16 @@ fn comment_close(tokenizer: &mut Tokenizer) -> State {  /// > | a <![CDATA[>&<]]> b  ///          ^^^^^^  /// ``` -fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State { +pub fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State {      if tokenizer.current == Some(HTML_CDATA_PREFIX[tokenizer.tokenize_state.size]) {          
tokenizer.tokenize_state.size += 1;          tokenizer.consume();          if tokenizer.tokenize_state.size == HTML_CDATA_PREFIX.len() {              tokenizer.tokenize_state.size = 0; -            State::Fn(Box::new(cdata)) +            State::Fn(StateName::HtmlTextCdata)          } else { -            State::Fn(Box::new(cdata_open_inside)) +            State::Fn(StateName::HtmlTextCdataOpenInside)          }      } else {          State::Nok @@ -266,20 +266,20 @@ fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State {  /// > | a <![CDATA[>&<]]> b  ///                ^^^  /// ``` -fn cdata(tokenizer: &mut Tokenizer) -> State { +pub fn cdata(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          None => State::Nok,          Some(b'\n') => { -            tokenizer.tokenize_state.return_state = Some(Box::new(cdata)); -            at_line_ending(tokenizer) +            tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextCdata); +            line_ending_before(tokenizer)          }          Some(b']') => {              tokenizer.consume(); -            State::Fn(Box::new(cdata_close)) +            State::Fn(StateName::HtmlTextCdataClose)          }          _ => {              tokenizer.consume(); -            State::Fn(Box::new(cdata)) +            State::Fn(StateName::HtmlTextCdata)          }      }  } @@ -290,11 +290,11 @@ fn cdata(tokenizer: &mut Tokenizer) -> State {  /// > | a <![CDATA[>&<]]> b  ///                    ^  /// ``` -fn cdata_close(tokenizer: &mut Tokenizer) -> State { +pub fn cdata_close(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b']') => {              tokenizer.consume(); -            State::Fn(Box::new(cdata_end)) +            State::Fn(StateName::HtmlTextCdataEnd)          }          _ => cdata(tokenizer),      } @@ -306,7 +306,7 @@ fn cdata_close(tokenizer: &mut Tokenizer) -> State {  /// > | a <![CDATA[>&<]]> b  ///                     ^  /// ``` -fn cdata_end(tokenizer: &mut Tokenizer) -> State { +pub fn cdata_end(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'>') => end(tokenizer),          Some(b']') => cdata_close(tokenizer), @@ -320,16 +320,16 @@ fn cdata_end(tokenizer: &mut Tokenizer) -> State {  /// > | a <!b> c  ///          ^  /// ``` -fn declaration(tokenizer: &mut Tokenizer) -> State { +pub fn declaration(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          None | Some(b'>') => end(tokenizer),          Some(b'\n') => { -            tokenizer.tokenize_state.return_state = Some(Box::new(declaration)); -            at_line_ending(tokenizer) +            tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextDeclaration); +            line_ending_before(tokenizer)          }          _ => {              tokenizer.consume(); -            State::Fn(Box::new(declaration)) +            State::Fn(StateName::HtmlTextDeclaration)          }      }  } @@ -340,20 +340,20 @@ fn declaration(tokenizer: &mut Tokenizer) -> State {  /// > | a <?b?> c  ///         ^  /// ``` -fn instruction(tokenizer: &mut Tokenizer) -> State { +pub fn instruction(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          None => State::Nok,          Some(b'\n') => { -            tokenizer.tokenize_state.return_state = Some(Box::new(instruction)); -            at_line_ending(tokenizer) +            tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextInstruction); +            line_ending_before(tokenizer)          
}          Some(b'?') => {              tokenizer.consume(); -            State::Fn(Box::new(instruction_close)) +            State::Fn(StateName::HtmlTextInstructionClose)          }          _ => {              tokenizer.consume(); -            State::Fn(Box::new(instruction)) +            State::Fn(StateName::HtmlTextInstruction)          }      }  } @@ -364,7 +364,7 @@ fn instruction(tokenizer: &mut Tokenizer) -> State {  /// > | a <?b?> c  ///           ^  /// ``` -fn instruction_close(tokenizer: &mut Tokenizer) -> State { +pub fn instruction_close(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'>') => end(tokenizer),          _ => instruction(tokenizer), @@ -377,12 +377,12 @@ fn instruction_close(tokenizer: &mut Tokenizer) -> State {  /// > | a </b> c  ///         ^  /// ``` -fn tag_close_start(tokenizer: &mut Tokenizer) -> State { +pub fn tag_close_start(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          // ASCII alphabetical.          Some(b'A'..=b'Z' | b'a'..=b'z') => {              tokenizer.consume(); -            State::Fn(Box::new(tag_close)) +            State::Fn(StateName::HtmlTextTagClose)          }          _ => State::Nok,      } @@ -394,12 +394,12 @@ fn tag_close_start(tokenizer: &mut Tokenizer) -> State {  /// > | a </b> c  ///          ^  /// ``` -fn tag_close(tokenizer: &mut Tokenizer) -> State { +pub fn tag_close(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          // ASCII alphanumerical and `-`.          Some(b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => {              tokenizer.consume(); -            State::Fn(Box::new(tag_close)) +            State::Fn(StateName::HtmlTextTagClose)          }          _ => tag_close_between(tokenizer),      } @@ -411,15 +411,15 @@ fn tag_close(tokenizer: &mut Tokenizer) -> State {  /// > | a </b> c  ///          ^  /// ``` -fn tag_close_between(tokenizer: &mut Tokenizer) -> State { +pub fn tag_close_between(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'\n') => { -            tokenizer.tokenize_state.return_state = Some(Box::new(tag_close_between)); -            at_line_ending(tokenizer) +            tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextTagCloseBetween); +            line_ending_before(tokenizer)          }          Some(b'\t' | b' ') => {              tokenizer.consume(); -            State::Fn(Box::new(tag_close_between)) +            State::Fn(StateName::HtmlTextTagCloseBetween)          }          _ => end(tokenizer),      } @@ -431,12 +431,12 @@ fn tag_close_between(tokenizer: &mut Tokenizer) -> State {  /// > | a <b> c  ///         ^  /// ``` -fn tag_open(tokenizer: &mut Tokenizer) -> State { +pub fn tag_open(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          // ASCII alphanumerical and `-`.          
Some(b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => {              tokenizer.consume(); -            State::Fn(Box::new(tag_open)) +            State::Fn(StateName::HtmlTextTagOpen)          }          Some(b'\t' | b'\n' | b' ' | b'/' | b'>') => tag_open_between(tokenizer),          _ => State::Nok, @@ -449,24 +449,24 @@ fn tag_open(tokenizer: &mut Tokenizer) -> State {  /// > | a <b> c  ///         ^  /// ``` -fn tag_open_between(tokenizer: &mut Tokenizer) -> State { +pub fn tag_open_between(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'\n') => { -            tokenizer.tokenize_state.return_state = Some(Box::new(tag_open_between)); -            at_line_ending(tokenizer) +            tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextTagOpenBetween); +            line_ending_before(tokenizer)          }          Some(b'\t' | b' ') => {              tokenizer.consume(); -            State::Fn(Box::new(tag_open_between)) +            State::Fn(StateName::HtmlTextTagOpenBetween)          }          Some(b'/') => {              tokenizer.consume(); -            State::Fn(Box::new(end)) +            State::Fn(StateName::HtmlTextEnd)          }          // ASCII alphabetical and `:` and `_`.          Some(b':' | b'A'..=b'Z' | b'_' | b'a'..=b'z') => {              tokenizer.consume(); -            State::Fn(Box::new(tag_open_attribute_name)) +            State::Fn(StateName::HtmlTextTagOpenAttributeName)          }          _ => end(tokenizer),      } @@ -478,12 +478,12 @@ fn tag_open_between(tokenizer: &mut Tokenizer) -> State {  /// > | a <b c> d  ///          ^  /// ``` -fn tag_open_attribute_name(tokenizer: &mut Tokenizer) -> State { +pub fn tag_open_attribute_name(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          // ASCII alphabetical and `-`, `.`, `:`, and `_`.          Some(b'-' | b'.' 
| b'0'..=b'9' | b':' | b'A'..=b'Z' | b'_' | b'a'..=b'z') => {              tokenizer.consume(); -            State::Fn(Box::new(tag_open_attribute_name)) +            State::Fn(StateName::HtmlTextTagOpenAttributeName)          }          _ => tag_open_attribute_name_after(tokenizer),      } @@ -496,19 +496,20 @@ fn tag_open_attribute_name(tokenizer: &mut Tokenizer) -> State {  /// > | a <b c> d  ///           ^  /// ``` -fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer) -> State { +pub fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'\n') => { -            tokenizer.tokenize_state.return_state = Some(Box::new(tag_open_attribute_name_after)); -            at_line_ending(tokenizer) +            tokenizer.tokenize_state.return_state = +                Some(StateName::HtmlTextTagOpenAttributeNameAfter); +            line_ending_before(tokenizer)          }          Some(b'\t' | b' ') => {              tokenizer.consume(); -            State::Fn(Box::new(tag_open_attribute_name_after)) +            State::Fn(StateName::HtmlTextTagOpenAttributeNameAfter)          }          Some(b'=') => {              tokenizer.consume(); -            State::Fn(Box::new(tag_open_attribute_value_before)) +            State::Fn(StateName::HtmlTextTagOpenAttributeValueBefore)          }          _ => tag_open_between(tokenizer),      } @@ -521,25 +522,26 @@ fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer) -> State {  /// > | a <b c=d> e  ///            ^  /// ``` -fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer) -> State { +pub fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          None | Some(b'<' | b'=' | b'>' | b'`') => State::Nok,          Some(b'\n') => { -            tokenizer.tokenize_state.return_state = Some(Box::new(tag_open_attribute_value_before)); -            at_line_ending(tokenizer) +            tokenizer.tokenize_state.return_state = +                Some(StateName::HtmlTextTagOpenAttributeValueBefore); +            line_ending_before(tokenizer)          }          Some(b'\t' | b' ') => {              tokenizer.consume(); -            State::Fn(Box::new(tag_open_attribute_value_before)) +            State::Fn(StateName::HtmlTextTagOpenAttributeValueBefore)          }          Some(b'"' | b'\'') => {              tokenizer.tokenize_state.marker = tokenizer.current.unwrap();              tokenizer.consume(); -            State::Fn(Box::new(tag_open_attribute_value_quoted)) +            State::Fn(StateName::HtmlTextTagOpenAttributeValueQuoted)          }          Some(_) => {              tokenizer.consume(); -            State::Fn(Box::new(tag_open_attribute_value_unquoted)) +            State::Fn(StateName::HtmlTextTagOpenAttributeValueUnquoted)          }      }  } @@ -550,24 +552,25 @@ fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer) -> State {  /// > | a <b c="d"> e  ///             ^  /// ``` -fn tag_open_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State { +pub fn tag_open_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          None => {              tokenizer.tokenize_state.marker = 0;              State::Nok          }          Some(b'\n') => { -            tokenizer.tokenize_state.return_state = Some(Box::new(tag_open_attribute_value_quoted)); -            at_line_ending(tokenizer) +            tokenizer.tokenize_state.return_state = +                
Some(StateName::HtmlTextTagOpenAttributeValueQuoted); +            line_ending_before(tokenizer)          }          Some(b'"' | b'\'') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {              tokenizer.tokenize_state.marker = 0;              tokenizer.consume(); -            State::Fn(Box::new(tag_open_attribute_value_quoted_after)) +            State::Fn(StateName::HtmlTextTagOpenAttributeValueQuotedAfter)          }          _ => {              tokenizer.consume(); -            State::Fn(Box::new(tag_open_attribute_value_quoted)) +            State::Fn(StateName::HtmlTextTagOpenAttributeValueQuoted)          }      }  } @@ -578,13 +581,13 @@ fn tag_open_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {  /// > | a <b c=d> e  ///            ^  /// ``` -fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State { +pub fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          None | Some(b'"' | b'\'' | b'<' | b'=' | b'`') => State::Nok,          Some(b'\t' | b'\n' | b' ' | b'/' | b'>') => tag_open_between(tokenizer),          Some(_) => {              tokenizer.consume(); -            State::Fn(Box::new(tag_open_attribute_value_unquoted)) +            State::Fn(StateName::HtmlTextTagOpenAttributeValueUnquoted)          }      }  } @@ -596,7 +599,7 @@ fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State {  /// > | a <b c="d"> e  ///               ^  /// ``` -fn tag_open_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State { +pub fn tag_open_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'\t' | b'\n' | b' ' | b'>' | b'/') => tag_open_between(tokenizer),          _ => State::Nok, @@ -609,7 +612,7 @@ fn tag_open_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State {  /// > | a <b c="d"> e  ///               ^  /// ``` -fn end(tokenizer: &mut Tokenizer) -> State { +pub fn end(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'>') => {              tokenizer.consume(); @@ -631,14 +634,14 @@ fn end(tokenizer: &mut Tokenizer) -> State {  ///            ^  ///   | b-->  /// ``` -fn at_line_ending(tokenizer: &mut Tokenizer) -> State { +pub fn line_ending_before(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'\n') => {              tokenizer.exit(Token::HtmlTextData);              tokenizer.enter(Token::LineEnding);              tokenizer.consume();              tokenizer.exit(Token::LineEnding); -            State::Fn(Box::new(after_line_ending)) +            State::Fn(StateName::HtmlTextLineEndingAfter)          }          _ => unreachable!("expected eol"),      } @@ -654,8 +657,9 @@ fn at_line_ending(tokenizer: &mut Tokenizer) -> State {  /// > | b-->  ///     ^  /// ``` -fn after_line_ending(tokenizer: &mut Tokenizer) -> State { -    tokenizer.attempt_opt(space_or_tab(), after_line_ending_prefix)(tokenizer) +pub fn line_ending_after(tokenizer: &mut Tokenizer) -> State { +    let state_name = space_or_tab(tokenizer); +    tokenizer.attempt_opt(state_name, StateName::HtmlTextLineEndingAfterPrefix)  }  /// After a line ending, after indent. 
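`line_ending_after_prefix` above shows the flip side of naming states: `tokenize_state.return_state` now holds a `StateName`, and `to_func` resolves it back to a function pointer when it is time to continue. A sketch of that mapping, with placeholder variants and fields:

```rust
// Sketch of the name-to-function mapping the hunk above relies on. The
// variants and field names are placeholders; the real enum covers every
// state function in the crate.
#[derive(Clone, Copy, Debug)]
enum StateName {
    HtmlTextComment,
    HtmlTextLineEndingAfterPrefix,
}

enum State {
    Fn(StateName),
    Ok,
    Nok,
}

struct Tokenizer {
    // A `Copy` enum is trivially storable; a `Box<dyn FnOnce(..)>` was not.
    return_state: Option<StateName>,
}

impl StateName {
    fn to_func(self) -> fn(&mut Tokenizer) -> State {
        match self {
            StateName::HtmlTextComment => comment,
            StateName::HtmlTextLineEndingAfterPrefix => line_ending_after_prefix,
        }
    }
}

fn comment(_: &mut Tokenizer) -> State {
    State::Ok // stub: the real state keeps scanning comment bytes
}

// Same shape as the diff: take the stored name, resolve it, tail-call.
fn line_ending_after_prefix(tokenizer: &mut Tokenizer) -> State {
    let state_name = tokenizer.return_state.take().unwrap();
    let func = state_name.to_func();
    func(tokenizer)
}

fn main() {
    let mut t = Tokenizer {
        return_state: Some(StateName::HtmlTextComment),
    };
    assert!(matches!(line_ending_after_prefix(&mut t), State::Ok));
    assert!(t.return_state.is_none()); // `take` clears it
}
```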
@@ -668,8 +672,9 @@ fn after_line_ending(tokenizer: &mut Tokenizer) -> State {  /// > | b-->  ///     ^  /// ``` -fn after_line_ending_prefix(tokenizer: &mut Tokenizer) -> State { -    let return_state = tokenizer.tokenize_state.return_state.take().unwrap(); +pub fn line_ending_after_prefix(tokenizer: &mut Tokenizer) -> State { +    let state_name = tokenizer.tokenize_state.return_state.take().unwrap(); +    let func = state_name.to_func();      tokenizer.enter(Token::HtmlTextData); -    return_state(tokenizer) +    func(tokenizer)  } diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs index b38e15a..ae9fe77 100644 --- a/src/construct/label_end.rs +++ b/src/construct/label_end.rs @@ -147,12 +147,9 @@  //! [html-img]: https://html.spec.whatwg.org/multipage/embedded-content.html#the-img-element  use crate::constant::RESOURCE_DESTINATION_BALANCE_MAX; -use crate::construct::{ -    partial_destination::start as destination, partial_label::start as label, -    partial_space_or_tab::space_or_tab_eol, partial_title::start as title, -}; +use crate::construct::partial_space_or_tab::space_or_tab_eol;  use crate::token::Token; -use crate::tokenizer::{Event, EventType, Media, State, Tokenizer}; +use crate::tokenizer::{Event, EventType, Media, State, StateName, Tokenizer};  use crate::util::{      normalize_identifier::normalize_identifier,      skip, @@ -204,7 +201,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {              tokenizer.consume();              tokenizer.exit(Token::LabelMarker);              tokenizer.exit(Token::LabelEnd); -            return State::Fn(Box::new(after)); +            return State::Fn(StateName::LabelEndAfter);          }      } @@ -223,7 +220,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {  /// > | [a] b  ///       ^  /// ``` -fn after(tokenizer: &mut Tokenizer) -> State { +pub fn after(tokenizer: &mut Tokenizer) -> State {      let start = &tokenizer.label_start_stack[tokenizer.tokenize_state.start];      let defined = tokenizer          .parse_state @@ -240,19 +237,23 @@ fn after(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          // Resource (`[asd](fgh)`)? -        Some(b'(') => tokenizer.attempt(resource, move |is_ok| { -            Box::new(if is_ok || defined { ok } else { nok }) -        })(tokenizer), +        Some(b'(') => tokenizer.attempt(StateName::LabelEndResourceStart, move |is_ok| { +            State::Fn(if is_ok || defined { +                StateName::LabelEndOk +            } else { +                StateName::LabelEndNok +            }) +        }),          // Full (`[asd][fgh]`) or collapsed (`[asd][]`) reference? -        Some(b'[') => tokenizer.attempt(full_reference, move |is_ok| { -            Box::new(if is_ok { -                ok +        Some(b'[') => tokenizer.attempt(StateName::LabelEndReferenceFull, move |is_ok| { +            State::Fn(if is_ok { +                StateName::LabelEndOk              } else if defined { -                reference_not_full +                StateName::LabelEndReferenceNotFull              } else { -                nok +                StateName::LabelEndNok              }) -        })(tokenizer), +        }),          // Shortcut (`[asd]`) reference?          
_ => {              let func = if defined { ok } else { nok }; @@ -271,10 +272,14 @@ fn after(tokenizer: &mut Tokenizer) -> State {  /// > | [a] b  ///        ^  /// ``` -fn reference_not_full(tokenizer: &mut Tokenizer) -> State { -    tokenizer.attempt(collapsed_reference, |is_ok| { -        Box::new(if is_ok { ok } else { nok }) -    })(tokenizer) +pub fn reference_not_full(tokenizer: &mut Tokenizer) -> State { +    tokenizer.attempt(StateName::LabelEndReferenceCollapsed, |is_ok| { +        State::Fn(if is_ok { +            StateName::LabelEndOk +        } else { +            StateName::LabelEndNok +        }) +    })  }  /// Done, we found something. @@ -289,7 +294,7 @@ fn reference_not_full(tokenizer: &mut Tokenizer) -> State {  /// > | [a] b  ///        ^  /// ``` -fn ok(tokenizer: &mut Tokenizer) -> State { +pub fn ok(tokenizer: &mut Tokenizer) -> State {      let label_start_index = tokenizer.tokenize_state.start;      // Remove this one and everything after it.      let mut left = tokenizer.label_start_stack.split_off(label_start_index); @@ -332,7 +337,7 @@ fn ok(tokenizer: &mut Tokenizer) -> State {  /// > | [a] b  ///        ^  /// ``` -fn nok(tokenizer: &mut Tokenizer) -> State { +pub fn nok(tokenizer: &mut Tokenizer) -> State {      tokenizer          .label_start_stack          .get_mut(tokenizer.tokenize_state.start) @@ -349,14 +354,14 @@ fn nok(tokenizer: &mut Tokenizer) -> State {  /// > | [a](b) c  ///        ^  /// ``` -fn resource(tokenizer: &mut Tokenizer) -> State { +pub fn resource_start(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'(') => {              tokenizer.enter(Token::Resource);              tokenizer.enter(Token::ResourceMarker);              tokenizer.consume();              tokenizer.exit(Token::ResourceMarker); -            State::Fn(Box::new(resource_start)) +            State::Fn(StateName::LabelEndResourceBefore)          }          _ => unreachable!("expected `(`"),      } @@ -368,8 +373,9 @@ fn resource(tokenizer: &mut Tokenizer) -> State {  /// > | [a](b) c  ///         ^  /// ``` -fn resource_start(tokenizer: &mut Tokenizer) -> State { -    tokenizer.attempt_opt(space_or_tab_eol(), resource_open)(tokenizer) +pub fn resource_before(tokenizer: &mut Tokenizer) -> State { +    let state_name = space_or_tab_eol(tokenizer); +    tokenizer.attempt_opt(state_name, StateName::LabelEndResourceOpen)  }  /// At the start of a resource, after optional whitespace. 
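One knock-on effect visible in these hunks: helpers such as `space_or_tab_eol` and `space_or_tab_min_max` now take the tokenizer and return a `StateName`, because a named state function cannot capture its `min`/`max` options the way a boxed closure could. A plausible shape for such a builder; the field names and storage scheme are guesses:

```rust
// Plausible shape of a parameterized sub-construct under the new scheme:
// options that a closure used to capture are parked on the tokenizer, and
// only the entry state's *name* is returned for `go`/`attempt_opt`.
struct Tokenizer {
    space_or_tab_min: usize,
    space_or_tab_max: usize,
    index: usize,
    bytes: &'static [u8],
}

#[derive(Clone, Copy, Debug)]
enum StateName {
    SpaceOrTabStart,
}

/// Store the configuration, hand back the entry state by name.
fn space_or_tab_min_max(tokenizer: &mut Tokenizer, min: usize, max: usize) -> StateName {
    tokenizer.space_or_tab_min = min;
    tokenizer.space_or_tab_max = max;
    StateName::SpaceOrTabStart
}

// The entry state reads its configuration back off the tokenizer.
// (Returns a plain bool here; the real construct is a state function.)
fn space_or_tab_start(tokenizer: &mut Tokenizer) -> bool {
    let mut size = 0;
    while size < tokenizer.space_or_tab_max
        && matches!(tokenizer.bytes.get(tokenizer.index), Some(b'\t' | b' '))
    {
        tokenizer.index += 1;
        size += 1;
    }
    size >= tokenizer.space_or_tab_min
}

fn main() {
    let mut t = Tokenizer {
        space_or_tab_min: 0,
        space_or_tab_max: 0,
        index: 0,
        bytes: b"   # heading",
    };
    // As in the hunks: up to `TAB_SIZE - 1` leading spaces are allowed.
    let state_name = space_or_tab_min_max(&mut t, 0, 3);
    println!("entry state: {state_name:?}");
    assert!(space_or_tab_start(&mut t));
    assert_eq!(t.index, 3); // consumed the three spaces, stopped at `#`
}
```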
@@ -378,7 +384,7 @@ fn resource_start(tokenizer: &mut Tokenizer) -> State {  /// > | [a](b) c  ///         ^  /// ``` -fn resource_open(tokenizer: &mut Tokenizer) -> State { +pub fn resource_open(tokenizer: &mut Tokenizer) -> State {      if let Some(b')') = tokenizer.current {          resource_end(tokenizer)      } else { @@ -389,13 +395,13 @@ fn resource_open(tokenizer: &mut Tokenizer) -> State {          tokenizer.tokenize_state.token_5 = Token::ResourceDestinationString;          tokenizer.tokenize_state.size_other = RESOURCE_DESTINATION_BALANCE_MAX; -        tokenizer.attempt(destination, |ok| { -            Box::new(if ok { -                destination_after +        tokenizer.attempt(StateName::DestinationStart, |ok| { +            State::Fn(if ok { +                StateName::LabelEndResourceDestinationAfter              } else { -                destination_missing +                StateName::LabelEndResourceDestinationMissing              }) -        })(tokenizer) +        })      }  } @@ -405,21 +411,26 @@ fn resource_open(tokenizer: &mut Tokenizer) -> State {  /// > | [a](b) c  ///          ^  /// ``` -fn destination_after(tokenizer: &mut Tokenizer) -> State { +pub fn resource_destination_after(tokenizer: &mut Tokenizer) -> State {      tokenizer.tokenize_state.token_1 = Token::Data;      tokenizer.tokenize_state.token_2 = Token::Data;      tokenizer.tokenize_state.token_3 = Token::Data;      tokenizer.tokenize_state.token_4 = Token::Data;      tokenizer.tokenize_state.token_5 = Token::Data;      tokenizer.tokenize_state.size_other = 0; - -    tokenizer.attempt(space_or_tab_eol(), |ok| { -        Box::new(if ok { resource_between } else { resource_end }) -    })(tokenizer) +    let state_name = space_or_tab_eol(tokenizer); + +    tokenizer.attempt(state_name, |ok| { +        State::Fn(if ok { +            StateName::LabelEndResourceBetween +        } else { +            StateName::LabelEndResourceEnd +        }) +    })  }  /// Without destination. 
-fn destination_missing(tokenizer: &mut Tokenizer) -> State { +pub fn resource_destination_missing(tokenizer: &mut Tokenizer) -> State {      tokenizer.tokenize_state.token_1 = Token::Data;      tokenizer.tokenize_state.token_2 = Token::Data;      tokenizer.tokenize_state.token_3 = Token::Data; @@ -435,13 +446,13 @@ fn destination_missing(tokenizer: &mut Tokenizer) -> State {  /// > | [a](b ) c  ///           ^  /// ``` -fn resource_between(tokenizer: &mut Tokenizer) -> State { +pub fn resource_between(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'"' | b'\'' | b'(') => {              tokenizer.tokenize_state.token_1 = Token::ResourceTitle;              tokenizer.tokenize_state.token_2 = Token::ResourceTitleMarker;              tokenizer.tokenize_state.token_3 = Token::ResourceTitleString; -            tokenizer.go(title, title_after)(tokenizer) +            tokenizer.go(StateName::TitleStart, StateName::LabelEndResourceTitleAfter)          }          _ => resource_end(tokenizer),      } @@ -453,11 +464,12 @@ fn resource_between(tokenizer: &mut Tokenizer) -> State {  /// > | [a](b "c") d  ///              ^  /// ``` -fn title_after(tokenizer: &mut Tokenizer) -> State { +pub fn resource_title_after(tokenizer: &mut Tokenizer) -> State {      tokenizer.tokenize_state.token_1 = Token::Data;      tokenizer.tokenize_state.token_2 = Token::Data;      tokenizer.tokenize_state.token_3 = Token::Data; -    tokenizer.attempt_opt(space_or_tab_eol(), resource_end)(tokenizer) +    let state_name = space_or_tab_eol(tokenizer); +    tokenizer.attempt_opt(state_name, StateName::LabelEndResourceEnd)  }  /// In a resource, at the `)`. @@ -466,7 +478,7 @@ fn title_after(tokenizer: &mut Tokenizer) -> State {  /// > | [a](b) d  ///          ^  /// ``` -fn resource_end(tokenizer: &mut Tokenizer) -> State { +pub fn resource_end(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b')') => {              tokenizer.enter(Token::ResourceMarker); @@ -485,13 +497,13 @@ fn resource_end(tokenizer: &mut Tokenizer) -> State {  /// > | [a][b] d  ///        ^  /// ``` -fn full_reference(tokenizer: &mut Tokenizer) -> State { +pub fn reference_full(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'[') => {              tokenizer.tokenize_state.token_1 = Token::Reference;              tokenizer.tokenize_state.token_2 = Token::ReferenceMarker;              tokenizer.tokenize_state.token_3 = Token::ReferenceString; -            tokenizer.go(label, full_reference_after)(tokenizer) +            tokenizer.go(StateName::LabelStart, StateName::LabelEndReferenceFullAfter)          }          _ => unreachable!("expected `[`"),      } @@ -503,7 +515,7 @@ fn full_reference(tokenizer: &mut Tokenizer) -> State {  /// > | [a][b] d  ///          ^  /// ``` -fn full_reference_after(tokenizer: &mut Tokenizer) -> State { +pub fn reference_full_after(tokenizer: &mut Tokenizer) -> State {      tokenizer.tokenize_state.token_1 = Token::Data;      tokenizer.tokenize_state.token_2 = Token::Data;      tokenizer.tokenize_state.token_3 = Token::Data; @@ -541,14 +553,14 @@ fn full_reference_after(tokenizer: &mut Tokenizer) -> State {  /// > | [a][] d  ///        ^  /// ``` -fn collapsed_reference(tokenizer: &mut Tokenizer) -> State { +pub fn reference_collapsed(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'[') => {              tokenizer.enter(Token::Reference);              tokenizer.enter(Token::ReferenceMarker); 
             tokenizer.consume();              tokenizer.exit(Token::ReferenceMarker); -            State::Fn(Box::new(collapsed_reference_open)) +            State::Fn(StateName::LabelEndReferenceCollapsedOpen)          }          _ => State::Nok,      } @@ -562,7 +574,7 @@ fn collapsed_reference(tokenizer: &mut Tokenizer) -> State {  /// > | [a][] d  ///         ^  /// ``` -fn collapsed_reference_open(tokenizer: &mut Tokenizer) -> State { +pub fn reference_collapsed_open(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b']') => {              tokenizer.enter(Token::ReferenceMarker); diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs index 4a3508e..4fcf8c2 100644 --- a/src/construct/label_start_image.rs +++ b/src/construct/label_start_image.rs @@ -30,7 +30,7 @@  use super::label_end::resolve_media;  use crate::token::Token; -use crate::tokenizer::{LabelStart, State, Tokenizer}; +use crate::tokenizer::{LabelStart, State, StateName, Tokenizer};  /// Start of label (image) start.  /// @@ -45,7 +45,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {              tokenizer.enter(Token::LabelImageMarker);              tokenizer.consume();              tokenizer.exit(Token::LabelImageMarker); -            State::Fn(Box::new(open)) +            State::Fn(StateName::LabelStartImageOpen)          }          _ => State::Nok,      } diff --git a/src/construct/list.rs b/src/construct/list.rs index 0e12b7c..6ecfb04 100644 --- a/src/construct/list.rs +++ b/src/construct/list.rs @@ -45,12 +45,9 @@  //! [commonmark-block]: https://spec.commonmark.org/0.30/#phase-1-block-structure  use crate::constant::{LIST_ITEM_VALUE_SIZE_MAX, TAB_SIZE}; -use crate::construct::{ -    blank_line::start as blank_line, partial_space_or_tab::space_or_tab_min_max, -    thematic_break::start as thematic_break, -}; +use crate::construct::partial_space_or_tab::space_or_tab_min_max;  use crate::token::Token; -use crate::tokenizer::{EventType, State, Tokenizer}; +use crate::tokenizer::{EventType, State, StateName, Tokenizer};  use crate::util::{      skip,      slice::{Position, Slice}, @@ -65,17 +62,16 @@ use crate::util::{  pub fn start(tokenizer: &mut Tokenizer) -> State {      if tokenizer.parse_state.constructs.list {          tokenizer.enter(Token::ListItem); -        tokenizer.go( -            space_or_tab_min_max( -                0, -                if tokenizer.parse_state.constructs.code_indented { -                    TAB_SIZE - 1 -                } else { -                    usize::MAX -                }, -            ), -            before, -        )(tokenizer) +        let state_name = space_or_tab_min_max( +            tokenizer, +            0, +            if tokenizer.parse_state.constructs.code_indented { +                TAB_SIZE - 1 +            } else { +                usize::MAX +            }, +        ); +        tokenizer.go(state_name, StateName::ListBefore)      } else {          State::Nok      } @@ -87,12 +83,16 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {  /// > | * a  ///     ^  /// ``` -fn before(tokenizer: &mut Tokenizer) -> State { +pub fn before(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          // Unordered. 
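// Note: assumed semantics of the two-name `go` used above (a sketch, not the
// crate's real implementation): run the first named machine to completion;
// on `Ok`, continue at the second name, otherwise the outer construct fails
// too.

#[derive(Clone, Copy)]
enum StateName {
    Prefix,
    After,
}

enum State {
    Fn(StateName),
    Ok,
    Nok,
}

struct Tokenizer {
    index: usize,
    bytes: Vec<u8>,
}

fn prefix(tokenizer: &mut Tokenizer) -> State {
    // Toy sub-machine: consume leading spaces, then succeed.
    while tokenizer.bytes.get(tokenizer.index) == Some(&b' ') {
        tokenizer.index += 1;
    }
    State::Ok
}

fn after(_tokenizer: &mut Tokenizer) -> State {
    State::Ok
}

fn call(tokenizer: &mut Tokenizer, name: StateName) -> State {
    match name {
        StateName::Prefix => prefix(tokenizer),
        StateName::After => after(tokenizer),
    }
}

fn go(tokenizer: &mut Tokenizer, name: StateName, ok: StateName) -> State {
    let mut state = call(tokenizer, name);
    // The real tokenizer is driven by its byte feed; looping here only shows
    // the control flow.
    while let State::Fn(next) = state {
        state = call(tokenizer, next);
    }
    match state {
        State::Ok => State::Fn(ok),
        _ => State::Nok,
    }
}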
-        Some(b'*' | b'-') => tokenizer.check(thematic_break, |ok| { -            Box::new(if ok { nok } else { before_unordered }) -        })(tokenizer), +        Some(b'*' | b'-') => tokenizer.check(StateName::ThematicBreakStart, |ok| { +            State::Fn(if ok { +                StateName::ListNok +            } else { +                StateName::ListBeforeUnordered +            }) +        }),          Some(b'+') => before_unordered(tokenizer),          // Ordered.          Some(b'0'..=b'9') if !tokenizer.interrupt => before_ordered(tokenizer), @@ -109,7 +109,7 @@ fn before(tokenizer: &mut Tokenizer) -> State {  /// > | * a  ///     ^  /// ``` -fn before_unordered(tokenizer: &mut Tokenizer) -> State { +pub fn before_unordered(tokenizer: &mut Tokenizer) -> State {      tokenizer.enter(Token::ListItemPrefix);      marker(tokenizer)  } @@ -120,10 +120,10 @@ fn before_unordered(tokenizer: &mut Tokenizer) -> State {  /// > | * a  ///     ^  /// ``` -fn before_ordered(tokenizer: &mut Tokenizer) -> State { +pub fn before_ordered(tokenizer: &mut Tokenizer) -> State {      tokenizer.enter(Token::ListItemPrefix);      tokenizer.enter(Token::ListItemValue); -    inside(tokenizer) +    value(tokenizer)  }  /// In an ordered list item value. @@ -132,7 +132,7 @@ fn before_ordered(tokenizer: &mut Tokenizer) -> State {  /// > | 1. a  ///     ^  /// ``` -fn inside(tokenizer: &mut Tokenizer) -> State { +pub fn value(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'.' | b')') if !tokenizer.interrupt || tokenizer.tokenize_state.size < 2 => {              tokenizer.exit(Token::ListItemValue); @@ -141,7 +141,7 @@ fn inside(tokenizer: &mut Tokenizer) -> State {          Some(b'0'..=b'9') if tokenizer.tokenize_state.size + 1 < LIST_ITEM_VALUE_SIZE_MAX => {              tokenizer.tokenize_state.size += 1;              tokenizer.consume(); -            State::Fn(Box::new(inside)) +            State::Fn(StateName::ListValue)          }          _ => {              tokenizer.tokenize_state.size = 0; @@ -158,11 +158,11 @@ fn inside(tokenizer: &mut Tokenizer) -> State {  /// > | 1. b  ///      ^  /// ``` -fn marker(tokenizer: &mut Tokenizer) -> State { +pub fn marker(tokenizer: &mut Tokenizer) -> State {      tokenizer.enter(Token::ListItemMarker);      tokenizer.consume();      tokenizer.exit(Token::ListItemMarker); -    State::Fn(Box::new(marker_after)) +    State::Fn(StateName::ListMarkerAfter)  }  /// After a list item marker. @@ -173,11 +173,15 @@ fn marker(tokenizer: &mut Tokenizer) -> State {  /// > | 1. b  ///       ^  /// ``` -fn marker_after(tokenizer: &mut Tokenizer) -> State { +pub fn marker_after(tokenizer: &mut Tokenizer) -> State {      tokenizer.tokenize_state.size = 1; -    tokenizer.check(blank_line, |ok| { -        Box::new(if ok { after } else { marker_after_not_blank }) -    })(tokenizer) +    tokenizer.check(StateName::BlankLineStart, |ok| { +        State::Fn(if ok { +            StateName::ListAfter +        } else { +            StateName::ListMarkerAfterFilled +        }) +    })  }  /// After a list item marker, not followed by a blank line. @@ -186,13 +190,17 @@ fn marker_after(tokenizer: &mut Tokenizer) -> State {  /// > | * a  ///      ^  /// ``` -fn marker_after_not_blank(tokenizer: &mut Tokenizer) -> State { +pub fn marker_after_filled(tokenizer: &mut Tokenizer) -> State {      tokenizer.tokenize_state.size = 0;      // Attempt to parse up to the largest allowed indent, `nok` if there is more whitespace. 
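// Note: the new `check` shape used above, sketched with an assumed
// signature: try a named state against the upcoming bytes, roll back, and
// let a plain closure pick the next *name* from the boolean outcome, where
// the old code had to return a freshly boxed state fn.

#[derive(Clone, Copy)]
enum StateName {
    BlankLine,
}

enum State {
    Ok,
    Nok,
}

struct Tokenizer {
    index: usize,
    bytes: Vec<u8>,
}

fn blank_line(tokenizer: &mut Tokenizer) -> State {
    while matches!(
        tokenizer.bytes.get(tokenizer.index).copied(),
        Some(b'\t' | b' ')
    ) {
        tokenizer.index += 1;
    }
    match tokenizer.bytes.get(tokenizer.index).copied() {
        None | Some(b'\n') => State::Ok,
        _ => State::Nok,
    }
}

fn call(tokenizer: &mut Tokenizer, name: StateName) -> State {
    match name {
        StateName::BlankLine => blank_line(tokenizer),
    }
}

fn check(
    tokenizer: &mut Tokenizer,
    name: StateName,
    done: impl FnOnce(bool) -> State,
) -> State {
    let saved = tokenizer.index;
    let ok = matches!(call(tokenizer, name), State::Ok);
    // A check only peeks; it never consumes.
    tokenizer.index = saved;
    done(ok)
}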
-    tokenizer.attempt(whitespace, |ok| { -        Box::new(if ok { after } else { prefix_other }) -    })(tokenizer) +    tokenizer.attempt(StateName::ListWhitespace, |ok| { +        State::Fn(if ok { +            StateName::ListAfter +        } else { +            StateName::ListPrefixOther +        }) +    })  }  /// In whitespace after a marker. @@ -201,8 +209,9 @@ fn marker_after_not_blank(tokenizer: &mut Tokenizer) -> State {  /// > | * a  ///      ^  /// ``` -fn whitespace(tokenizer: &mut Tokenizer) -> State { -    tokenizer.go(space_or_tab_min_max(1, TAB_SIZE), whitespace_after)(tokenizer) +pub fn whitespace(tokenizer: &mut Tokenizer) -> State { +    let state_name = space_or_tab_min_max(tokenizer, 1, TAB_SIZE); +    tokenizer.go(state_name, StateName::ListWhitespaceAfter)  }  /// After acceptable whitespace. @@ -211,7 +220,7 @@ fn whitespace(tokenizer: &mut Tokenizer) -> State {  /// > | * a  ///      ^  /// ``` -fn whitespace_after(tokenizer: &mut Tokenizer) -> State { +pub fn whitespace_after(tokenizer: &mut Tokenizer) -> State {      if let Some(b'\t' | b' ') = tokenizer.current {          State::Nok      } else { @@ -225,13 +234,13 @@ fn whitespace_after(tokenizer: &mut Tokenizer) -> State {  /// > | * a  ///      ^  /// ``` -fn prefix_other(tokenizer: &mut Tokenizer) -> State { +pub fn prefix_other(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'\t' | b' ') => {              tokenizer.enter(Token::SpaceOrTab);              tokenizer.consume();              tokenizer.exit(Token::SpaceOrTab); -            State::Fn(Box::new(after)) +            State::Fn(StateName::ListAfter)          }          _ => State::Nok,      } @@ -243,7 +252,7 @@ fn prefix_other(tokenizer: &mut Tokenizer) -> State {  /// > | * a  ///       ^  /// ``` -fn after(tokenizer: &mut Tokenizer) -> State { +pub fn after(tokenizer: &mut Tokenizer) -> State {      let blank = tokenizer.tokenize_state.size == 1;      tokenizer.tokenize_state.size = 0; @@ -285,10 +294,14 @@ fn after(tokenizer: &mut Tokenizer) -> State {  /// > |   b  ///     ^  /// ``` -pub fn cont(tokenizer: &mut Tokenizer) -> State { -    tokenizer.check(blank_line, |ok| { -        Box::new(if ok { blank_cont } else { not_blank_cont }) -    })(tokenizer) +pub fn cont_start(tokenizer: &mut Tokenizer) -> State { +    tokenizer.check(StateName::BlankLineStart, |ok| { +        State::Fn(if ok { +            StateName::ListContBlank +        } else { +            StateName::ListContFilled +        }) +    })  }  /// Start of blank list item continuation. @@ -299,15 +312,16 @@ pub fn cont(tokenizer: &mut Tokenizer) -> State {  ///     ^  ///   |   b  /// ``` -pub fn blank_cont(tokenizer: &mut Tokenizer) -> State { +pub fn cont_blank(tokenizer: &mut Tokenizer) -> State {      let container = tokenizer.container.as_ref().unwrap();      let size = container.size;      if container.blank_initial {          State::Nok      } else { +        let state_name = space_or_tab_min_max(tokenizer, 0, size);          // Consume, optionally, at most `size`. 
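// Note: the attempt pair above encodes the CommonMark padding rule for list
// items, roughly (a sketch, with TAB_SIZE assumed to be 4): one to four
// columns of whitespace after the marker belong to the prefix; more than
// that means the item starts with indented code, so only a single space
// counts as padding.

fn list_item_padding(whitespace_after_marker: usize) -> usize {
    const TAB_SIZE: usize = 4;
    if (1..=TAB_SIZE).contains(&whitespace_after_marker) {
        whitespace_after_marker
    } else {
        1
    }
}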
-        tokenizer.go(space_or_tab_min_max(0, size), ok)(tokenizer) +        tokenizer.go(state_name, StateName::ListOk)      }  } @@ -318,14 +332,15 @@ pub fn blank_cont(tokenizer: &mut Tokenizer) -> State {  /// > |   b  ///     ^  /// ``` -pub fn not_blank_cont(tokenizer: &mut Tokenizer) -> State { +pub fn cont_filled(tokenizer: &mut Tokenizer) -> State {      let container = tokenizer.container.as_mut().unwrap();      let size = container.size;      container.blank_initial = false;      // Consume exactly `size`. -    tokenizer.go(space_or_tab_min_max(size, size), ok)(tokenizer) +    let state_name = space_or_tab_min_max(tokenizer, size, size); +    tokenizer.go(state_name, StateName::ListOk)  }  /// A state fn to yield [`State::Ok`]. @@ -334,16 +349,16 @@ pub fn ok(_tokenizer: &mut Tokenizer) -> State {  }  /// A state fn to yield [`State::Nok`]. -fn nok(_tokenizer: &mut Tokenizer) -> State { +pub fn nok(_tokenizer: &mut Tokenizer) -> State {      State::Nok  }  /// Find adjacent list items with the same marker.  pub fn resolve_list_item(tokenizer: &mut Tokenizer) { -    let mut index = 0; -    let mut balance = 0;      let mut lists_wip: Vec<(u8, usize, usize, usize)> = vec![];      let mut lists: Vec<(u8, usize, usize, usize)> = vec![]; +    let mut index = 0; +    let mut balance = 0;      // Merge list items.      while index < tokenizer.events.len() { diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs index 7fdaa66..de750f4 100644 --- a/src/construct/paragraph.rs +++ b/src/construct/paragraph.rs @@ -33,7 +33,7 @@  //! [html]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-p-element  use crate::token::Token; -use crate::tokenizer::{ContentType, EventType, State, Tokenizer}; +use crate::tokenizer::{ContentType, EventType, State, StateName, Tokenizer};  use crate::util::skip::opt as skip_opt;  /// Before a paragraph. @@ -59,7 +59,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {  /// > | abc  ///     ^^^  /// ``` -fn inside(tokenizer: &mut Tokenizer) -> State { +pub fn inside(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          None | Some(b'\n') => {              tokenizer.exit(Token::Data); @@ -71,7 +71,7 @@ fn inside(tokenizer: &mut Tokenizer) -> State {          }          _ => {              tokenizer.consume(); -            State::Fn(Box::new(inside)) +            State::Fn(StateName::ParagraphInside)          }      }  } diff --git a/src/construct/partial_bom.rs b/src/construct/partial_bom.rs index 2257bfd..b32b7f9 100644 --- a/src/construct/partial_bom.rs +++ b/src/construct/partial_bom.rs @@ -11,7 +11,7 @@  //! 
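// Note: why named `ListOk` / `ListNok` states exist at all: `check` and `go`
// now need a *name* to jump to even for trivial outcomes, and a closure can
// no longer be conjured on the spot. Minimal sketch of such terminal states:

enum State {
    Ok,
    Nok,
}

struct Tokenizer;

/// Terminal state that unconditionally succeeds.
fn ok(_tokenizer: &mut Tokenizer) -> State {
    State::Ok
}

/// Terminal state that unconditionally fails.
fn nok(_tokenizer: &mut Tokenizer) -> State {
    State::Nok
}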
*   [`micromark/lib/preprocess.js` in `micromark`](https://github.com/micromark/micromark/blob/ed23453/packages/micromark/dev/lib/preprocess.js#L54-L60)  use crate::token::Token; -use crate::tokenizer::{State, Tokenizer}; +use crate::tokenizer::{State, StateName, Tokenizer};  const BOM: [u8; 3] = [0xEF, 0xBB, 0xBF]; @@ -36,7 +36,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {  /// > | 0xEF 0xBB 0xBF  ///     ^^^^ ^^^^ ^^^^  /// ``` -fn inside(tokenizer: &mut Tokenizer) -> State { +pub fn inside(tokenizer: &mut Tokenizer) -> State {      if tokenizer.current == Some(BOM[tokenizer.tokenize_state.size]) {          tokenizer.tokenize_state.size += 1;          tokenizer.consume(); @@ -45,7 +45,7 @@ fn inside(tokenizer: &mut Tokenizer) -> State {              tokenizer.tokenize_state.size = 0;              State::Ok          } else { -            State::Fn(Box::new(inside)) +            State::Fn(StateName::BomInside)          }      } else {          tokenizer.tokenize_state.size = 0; diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs index 0365489..1cb5e61 100644 --- a/src/construct/partial_data.rs +++ b/src/construct/partial_data.rs @@ -7,7 +7,7 @@  //! [text]: crate::content::text  use crate::token::Token; -use crate::tokenizer::{EventType, State, Tokenizer}; +use crate::tokenizer::{EventType, State, StateName, Tokenizer};  /// At the beginning of data.  /// @@ -17,10 +17,11 @@ use crate::tokenizer::{EventType, State, Tokenizer};  /// ```  pub fn start(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current { +        // Make sure to eat the first `stop`.          Some(byte) if tokenizer.tokenize_state.stop.contains(&byte) => {              tokenizer.enter(Token::Data);              tokenizer.consume(); -            State::Fn(Box::new(data)) +            State::Fn(StateName::DataInside)          }          _ => at_break(tokenizer),      } @@ -32,14 +33,14 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {  /// > | abc  ///     ^  /// ``` -fn at_break(tokenizer: &mut Tokenizer) -> State { +pub fn at_break(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          None => State::Ok,          Some(b'\n') => {              tokenizer.enter(Token::LineEnding);              tokenizer.consume();              tokenizer.exit(Token::LineEnding); -            State::Fn(Box::new(at_break)) +            State::Fn(StateName::DataAtBreak)          }          Some(byte) if tokenizer.tokenize_state.stop.contains(&byte) => {              tokenizer.register_resolver_before("data".to_string(), Box::new(resolve_data)); @@ -47,7 +48,7 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {          }          _ => {              tokenizer.enter(Token::Data); -            data(tokenizer) +            inside(tokenizer)          }      }  } @@ -58,7 +59,7 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {  /// > | abc  ///     ^^^  /// ``` -fn data(tokenizer: &mut Tokenizer) -> State { +pub fn inside(tokenizer: &mut Tokenizer) -> State {      let done = match tokenizer.current {          None | Some(b'\n') => true,          Some(byte) if tokenizer.tokenize_state.stop.contains(&byte) => true, @@ -70,7 +71,7 @@ fn data(tokenizer: &mut Tokenizer) -> State {          at_break(tokenizer)      } else {          tokenizer.consume(); -        State::Fn(Box::new(data)) +        State::Fn(StateName::DataInside)      }  } diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs index f1cfc7d..e8818a0 100644 --- 
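// Note: the `data` partial above scans until a registered "stop" byte, a
// line ending, or the end of input, deliberately consuming a *leading* stop
// byte so the machine always makes progress. How far one `Data` run extends,
// as a standalone function (illustrative, not the crate's API):

fn data_end(bytes: &[u8], start: usize, stop: &[u8]) -> usize {
    let mut index = start;
    // Make sure to eat the first `stop` byte.
    if index < bytes.len() && stop.contains(&bytes[index]) {
        index += 1;
    }
    while index < bytes.len() && bytes[index] != b'\n' && !stop.contains(&bytes[index]) {
        index += 1;
    }
    index
}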
a/src/construct/partial_destination.rs +++ b/src/construct/partial_destination.rs @@ -72,7 +72,7 @@  //! [sanitize_uri]: crate::util::sanitize_uri  use crate::token::Token; -use crate::tokenizer::{ContentType, State, Tokenizer}; +use crate::tokenizer::{ContentType, State, StateName, Tokenizer};  /// Before a destination.  /// @@ -90,7 +90,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {              tokenizer.enter(tokenizer.tokenize_state.token_3.clone());              tokenizer.consume();              tokenizer.exit(tokenizer.tokenize_state.token_3.clone()); -            State::Fn(Box::new(enclosed_before)) +            State::Fn(StateName::DestinationEnclosedBefore)          }          // ASCII control, space, closing paren, but *not* `\0`.          None | Some(0x01..=0x1F | b' ' | b')' | 0x7F) => State::Nok, @@ -110,7 +110,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {  /// > | <aa>  ///      ^  /// ``` -fn enclosed_before(tokenizer: &mut Tokenizer) -> State { +pub fn enclosed_before(tokenizer: &mut Tokenizer) -> State {      if let Some(b'>') = tokenizer.current {          tokenizer.enter(tokenizer.tokenize_state.token_3.clone());          tokenizer.consume(); @@ -131,7 +131,7 @@ fn enclosed_before(tokenizer: &mut Tokenizer) -> State {  /// > | <aa>  ///      ^  /// ``` -fn enclosed(tokenizer: &mut Tokenizer) -> State { +pub fn enclosed(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          None | Some(b'\n' | b'<') => State::Nok,          Some(b'>') => { @@ -141,11 +141,11 @@ fn enclosed(tokenizer: &mut Tokenizer) -> State {          }          Some(b'\\') => {              tokenizer.consume(); -            State::Fn(Box::new(enclosed_escape)) +            State::Fn(StateName::DestinationEnclosedEscape)          }          _ => {              tokenizer.consume(); -            State::Fn(Box::new(enclosed)) +            State::Fn(StateName::DestinationEnclosed)          }      }  } @@ -156,11 +156,11 @@ fn enclosed(tokenizer: &mut Tokenizer) -> State {  /// > | <a\*a>  ///        ^  /// ``` -fn enclosed_escape(tokenizer: &mut Tokenizer) -> State { +pub fn enclosed_escape(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'<' | b'>' | b'\\') => {              tokenizer.consume(); -            State::Fn(Box::new(enclosed)) +            State::Fn(StateName::DestinationEnclosed)          }          _ => enclosed(tokenizer),      } @@ -172,7 +172,7 @@ fn enclosed_escape(tokenizer: &mut Tokenizer) -> State {  /// > | aa  ///     ^  /// ``` -fn raw(tokenizer: &mut Tokenizer) -> State { +pub fn raw(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          None | Some(b'\t' | b'\n' | b' ' | b')') if tokenizer.tokenize_state.size == 0 => {              tokenizer.exit(Token::Data); @@ -185,7 +185,7 @@ fn raw(tokenizer: &mut Tokenizer) -> State {          Some(b'(') if tokenizer.tokenize_state.size < tokenizer.tokenize_state.size_other => {              tokenizer.consume();              tokenizer.tokenize_state.size += 1; -            State::Fn(Box::new(raw)) +            State::Fn(StateName::DestinationRaw)          }          // ASCII control (but *not* `\0`) and space and `(`.          
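// Note: the raw-destination states above implement balanced-paren counting
// with a nesting limit; the same rule over a finished slice, as a sketch
// (simplified: any byte may be escaped, where the real states only escape
// `(`, `)`, and `\`):

fn raw_destination_end(bytes: &[u8], limit: usize) -> Option<usize> {
    let mut depth = 0;
    let mut index = 0;
    while index < bytes.len() {
        match bytes[index] {
            // Whitespace or an unbalanced `)` at depth 0 ends the
            // destination just before it.
            b'\t' | b'\n' | b' ' | b')' if depth == 0 => return Some(index),
            b'(' if depth < limit => depth += 1,
            // ASCII control, space, or a too-deep `(` cannot appear inside.
            0x01..=0x1F | b' ' | b'(' | 0x7F => return None,
            b')' => depth -= 1,
            // Skip the escaped byte.
            b'\\' if index + 1 < bytes.len() => index += 1,
            _ => {}
        }
        index += 1;
    }
    if depth == 0 {
        Some(index)
    } else {
        None
    }
}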
None | Some(0x01..=0x1F | b' ' | b'(' | 0x7F) => { @@ -195,15 +195,15 @@ fn raw(tokenizer: &mut Tokenizer) -> State {          Some(b')') => {              tokenizer.consume();              tokenizer.tokenize_state.size -= 1; -            State::Fn(Box::new(raw)) +            State::Fn(StateName::DestinationRaw)          }          Some(b'\\') => {              tokenizer.consume(); -            State::Fn(Box::new(raw_escape)) +            State::Fn(StateName::DestinationRawEscape)          }          Some(_) => {              tokenizer.consume(); -            State::Fn(Box::new(raw)) +            State::Fn(StateName::DestinationRaw)          }      }  } @@ -214,11 +214,11 @@ fn raw(tokenizer: &mut Tokenizer) -> State {  /// > | a\*a  ///       ^  /// ``` -fn raw_escape(tokenizer: &mut Tokenizer) -> State { +pub fn raw_escape(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'(' | b')' | b'\\') => {              tokenizer.consume(); -            State::Fn(Box::new(raw)) +            State::Fn(StateName::DestinationRaw)          }          _ => raw(tokenizer),      } diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs index 0e1c2ec..0c8366e 100644 --- a/src/construct/partial_label.rs +++ b/src/construct/partial_label.rs @@ -62,7 +62,7 @@ use super::partial_space_or_tab::{space_or_tab_eol_with_options, EolOptions};  use crate::constant::LINK_REFERENCE_SIZE_MAX;  use crate::subtokenize::link;  use crate::token::Token; -use crate::tokenizer::{ContentType, State, Tokenizer}; +use crate::tokenizer::{ContentType, State, StateName, Tokenizer};  /// Before a label.  /// @@ -78,7 +78,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {              tokenizer.consume();              tokenizer.exit(tokenizer.tokenize_state.token_2.clone());              tokenizer.enter(tokenizer.tokenize_state.token_3.clone()); -            State::Fn(Box::new(at_break)) +            State::Fn(StateName::LabelAtBreak)          }          _ => State::Nok,      } @@ -90,7 +90,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {  /// > | [a]  ///      ^  /// ``` -fn at_break(tokenizer: &mut Tokenizer) -> State { +pub fn at_break(tokenizer: &mut Tokenizer) -> State {      if tokenizer.tokenize_state.size > LINK_REFERENCE_SIZE_MAX          || matches!(tokenizer.current, None | Some(b'['))          || (matches!(tokenizer.current, Some(b']')) && !tokenizer.tokenize_state.seen) @@ -101,13 +101,22 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {          State::Nok      } else {          match tokenizer.current { -            Some(b'\n') => tokenizer.attempt( -                space_or_tab_eol_with_options(EolOptions { -                    content_type: Some(ContentType::String), -                    connect: tokenizer.tokenize_state.connect, -                }), -                |ok| Box::new(if ok { after_eol } else { at_blank_line }), -            )(tokenizer), +            Some(b'\n') => { +                let state_name = space_or_tab_eol_with_options( +                    tokenizer, +                    EolOptions { +                        content_type: Some(ContentType::String), +                        connect: tokenizer.tokenize_state.connect, +                    }, +                ); +                tokenizer.attempt(state_name, |ok| { +                    State::Fn(if ok { +                        StateName::LabelEolAfter +                    } else { +                        StateName::LabelAtBlankLine +                    }) +                }) 
+            }              Some(b']') => {                  tokenizer.exit(tokenizer.tokenize_state.token_3.clone());                  tokenizer.enter(tokenizer.tokenize_state.token_2.clone()); @@ -129,20 +138,20 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {                      tokenizer.tokenize_state.connect = true;                  } -                label(tokenizer) +                inside(tokenizer)              }          }      }  }  /// To do. -fn after_eol(tokenizer: &mut Tokenizer) -> State { +pub fn eol_after(tokenizer: &mut Tokenizer) -> State {      tokenizer.tokenize_state.connect = true;      at_break(tokenizer)  }  /// To do. -fn at_blank_line(tokenizer: &mut Tokenizer) -> State { +pub fn at_blank_line(tokenizer: &mut Tokenizer) -> State {      tokenizer.tokenize_state.marker = 0;      tokenizer.tokenize_state.connect = false;      State::Nok @@ -154,7 +163,7 @@ fn at_blank_line(tokenizer: &mut Tokenizer) -> State {  /// > | [a]  ///      ^  /// ``` -fn label(tokenizer: &mut Tokenizer) -> State { +pub fn inside(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          None | Some(b'\n' | b'[' | b']') => {              tokenizer.exit(Token::Data); @@ -165,13 +174,16 @@ fn label(tokenizer: &mut Tokenizer) -> State {                  tokenizer.exit(Token::Data);                  at_break(tokenizer)              } else { -                let func = if matches!(byte, b'\\') { escape } else { label };                  tokenizer.consume();                  tokenizer.tokenize_state.size += 1;                  if !tokenizer.tokenize_state.seen && !matches!(byte, b'\t' | b' ') {                      tokenizer.tokenize_state.seen = true;                  } -                State::Fn(Box::new(func)) +                State::Fn(if matches!(byte, b'\\') { +                    StateName::LabelEscape +                } else { +                    StateName::LabelInside +                })              }          }      } @@ -183,13 +195,13 @@ fn label(tokenizer: &mut Tokenizer) -> State {  /// > | [a\*a]  ///        ^  /// ``` -fn escape(tokenizer: &mut Tokenizer) -> State { +pub fn escape(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'[' | b'\\' | b']') => {              tokenizer.consume();              tokenizer.tokenize_state.size += 1; -            State::Fn(Box::new(label)) +            State::Fn(StateName::LabelInside)          } -        _ => label(tokenizer), +        _ => inside(tokenizer),      }  } diff --git a/src/construct/partial_non_lazy_continuation.rs b/src/construct/partial_non_lazy_continuation.rs index 6005a6c..6d5cd7a 100644 --- a/src/construct/partial_non_lazy_continuation.rs +++ b/src/construct/partial_non_lazy_continuation.rs @@ -11,7 +11,7 @@  //! [html_flow]: crate::construct::html_flow  use crate::token::Token; -use crate::tokenizer::{State, Tokenizer}; +use crate::tokenizer::{State, StateName, Tokenizer};  /// Start of continuation.  
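// Note: `inside` / `escape` above form a two-state loop: a backslash hops to
// the escape state, which may eat one escapable byte, then control returns
// to the inside state. The same walk over a finished slice (a sketch; `[`,
// `\`, and `]` are the escapable bytes for labels, and multi-line labels are
// ignored here):

fn label_text_end(bytes: &[u8]) -> usize {
    let mut index = 0;
    while index < bytes.len() {
        match bytes[index] {
            b'\n' | b'[' | b']' => break,
            b'\\' if matches!(bytes.get(index + 1).copied(), Some(b'[' | b'\\' | b']')) => {
                index += 2;
            }
            _ => index += 1,
        }
    }
    index
}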
/// @@ -26,7 +26,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {              tokenizer.enter(Token::LineEnding);              tokenizer.consume();              tokenizer.exit(Token::LineEnding); -            State::Fn(Box::new(after)) +            State::Fn(StateName::NonLazyContinuationAfter)          }          _ => State::Nok,      } @@ -39,7 +39,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {  /// > | b  ///     ^  /// ``` -fn after(tokenizer: &mut Tokenizer) -> State { +pub fn after(tokenizer: &mut Tokenizer) -> State {      if tokenizer.lazy {          State::Nok      } else { diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs index e3eac45..b0b35a6 100644 --- a/src/construct/partial_space_or_tab.rs +++ b/src/construct/partial_space_or_tab.rs @@ -6,7 +6,7 @@  use crate::subtokenize::link;  use crate::token::Token; -use crate::tokenizer::{ContentType, State, StateFn, Tokenizer}; +use crate::tokenizer::{ContentType, State, StateName, Tokenizer};  /// Options to parse `space_or_tab`.  #[derive(Debug)] @@ -37,8 +37,8 @@ pub struct EolOptions {  /// ```bnf  /// space_or_tab ::= 1*( ' ' '\t' )  /// ``` -pub fn space_or_tab() -> Box<StateFn> { -    space_or_tab_min_max(1, usize::MAX) +pub fn space_or_tab(tokenizer: &mut Tokenizer) -> StateName { +    space_or_tab_min_max(tokenizer, 1, usize::MAX)  }  /// Between `x` and `y` `space_or_tab`. @@ -46,26 +46,27 @@ pub fn space_or_tab() -> Box<StateFn> {  /// ```bnf  /// space_or_tab_min_max ::= x*y( ' ' '\t' )  /// ``` -pub fn space_or_tab_min_max(min: usize, max: usize) -> Box<StateFn> { -    space_or_tab_with_options(Options { -        kind: Token::SpaceOrTab, -        min, -        max, -        content_type: None, -        connect: false, -    }) +pub fn space_or_tab_min_max(tokenizer: &mut Tokenizer, min: usize, max: usize) -> StateName { +    space_or_tab_with_options( +        tokenizer, +        Options { +            kind: Token::SpaceOrTab, +            min, +            max, +            content_type: None, +            connect: false, +        }, +    )  }  /// `space_or_tab`, with the given options. 
-pub fn space_or_tab_with_options(options: Options) -> Box<StateFn> { -    Box::new(|tokenizer| { -        tokenizer.tokenize_state.space_or_tab_connect = options.connect; -        tokenizer.tokenize_state.space_or_tab_content_type = options.content_type; -        tokenizer.tokenize_state.space_or_tab_min = options.min; -        tokenizer.tokenize_state.space_or_tab_max = options.max; -        tokenizer.tokenize_state.space_or_tab_token = options.kind; -        start(tokenizer) -    }) +pub fn space_or_tab_with_options(tokenizer: &mut Tokenizer, options: Options) -> StateName { +    tokenizer.tokenize_state.space_or_tab_connect = options.connect; +    tokenizer.tokenize_state.space_or_tab_content_type = options.content_type; +    tokenizer.tokenize_state.space_or_tab_min = options.min; +    tokenizer.tokenize_state.space_or_tab_max = options.max; +    tokenizer.tokenize_state.space_or_tab_token = options.kind; +    StateName::SpaceOrTabStart  }  /// `space_or_tab`, or optionally `space_or_tab`, one `eol`, and @@ -74,41 +75,21 @@ pub fn space_or_tab_with_options(options: Options) -> Box<StateFn> {  /// ```bnf  /// space_or_tab_eol ::= 1*( ' ' '\t' ) | 0*( ' ' '\t' ) eol 0*( ' ' '\t' )  /// ``` -pub fn space_or_tab_eol() -> Box<StateFn> { -    space_or_tab_eol_with_options(EolOptions { -        content_type: None, -        connect: false, -    }) +pub fn space_or_tab_eol(tokenizer: &mut Tokenizer) -> StateName { +    space_or_tab_eol_with_options( +        tokenizer, +        EolOptions { +            content_type: None, +            connect: false, +        }, +    )  }  /// `space_or_tab_eol`, with the given options. -pub fn space_or_tab_eol_with_options(options: EolOptions) -> Box<StateFn> { -    Box::new(move |tokenizer| { -        tokenizer.tokenize_state.space_or_tab_eol_content_type = options.content_type; -        tokenizer.tokenize_state.space_or_tab_eol_connect = options.connect; - -        tokenizer.attempt( -            space_or_tab_with_options(Options { -                kind: Token::SpaceOrTab, -                min: 1, -                max: usize::MAX, -                content_type: tokenizer -                    .tokenize_state -                    .space_or_tab_eol_content_type -                    .clone(), -                connect: tokenizer.tokenize_state.space_or_tab_eol_connect, -            }), -            move |ok| { -                Box::new(move |tokenizer| { -                    if ok { -                        tokenizer.tokenize_state.space_or_tab_eol_ok = ok; -                    } - -                    after_space_or_tab(tokenizer) -                }) -            }, -        )(tokenizer) -    }) +pub fn space_or_tab_eol_with_options(tokenizer: &mut Tokenizer, options: EolOptions) -> StateName { +    tokenizer.tokenize_state.space_or_tab_eol_content_type = options.content_type; +    tokenizer.tokenize_state.space_or_tab_eol_connect = options.connect; +    StateName::SpaceOrTabEolStart  }  /// Before `space_or_tab`. 
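// Note: the key consequence of dropping boxed closures is visible right
// here: a closure could capture its `Options`, a bare state name cannot. So
// the helpers now stash their configuration on the shared `tokenize_state`
// and hand back the name of the entry state. Reduced to its shape:

#[derive(Clone, Copy)]
enum StateName {
    SpaceOrTabStart,
}

#[derive(Default)]
struct TokenizeState {
    space_or_tab_min: usize,
    space_or_tab_max: usize,
}

#[derive(Default)]
struct Tokenizer {
    tokenize_state: TokenizeState,
}

fn space_or_tab_min_max(tokenizer: &mut Tokenizer, min: usize, max: usize) -> StateName {
    tokenizer.tokenize_state.space_or_tab_min = min;
    tokenizer.tokenize_state.space_or_tab_max = max;
    StateName::SpaceOrTabStart
}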
@@ -117,7 +98,7 @@ pub fn space_or_tab_eol_with_options(options: EolOptions) -> Box<StateFn> {  /// > | a␠␠b  ///      ^  /// ``` -fn start(tokenizer: &mut Tokenizer) -> State { +pub fn start(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'\t' | b' ') if tokenizer.tokenize_state.space_or_tab_max > 0 => {              tokenizer.enter_with_content( @@ -144,7 +125,7 @@ fn start(tokenizer: &mut Tokenizer) -> State {  /// > | a␠␠b  ///       ^  /// ``` -fn inside(tokenizer: &mut Tokenizer) -> State { +pub fn inside(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'\t' | b' ')              if tokenizer.tokenize_state.space_or_tab_size @@ -152,7 +133,7 @@ fn inside(tokenizer: &mut Tokenizer) -> State {          {              tokenizer.consume();              tokenizer.tokenize_state.space_or_tab_size += 1; -            State::Fn(Box::new(inside)) +            State::Fn(StateName::SpaceOrTabInside)          }          _ => {              tokenizer.exit(tokenizer.tokenize_state.space_or_tab_token.clone()); @@ -167,7 +148,7 @@ fn inside(tokenizer: &mut Tokenizer) -> State {  /// > | a␠␠b  ///        ^  /// ``` -fn after(tokenizer: &mut Tokenizer) -> State { +pub fn after(tokenizer: &mut Tokenizer) -> State {      let state = if tokenizer.tokenize_state.space_or_tab_size          >= tokenizer.tokenize_state.space_or_tab_min      { @@ -184,6 +165,44 @@ fn after(tokenizer: &mut Tokenizer) -> State {      state  } +pub fn eol_start(tokenizer: &mut Tokenizer) -> State { +    let state_name = space_or_tab_with_options( +        tokenizer, +        Options { +            kind: Token::SpaceOrTab, +            min: 1, +            max: usize::MAX, +            content_type: tokenizer +                .tokenize_state +                .space_or_tab_eol_content_type +                .clone(), +            connect: tokenizer.tokenize_state.space_or_tab_eol_connect, +        }, +    ); + +    tokenizer.attempt(state_name, move |ok| { +        State::Fn(if ok { +            StateName::SpaceOrTabEolAfterFirst +        } else { +            StateName::SpaceOrTabEolAtEol +        }) +    }) +} + +pub fn eol_after_first(tokenizer: &mut Tokenizer) -> State { +    tokenizer.tokenize_state.space_or_tab_eol_ok = true; + +    if tokenizer +        .tokenize_state +        .space_or_tab_eol_content_type +        .is_some() +    { +        tokenizer.tokenize_state.space_or_tab_eol_connect = true; +    } + +    eol_at_eol(tokenizer) +} +  /// `space_or_tab_eol`: after optionally first `space_or_tab`.  
///  /// ```markdown @@ -191,16 +210,7 @@ fn after(tokenizer: &mut Tokenizer) -> State {  ///      ^  ///   | b  /// ``` -fn after_space_or_tab(tokenizer: &mut Tokenizer) -> State { -    if tokenizer.tokenize_state.space_or_tab_eol_ok -        && tokenizer -            .tokenize_state -            .space_or_tab_eol_content_type -            .is_some() -    { -        tokenizer.tokenize_state.space_or_tab_eol_connect = true; -    } - +pub fn eol_at_eol(tokenizer: &mut Tokenizer) -> State {      if let Some(b'\n') = tokenizer.current {          tokenizer.enter_with_content(              Token::LineEnding, @@ -223,17 +233,17 @@ fn after_space_or_tab(tokenizer: &mut Tokenizer) -> State {          tokenizer.consume();          tokenizer.exit(Token::LineEnding); -        State::Fn(Box::new(after_eol)) +        State::Fn(StateName::SpaceOrTabEolAfterEol)      } else { -        let state = if tokenizer.tokenize_state.space_or_tab_eol_ok { -            State::Ok -        } else { -            State::Nok -        }; +        let ok = tokenizer.tokenize_state.space_or_tab_eol_ok;          tokenizer.tokenize_state.space_or_tab_eol_content_type = None;          tokenizer.tokenize_state.space_or_tab_eol_connect = false;          tokenizer.tokenize_state.space_or_tab_eol_ok = false; -        state +        if ok { +            State::Ok +        } else { +            State::Nok +        }      }  } @@ -245,9 +255,10 @@ fn after_space_or_tab(tokenizer: &mut Tokenizer) -> State {  ///     ^  /// ```  #[allow(clippy::needless_pass_by_value)] -fn after_eol(tokenizer: &mut Tokenizer) -> State { -    tokenizer.attempt_opt( -        space_or_tab_with_options(Options { +pub fn eol_after_eol(tokenizer: &mut Tokenizer) -> State { +    let state_name = space_or_tab_with_options( +        tokenizer, +        Options {              kind: Token::SpaceOrTab,              min: 1,              max: usize::MAX, @@ -256,9 +267,9 @@ fn after_eol(tokenizer: &mut Tokenizer) -> State {                  .space_or_tab_eol_content_type                  .clone(),              connect: tokenizer.tokenize_state.space_or_tab_eol_connect, -        }), -        after_more_space_or_tab, -    )(tokenizer) +        }, +    ); +    tokenizer.attempt_opt(state_name, StateName::SpaceOrTabEolAfterMore)  }  /// `space_or_tab_eol`: after more (optional) `space_or_tab`. @@ -268,7 +279,7 @@ fn after_eol(tokenizer: &mut Tokenizer) -> State {  /// > | b  ///     ^  /// ``` -fn after_more_space_or_tab(tokenizer: &mut Tokenizer) -> State { +pub fn eol_after_more(tokenizer: &mut Tokenizer) -> State {      tokenizer.tokenize_state.space_or_tab_eol_content_type = None;      tokenizer.tokenize_state.space_or_tab_eol_connect = false;      tokenizer.tokenize_state.space_or_tab_eol_ok = false; diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs index 6bf9099..8b72608 100644 --- a/src/construct/partial_title.rs +++ b/src/construct/partial_title.rs @@ -30,10 +30,10 @@  //! [character_reference]: crate::construct::character_reference  //! [label_end]: crate::construct::label_end -use super::partial_space_or_tab::{space_or_tab_eol_with_options, EolOptions}; +use crate::construct::partial_space_or_tab::{space_or_tab_eol_with_options, EolOptions};  use crate::subtokenize::link;  use crate::token::Token; -use crate::tokenizer::{ContentType, State, Tokenizer}; +use crate::tokenizer::{ContentType, State, StateName, Tokenizer};  /// Before a title.  
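// Note: the BNF for `space_or_tab_eol` earlier in this file's diff accepts
// either some whitespace, or optional whitespace around exactly one line
// ending. As a standalone predicate over a finished slice (a sketch):

fn is_space_or_tab_eol(bytes: &[u8]) -> bool {
    let mut index = 0;
    while matches!(bytes.get(index).copied(), Some(b'\t' | b' ')) {
        index += 1;
    }
    let leading = index;
    if bytes.get(index).copied() == Some(b'\n') {
        index += 1;
        while matches!(bytes.get(index).copied(), Some(b'\t' | b' ')) {
            index += 1;
        }
        index == bytes.len()
    } else {
        leading > 0 && index == bytes.len()
    }
}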
/// @@ -50,7 +50,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {              tokenizer.enter(tokenizer.tokenize_state.token_2.clone());              tokenizer.consume();              tokenizer.exit(tokenizer.tokenize_state.token_2.clone()); -            State::Fn(Box::new(begin)) +            State::Fn(StateName::TitleBegin)          }          _ => State::Nok,      } @@ -64,7 +64,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {  /// > | "a"  ///      ^  /// ``` -fn begin(tokenizer: &mut Tokenizer) -> State { +pub fn begin(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'"' | b'\'' | b')')              if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => @@ -90,20 +90,30 @@ fn begin(tokenizer: &mut Tokenizer) -> State {  /// > | "a"  ///      ^  /// ``` -fn at_break(tokenizer: &mut Tokenizer) -> State { +pub fn at_break(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          None => {              tokenizer.tokenize_state.marker = 0;              tokenizer.tokenize_state.connect = false;              State::Nok          } -        Some(b'\n') => tokenizer.attempt( -            space_or_tab_eol_with_options(EolOptions { -                content_type: Some(ContentType::String), -                connect: tokenizer.tokenize_state.connect, -            }), -            |ok| Box::new(if ok { after_eol } else { at_blank_line }), -        )(tokenizer), +        Some(b'\n') => { +            let state_name = space_or_tab_eol_with_options( +                tokenizer, +                EolOptions { +                    content_type: Some(ContentType::String), +                    connect: tokenizer.tokenize_state.connect, +                }, +            ); + +            tokenizer.attempt(state_name, |ok| { +                State::Fn(if ok { +                    StateName::TitleAfterEol +                } else { +                    StateName::TitleAtBlankLine +                }) +            }) +        }          Some(b'"' | b'\'' | b')')              if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker =>          { @@ -120,19 +130,19 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {                  tokenizer.tokenize_state.connect = true;              } -            title(tokenizer) +            inside(tokenizer)          }      }  }  /// To do. -fn after_eol(tokenizer: &mut Tokenizer) -> State { +pub fn after_eol(tokenizer: &mut Tokenizer) -> State {      tokenizer.tokenize_state.connect = true;      at_break(tokenizer)  }  /// To do. 
-fn at_blank_line(tokenizer: &mut Tokenizer) -> State { +pub fn at_blank_line(tokenizer: &mut Tokenizer) -> State {      tokenizer.tokenize_state.marker = 0;      tokenizer.tokenize_state.connect = false;      State::Nok @@ -144,7 +154,7 @@ fn at_blank_line(tokenizer: &mut Tokenizer) -> State {  /// > | "a"  ///      ^  /// ``` -fn title(tokenizer: &mut Tokenizer) -> State { +pub fn inside(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          None | Some(b'\n') => {              tokenizer.exit(Token::Data); @@ -157,9 +167,12 @@ fn title(tokenizer: &mut Tokenizer) -> State {              at_break(tokenizer)          }          Some(byte) => { -            let func = if matches!(byte, b'\\') { escape } else { title };              tokenizer.consume(); -            State::Fn(Box::new(func)) +            State::Fn(if matches!(byte, b'\\') { +                StateName::TitleEscape +            } else { +                StateName::TitleInside +            })          }      }  } @@ -170,12 +183,12 @@ fn title(tokenizer: &mut Tokenizer) -> State {  /// > | "a\*b"  ///      ^  /// ``` -fn escape(tokenizer: &mut Tokenizer) -> State { +pub fn escape(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'"' | b'\'' | b')') => {              tokenizer.consume(); -            State::Fn(Box::new(title)) +            State::Fn(StateName::TitleInside)          } -        _ => title(tokenizer), +        _ => inside(tokenizer),      }  } diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs index 2ed2046..4ed25b6 100644 --- a/src/construct/thematic_break.rs +++ b/src/construct/thematic_break.rs @@ -51,7 +51,7 @@  use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};  use crate::constant::{TAB_SIZE, THEMATIC_BREAK_MARKER_COUNT_MIN};  use crate::token::Token; -use crate::tokenizer::{State, Tokenizer}; +use crate::tokenizer::{State, StateName, Tokenizer};  /// Start of a thematic break.  
/// @@ -62,17 +62,17 @@ use crate::tokenizer::{State, Tokenizer};  pub fn start(tokenizer: &mut Tokenizer) -> State {      if tokenizer.parse_state.constructs.thematic_break {          tokenizer.enter(Token::ThematicBreak); -        tokenizer.go( -            space_or_tab_min_max( -                0, -                if tokenizer.parse_state.constructs.code_indented { -                    TAB_SIZE - 1 -                } else { -                    usize::MAX -                }, -            ), -            before, -        )(tokenizer) +        let state_name = space_or_tab_min_max( +            tokenizer, +            0, +            if tokenizer.parse_state.constructs.code_indented { +                TAB_SIZE - 1 +            } else { +                usize::MAX +            }, +        ); + +        tokenizer.go(state_name, StateName::ThematicBreakBefore)      } else {          State::Nok      } @@ -84,7 +84,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {  /// > | ***  ///     ^  /// ``` -fn before(tokenizer: &mut Tokenizer) -> State { +pub fn before(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'*' | b'-' | b'_') => {              tokenizer.tokenize_state.marker = tokenizer.current.unwrap(); @@ -100,7 +100,7 @@ fn before(tokenizer: &mut Tokenizer) -> State {  /// > | ***  ///     ^  /// ``` -fn at_break(tokenizer: &mut Tokenizer) -> State { +pub fn at_break(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          None | Some(b'\n') if tokenizer.tokenize_state.size >= THEMATIC_BREAK_MARKER_COUNT_MIN => {              tokenizer.tokenize_state.marker = 0; @@ -130,18 +130,19 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {  /// > | ***  ///     ^  /// ``` -fn sequence(tokenizer: &mut Tokenizer) -> State { +pub fn sequence(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'*' | b'-' | b'_')              if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker =>          {              tokenizer.consume();              tokenizer.tokenize_state.size += 1; -            State::Fn(Box::new(sequence)) +            State::Fn(StateName::ThematicBreakSequence)          }          _ => {              tokenizer.exit(Token::ThematicBreakSequence); -            tokenizer.attempt_opt(space_or_tab(), at_break)(tokenizer) +            let state_name = space_or_tab(tokenizer); +            tokenizer.attempt_opt(state_name, StateName::ThematicBreakAtBreak)          }      }  } diff --git a/src/content/document.rs b/src/content/document.rs index 33c8ff9..7a43d48 100644 --- a/src/content/document.rs +++ b/src/content/document.rs @@ -8,16 +8,13 @@  //! *   [Block quote][crate::construct::block_quote]  //! 
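// Note: the thematic break states above reduce to: pick one marker from
// `*`, `-`, `_`; require at least THEMATIC_BREAK_MARKER_COUNT_MIN (3) of it,
// allowing interleaved spaces and tabs, and nothing else before the line
// ending. As a standalone predicate over one line (indent limits omitted):

fn is_thematic_break(line: &[u8]) -> bool {
    let mut marker = 0u8;
    let mut count = 0;
    for &byte in line {
        match byte {
            b'\t' | b' ' => {}
            b'*' | b'-' | b'_' if marker == 0 || byte == marker => {
                marker = byte;
                count += 1;
            }
            _ => return false,
        }
    }
    count >= 3
}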
*   [List][crate::construct::list] -use crate::construct::{ -    block_quote::{cont as block_quote_cont, start as block_quote}, -    list::{cont as list_item_const, start as list_item}, -    partial_bom::start as bom, -}; -use crate::content::flow::start as flow;  use crate::parser::ParseState;  use crate::subtokenize::subtokenize;  use crate::token::Token; -use crate::tokenizer::{Container, ContainerState, Event, EventType, Point, State, Tokenizer}; +use crate::tokenizer::{ +    Container, ContainerState, ContentType, Event, EventType, Link, Point, State, StateName, +    Tokenizer, +};  use crate::util::{      normalize_identifier::normalize_identifier,      skip, @@ -59,7 +56,7 @@ enum Phase {  pub fn document(parse_state: &mut ParseState, point: Point) -> Vec<Event> {      let mut tokenizer = Tokenizer::new(point, parse_state); -    let state = tokenizer.push(0, parse_state.bytes.len(), Box::new(start)); +    let state = tokenizer.push(0, parse_state.bytes.len(), StateName::DocumentStart);      tokenizer.flush(state, true);      let mut index = 0; @@ -103,8 +100,13 @@ pub fn document(parse_state: &mut ParseState, point: Point) -> Vec<Event> {  /// > | a  ///     ^  /// ``` -fn start(tokenizer: &mut Tokenizer) -> State { -    tokenizer.attempt_opt(bom, line_start)(tokenizer) +pub fn start(tokenizer: &mut Tokenizer) -> State { +    tokenizer.tokenize_state.child_tokenizer = Some(Box::new(Tokenizer::new( +        tokenizer.point.clone(), +        tokenizer.parse_state, +    ))); +    tokenizer.tokenize_state.document_child_state = Some(State::Fn(StateName::FlowStart)); +    tokenizer.attempt_opt(StateName::BomStart, StateName::DocumentLineStart)  }  /// Start of a line. @@ -115,13 +117,8 @@ fn start(tokenizer: &mut Tokenizer) -> State {  /// > | > b  ///     ^  /// ``` -fn line_start(tokenizer: &mut Tokenizer) -> State { +pub fn line_start(tokenizer: &mut Tokenizer) -> State {      tokenizer.tokenize_state.document_continued = 0; -    tokenizer.tokenize_state.document_index = tokenizer.events.len(); -    tokenizer -        .tokenize_state -        .document_inject -        .push((vec![], vec![]));      // Containers would only be interrupting if we’ve continued.      tokenizer.interrupt = false;      container_existing_before(tokenizer) @@ -134,7 +131,7 @@ fn line_start(tokenizer: &mut Tokenizer) -> State {  /// > | > b  ///     ^  /// ``` -fn container_existing_before(tokenizer: &mut Tokenizer) -> State { +pub fn container_existing_before(tokenizer: &mut Tokenizer) -> State {      // If there are more existing containers, check whether the next one continues.      
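// Note: per the commit message, `document` now drives a dedicated child
// tokenizer for the flow content type. Much simplified (illustrative types,
// not the crate's API): the parent strips container prefixes itself and
// forwards the remainder of each line to the child.

struct Child {
    chunks: Vec<Vec<u8>>,
}

fn document(bytes: &[u8], container_prefix: usize) -> Child {
    let mut child = Child { chunks: Vec::new() };
    for line in bytes.split(|&byte| byte == b'\n') {
        // The container prefix (e.g. `> `) was already parsed by the parent.
        let start = container_prefix.min(line.len());
        // The real child is fed byte ranges and paused at each EOL; here the
        // flow chunks are simply collected.
        child.chunks.push(line[start..].to_vec());
    }
    child
}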
if tokenizer.tokenize_state.document_continued          < tokenizer.tokenize_state.document_container_stack.len() @@ -143,19 +140,19 @@ fn container_existing_before(tokenizer: &mut Tokenizer) -> State {              .tokenize_state              .document_container_stack              .remove(tokenizer.tokenize_state.document_continued); -        let cont = match container.kind { -            Container::BlockQuote => block_quote_cont, -            Container::ListItem => list_item_const, +        let state_name = match container.kind { +            Container::BlockQuote => StateName::BlockQuoteContStart, +            Container::ListItem => StateName::ListContStart,          };          tokenizer.container = Some(container); -        tokenizer.attempt(cont, |ok| { -            Box::new(if ok { -                container_existing_after +        tokenizer.attempt(state_name, |ok| { +            State::Fn(if ok { +                StateName::DocumentContainerExistingAfter              } else { -                container_existing_missing +                StateName::DocumentContainerExistingMissing              }) -        })(tokenizer) +        })      }      // Otherwise, check new containers.      else { @@ -170,7 +167,7 @@ fn container_existing_before(tokenizer: &mut Tokenizer) -> State {  /// > | > b  ///     ^  /// ``` -fn container_existing_missing(tokenizer: &mut Tokenizer) -> State { +pub fn container_existing_missing(tokenizer: &mut Tokenizer) -> State {      let container = tokenizer.container.take().unwrap();      tokenizer          .tokenize_state @@ -186,7 +183,7 @@ fn container_existing_missing(tokenizer: &mut Tokenizer) -> State {  /// > |   b  ///       ^  /// ``` -fn container_existing_after(tokenizer: &mut Tokenizer) -> State { +pub fn container_existing_after(tokenizer: &mut Tokenizer) -> State {      let container = tokenizer.container.take().unwrap();      tokenizer          .tokenize_state @@ -204,17 +201,28 @@ fn container_existing_after(tokenizer: &mut Tokenizer) -> State {  /// > | > b  ///     ^  /// ``` -fn container_new_before(tokenizer: &mut Tokenizer) -> State { +pub fn container_new_before(tokenizer: &mut Tokenizer) -> State {      // If we have completely continued, restore the flow’s past `interrupt`      // status.      if tokenizer.tokenize_state.document_continued          == tokenizer.tokenize_state.document_container_stack.len()      { -        tokenizer.interrupt = tokenizer.tokenize_state.document_interrupt_before; +        tokenizer.interrupt = tokenizer +            .tokenize_state +            .child_tokenizer +            .as_ref() +            .unwrap() +            .interrupt;          // …and if we’re in a concrete construct, new containers can’t “pierce”          // into them. 
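// Note: the loop above asks each open container whether it continues on the
// current line; the index of the first failure is where containers will be
// closed. Standalone shape (continuation checks much simplified):

#[derive(Clone, Copy)]
enum Container {
    BlockQuote,
    ListItem,
}

fn continued(stack: &[Container], line: &[u8]) -> usize {
    let mut index = 0;
    let mut rest = line;
    while index < stack.len() {
        let ok = match stack[index] {
            Container::BlockQuote => rest.first().copied() == Some(b'>'),
            Container::ListItem => matches!(rest.first().copied(), Some(b'\t' | b' ')),
        };
        if !ok {
            break;
        }
        rest = &rest[1..];
        index += 1;
    }
    index
}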
-        if tokenizer.concrete { +        if tokenizer +            .tokenize_state +            .child_tokenizer +            .as_ref() +            .unwrap() +            .concrete +        {              return containers_after(tokenizer);          }      } @@ -227,17 +235,17 @@ fn container_new_before(tokenizer: &mut Tokenizer) -> State {          size: 0,      }); -    tokenizer.attempt(block_quote, |ok| { -        Box::new(if ok { -            container_new_after +    tokenizer.attempt(StateName::BlockQuoteStart, |ok| { +        State::Fn(if ok { +            StateName::DocumentContainerNewAfter          } else { -            container_new_before_not_blockquote +            StateName::DocumentContainerNewBeforeNotBlockQuote          }) -    })(tokenizer) +    })  }  /// To do. -fn container_new_before_not_blockquote(tokenizer: &mut Tokenizer) -> State { +pub fn container_new_before_not_block_quote(tokenizer: &mut Tokenizer) -> State {      // List item?      tokenizer.container = Some(ContainerState {          kind: Container::ListItem, @@ -245,13 +253,13 @@ fn container_new_before_not_blockquote(tokenizer: &mut Tokenizer) -> State {          size: 0,      }); -    tokenizer.attempt(list_item, |ok| { -        Box::new(if ok { -            container_new_after +    tokenizer.attempt(StateName::ListStart, |ok| { +        State::Fn(if ok { +            StateName::DocumentContainerNewAfter          } else { -            containers_after +            StateName::DocumentContainersAfter          }) -    })(tokenizer) +    })  }  /// After a new container. @@ -262,31 +270,9 @@ fn container_new_before_not_blockquote(tokenizer: &mut Tokenizer) -> State {  /// > | > b  ///       ^  /// ``` -fn container_new_after(tokenizer: &mut Tokenizer) -> State { +pub fn container_new_after(tokenizer: &mut Tokenizer) -> State {      let container = tokenizer.container.take().unwrap(); -    // Remove from the event stack. -    // We’ll properly add exits at different points manually. -    let token_type = match container.kind { -        Container::BlockQuote => Token::BlockQuote, -        Container::ListItem => Token::ListItem, -    }; - -    let mut stack_index = tokenizer.stack.len(); -    let mut found = false; - -    while stack_index > 0 { -        stack_index -= 1; - -        if tokenizer.stack[stack_index] == token_type { -            tokenizer.stack.remove(stack_index); -            found = true; -            break; -        } -    } - -    debug_assert!(found, "expected to find container token to exit"); -      // If we did not continue all existing containers, and there is a new one,      // close the flow and those containers.      if tokenizer.tokenize_state.document_continued @@ -314,37 +300,55 @@ fn container_new_after(tokenizer: &mut Tokenizer) -> State {  /// > | > b  ///       ^  /// ``` -fn containers_after(tokenizer: &mut Tokenizer) -> State { -    // Store the container events we parsed. 
-    tokenizer -        .tokenize_state -        .document_inject -        .last_mut() -        .unwrap() -        .0 -        .append( -            &mut tokenizer -                .events -                .split_off(tokenizer.tokenize_state.document_index), -        ); +pub fn containers_after(tokenizer: &mut Tokenizer) -> State { +    if let Some(ref mut child) = tokenizer.tokenize_state.child_tokenizer { +        child.lazy = tokenizer.tokenize_state.document_continued +            != tokenizer.tokenize_state.document_container_stack.len(); +        child.interrupt = tokenizer.tokenize_state.document_interrupt_before; +        child.define_skip(tokenizer.point.clone()); +    } -    tokenizer.lazy = tokenizer.tokenize_state.document_continued -        != tokenizer.tokenize_state.document_container_stack.len(); -    tokenizer.interrupt = tokenizer.tokenize_state.document_interrupt_before; -    tokenizer.define_skip_current(); +    match tokenizer.current { +        // Note: EOL is part of data. +        None => flow_end(tokenizer), +        Some(_) => { +            let current = tokenizer.events.len(); +            let previous = tokenizer.tokenize_state.document_data_index.take(); +            if let Some(previous) = previous { +                tokenizer.events[previous].link.as_mut().unwrap().next = Some(current); +            } +            tokenizer.tokenize_state.document_data_index = Some(current); +            tokenizer.enter_with_link( +                Token::Data, +                Some(Link { +                    previous, +                    next: None, +                    content_type: ContentType::Flow, +                }), +            ); +            flow_inside(tokenizer) +        } +    } +} -    let state = tokenizer -        .tokenize_state -        .document_next -        .take() -        .unwrap_or_else(|| Box::new(flow)); - -    // Parse flow, pausing after eols. -    tokenizer.go_until( -        state, -        |code| matches!(code, Some(b'\n')), -        |state| Box::new(|t| flow_end(t, state)), -    )(tokenizer) +/// To do. +pub fn flow_inside(tokenizer: &mut Tokenizer) -> State { +    match tokenizer.current { +        None => { +            tokenizer.exit(Token::Data); +            flow_end(tokenizer) +        } +        // Note: EOL is part of data. +        Some(b'\n') => { +            tokenizer.consume(); +            tokenizer.exit(Token::Data); +            State::Fn(StateName::DocumentFlowEnd) +        } +        Some(_) => { +            tokenizer.consume(); +            State::Fn(StateName::DocumentFlowInside) +        } +    }  }  /// After flow (after eol or at eof). 
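// Note: `enter_with_link` above chains each flow chunk to the previous one,
// so the child tokenizer can later treat the chunks as one contiguous
// stream. The chain bookkeeping in isolation (toy event type):

struct Link {
    previous: Option<usize>,
    next: Option<usize>,
}

struct Event {
    link: Option<Link>,
}

fn push_chunk(events: &mut Vec<Event>, last: &mut Option<usize>) {
    let current = events.len();
    if let Some(previous) = *last {
        // Point the previous chunk forward at this one.
        events[previous].link.as_mut().unwrap().next = Some(current);
    }
    events.push(Event {
        link: Some(Link {
            previous: *last,
            next: None,
        }),
    });
    *last = Some(current);
}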
@@ -354,42 +358,70 @@ fn containers_after(tokenizer: &mut Tokenizer) -> State {  /// > | > b  ///     ^  ^  /// ``` -fn flow_end(tokenizer: &mut Tokenizer, result: State) -> State { -    let paragraph = !tokenizer.events.is_empty() -        && tokenizer.events[skip::opt_back( -            &tokenizer.events, -            tokenizer.events.len() - 1, -            &[Token::LineEnding], -        )] -        .token_type -            == Token::Paragraph; - -    if tokenizer.lazy && paragraph && tokenizer.tokenize_state.document_paragraph_before { -        tokenizer.tokenize_state.document_continued = -            tokenizer.tokenize_state.document_container_stack.len(); -    } - -    if tokenizer.tokenize_state.document_continued -        != tokenizer.tokenize_state.document_container_stack.len() +pub fn flow_end(tokenizer: &mut Tokenizer) -> State { +    let mut paragraph = false; +    let mut interrupt = false; + +    // We have new data. +    // Note that everything except for a `null` is data. +    if tokenizer.events.len() > 1 +        && tokenizer.events[tokenizer.events.len() - 1].token_type == Token::Data      { -        exit_containers(tokenizer, &Phase::After); -    } +        let position = Position::from_exit_event(&tokenizer.events, tokenizer.events.len() - 1); + +        let state = tokenizer +            .tokenize_state +            .document_child_state +            .take() +            .unwrap_or(State::Fn(StateName::FlowStart)); + +        let state_name = match state { +            State::Fn(state_name) => state_name, +            _ => unreachable!("expected state name"), +        }; + +        if let Some(ref mut child) = tokenizer.tokenize_state.child_tokenizer { +            // To do: handle VS? +            // if position.start.vs > 0 { +            // } +            let state = child.push(position.start.index, position.end.index, state_name); + +            interrupt = child.interrupt; +            paragraph = matches!(state, State::Fn(StateName::ParagraphInside)) +                || (!child.events.is_empty() +                    && child.events[skip::opt_back( +                        &child.events, +                        child.events.len() - 1, +                        &[Token::LineEnding], +                    )] +                    .token_type +                        == Token::Paragraph); + +            tokenizer.tokenize_state.document_child_state = Some(state); + +            if child.lazy && paragraph && tokenizer.tokenize_state.document_paragraph_before { +                tokenizer.tokenize_state.document_continued = +                    tokenizer.tokenize_state.document_container_stack.len(); +            } -    match result { -        State::Ok => { -            if !tokenizer.tokenize_state.document_container_stack.is_empty() { -                tokenizer.tokenize_state.document_continued = 0; -                exit_containers(tokenizer, &Phase::Eof); +            if tokenizer.tokenize_state.document_continued +                != tokenizer.tokenize_state.document_container_stack.len() +            { +                exit_containers(tokenizer, &Phase::After);              } +        } +    } +    match tokenizer.current { +        None => { +            tokenizer.tokenize_state.document_continued = 0; +            exit_containers(tokenizer, &Phase::Eof);              resolve(tokenizer);              State::Ok          } -        State::Nok => unreachable!("unexpected `nok` from flow"), -        State::Fn(func) => { +        Some(_) => {              
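// Note: the paragraph test above leans on `skip::opt_back`, which walks
// backwards past ignorable token types; roughly equivalent (a sketch over
// plain strings instead of events):

fn opt_back(types: &[&str], mut index: usize, ignore: &[&str]) -> usize {
    while index > 0 && ignore.contains(&types[index]) {
        index -= 1;
    }
    index
}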
tokenizer.tokenize_state.document_paragraph_before = paragraph;
-            tokenizer.tokenize_state.document_interrupt_before = tokenizer.interrupt;
-            tokenizer.tokenize_state.document_next = Some(func);
+            tokenizer.tokenize_state.document_interrupt_before = interrupt;
             line_start(tokenizer)
         }
     }
@@ -403,98 +435,248 @@ fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) {
         .split_off(tokenizer.tokenize_state.document_continued);
 
     // So, we’re at the end of a line, but we need to close the *previous* line.
-    if *phase != Phase::Eof {
-        tokenizer.define_skip_current();
-        let mut current_events = tokenizer
-            .events
-            .split_off(tokenizer.tokenize_state.document_index);
-        let state = tokenizer
-            .tokenize_state
-            .document_next
-            .take()
-            .unwrap_or_else(|| Box::new(flow));
-        tokenizer.flush(State::Fn(state), false);
-
-        if *phase == Phase::Prefix {
-            tokenizer.tokenize_state.document_index = tokenizer.events.len();
+    if let Some(ref mut child) = tokenizer.tokenize_state.child_tokenizer {
+        if *phase != Phase::After {
+            let state = tokenizer
+                .tokenize_state
+                .document_child_state
+                .take()
+                .unwrap_or(State::Fn(StateName::FlowStart));
+
+            child.flush(state, false);
         }
 
-        tokenizer.events.append(&mut current_events);
-    }
+        if !stack_close.is_empty() {
+            let mut inject_index = tokenizer.events.len();
 
-    let mut exits = Vec::with_capacity(stack_close.len());
+            // Move past the current data to find the last container start if we’re
+            // closing due to a potential lazy flow that was not lazy.
+            if *phase == Phase::After {
+                inject_index -= 2;
+            }
 
-    while !stack_close.is_empty() {
-        let container = stack_close.pop().unwrap();
-        let token_type = match container.kind {
-            Container::BlockQuote => Token::BlockQuote,
-            Container::ListItem => Token::ListItem,
-        };
+            // Move past the container starts to find the last data if we’re
+            // closing due to a different container or lazy flow like above.
+            if *phase == Phase::After || *phase == Phase::Prefix {
+                while inject_index > 0 {
+                    let event = &tokenizer.events[inject_index - 1];
+
+                    if event.token_type == Token::Data {
+                        break;
+                    }
+
+                    inject_index -= 1;
+                }
+            }
+
+            // Move past data starts that are whitespace only and have no
+            // container starts.
+            while inject_index > 0 { +                let event = &tokenizer.events[inject_index - 1]; + +                if event.token_type == Token::Data { +                    if event.event_type == EventType::Exit { +                        let slice = Slice::from_position( +                            tokenizer.parse_state.bytes, +                            &Position::from_exit_event(&tokenizer.events, inject_index - 1), +                        ); +                        let bytes = slice.bytes; +                        let mut whitespace = true; +                        let mut index = 0; +                        while index < bytes.len() { +                            match bytes[index] { +                                b'\t' | b'\n' | b'\r' | b' ' => index += 1, +                                _ => { +                                    whitespace = false; +                                    break; +                                } +                            } +                        } + +                        if !whitespace { +                            break; +                        } +                    } +                } else { +                    break; +                } + +                inject_index -= 1; +            } + +            let ref_point = if inject_index == tokenizer.events.len() { +                tokenizer.point.clone() +            } else { +                tokenizer.events[inject_index].point.clone() +            }; + +            let mut exits = Vec::with_capacity(stack_close.len()); + +            while !stack_close.is_empty() { +                let container = stack_close.pop().unwrap(); +                let token_type = match container.kind { +                    Container::BlockQuote => Token::BlockQuote, +                    Container::ListItem => Token::ListItem, +                }; + +                exits.push(Event { +                    event_type: EventType::Exit, +                    token_type: token_type.clone(), +                    point: ref_point.clone(), +                    link: None, +                }); + +                let mut stack_index = tokenizer.stack.len(); +                let mut found = false; + +                while stack_index > 0 { +                    stack_index -= 1; + +                    if tokenizer.stack[stack_index] == token_type { +                        tokenizer.stack.remove(stack_index); +                        found = true; +                        break; +                    } +                } + +                debug_assert!(found, "expected to find container token to exit"); +            } -        exits.push(Event { -            event_type: EventType::Exit, -            token_type: token_type.clone(), -            // Note: positions are fixed later. -            point: tokenizer.point.clone(), -            link: None, -        }); +            tokenizer.map.add(inject_index, 0, exits); +        }      } -    let index = -        tokenizer.tokenize_state.document_inject.len() - (if *phase == Phase::Eof { 1 } else { 2 }); -    tokenizer.tokenize_state.document_inject[index] -        .1 -        .append(&mut exits);      tokenizer.tokenize_state.document_interrupt_before = false;  }  // Inject the container events.  
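Before `resolve`, a note on the `inject_index` search in `exit_containers` above: the backwards walk exists so that container exits land *before* trailing whitespace-only data rather than after it. A much-reduced, hypothetical model of that scan (names and data are illustrative, not the crate’s types):

```rust
// Hypothetical, much-reduced model of the `inject_index` search: walk
// backwards past whitespace-only data so container exits are injected
// before it, not after it.
fn injection_index(events: &[&str]) -> usize {
    let mut index = events.len();

    while index > 0 {
        let whitespace_only = events[index - 1]
            .bytes()
            .all(|byte| matches!(byte, b'\t' | b'\n' | b'\r' | b' '));

        // Stop at the first event containing a non-whitespace byte.
        if whitespace_only {
            index -= 1;
        } else {
            break;
        }
    }

    index
}

fn main() {
    // A data event, then trailing whitespace and a blank line ending.
    let events = ["a", "  ", "\n"];
    assert_eq!(injection_index(&events), 1);
    println!("exits injected at {}", injection_index(&events));
}
```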
fn resolve(tokenizer: &mut Tokenizer) { -    let mut index = 0; -    let mut inject = tokenizer.tokenize_state.document_inject.split_off(0); -    inject.reverse(); -    let mut first_line_ending_in_run = None; - -    while let Some((before, mut after)) = inject.pop() { -        if !before.is_empty() { -            first_line_ending_in_run = None; -            tokenizer.map.add(index, 0, before); -        } +    let mut child = tokenizer.tokenize_state.child_tokenizer.take().unwrap(); +    child.map.consume(&mut child.events); +    // To do: see if we can do this less. +    tokenizer.map.consume(&mut tokenizer.events); -        while index < tokenizer.events.len() { -            let event = &tokenizer.events[index]; +    let mut link_index = skip::to(&tokenizer.events, 0, &[Token::Data]); +    // To do: share this code with `subtokenize`. +    // Now, loop through all subevents to figure out which parts +    // belong where and fix deep links. +    let mut subindex = 0; +    let mut slices = vec![]; +    let mut slice_start = 0; +    let mut old_prev: Option<usize> = None; + +    while subindex < child.events.len() { +        // Find the first event that starts after the end we’re looking +        // for. +        if child.events[subindex].event_type == EventType::Enter +            && child.events[subindex].point.index >= tokenizer.events[link_index + 1].point.index +        { +            slices.push((link_index, slice_start)); +            slice_start = subindex; +            link_index = tokenizer.events[link_index] +                .link +                .as_ref() +                .unwrap() +                .next +                .unwrap(); +        } -            if event.token_type == Token::LineEnding || event.token_type == Token::BlankLineEnding { -                if event.event_type == EventType::Enter { -                    first_line_ending_in_run = first_line_ending_in_run.or(Some(index)); +        // Fix sublinks. +        if let Some(sublink_curr) = &child.events[subindex].link { +            if sublink_curr.previous.is_some() { +                let old_prev = old_prev.unwrap(); +                let prev_event = &mut child.events[old_prev]; +                // The `index` in `events` where the current link is, +                // minus one to get the previous link, +                // minus 2 events (the enter and exit) for each removed +                // link. +                let new_link = if slices.is_empty() { +                    old_prev + link_index + 2                  } else { -                    index += 1; -                    break; -                } -            } else if event.token_type == Token::SpaceOrTab { -                // Empty to allow whitespace in blank lines. -            } else if first_line_ending_in_run.is_some() { -                first_line_ending_in_run = None; +                    old_prev + link_index - (slices.len() - 1) * 2 +                }; +                prev_event.link.as_mut().unwrap().next = Some(new_link);              } +        } -            index += 1; +        // If there is a `next` link in the subevents, we have to change +        // its `previous` index to account for the shifted events. +        // If it points to a next event, we also change the next event’s +        // reference back to *this* event. 
+        if let Some(sublink_curr) = &child.events[subindex].link { +            if let Some(next) = sublink_curr.next { +                let sublink_next = child.events[next].link.as_mut().unwrap(); + +                old_prev = sublink_next.previous; + +                sublink_next.previous = sublink_next +                    .previous +                    // The `index` in `events` where the current link is, +                    // minus 2 events (the enter and exit) for each removed +                    // link. +                    .map(|previous| previous + link_index - (slices.len() * 2)); +            }          } -        let point_rel = if let Some(index) = first_line_ending_in_run { -            &tokenizer.events[index].point -        } else { -            &tokenizer.point -        }; +        subindex += 1; +    } -        let close_index = first_line_ending_in_run.unwrap_or(index); +    if !child.events.is_empty() { +        slices.push((link_index, slice_start)); +    } + +    // Finally, inject the subevents. +    let mut index = slices.len(); + +    while index > 0 { +        index -= 1; +        let start = slices[index].0; +        tokenizer.map.add( +            start, +            if start == tokenizer.events.len() { +                0 +            } else { +                2 +            }, +            child.events.split_off(slices[index].1), +        ); +    } +    // To do: share the above code with `subtokenize`. -        let mut subevent_index = 0; -        while subevent_index < after.len() { -            after[subevent_index].point = point_rel.clone(); -            subevent_index += 1; +    let mut resolvers = child.resolvers.split_off(0); +    let mut resolver_ids = child.resolver_ids.split_off(0); +    tokenizer.resolvers.append(&mut resolvers); +    tokenizer.resolver_ids.append(&mut resolver_ids); + +    // To do: see if we can do this less. +    tokenizer.map.consume(&mut tokenizer.events); + +    let mut index = 0; +    let mut last_eol_enter: Option<usize> = None; +    while index < tokenizer.events.len() { +        let event = &tokenizer.events[index]; + +        if event.event_type == EventType::Exit { +            if event.token_type == Token::BlockQuote || event.token_type == Token::ListItem { +                if let Some(inject) = last_eol_enter { +                    let point = tokenizer.events[inject].point.clone(); +                    let mut clone = event.clone(); +                    clone.point = point; +                    // Inject a fixed exit. +                    tokenizer.map.add(inject, 0, vec![clone]); +                    // Remove this exit. +                    tokenizer.map.add(index, 1, vec![]); +                } +            } else if event.token_type == Token::LineEnding +                || event.token_type == Token::BlankLineEnding +            { +                last_eol_enter = Some(index - 1); +            } else { +                last_eol_enter = None; +            }          } -        tokenizer.map.add(close_index, 0, after); +        index += 1;      }      tokenizer.map.consume(&mut tokenizer.events); diff --git a/src/content/flow.rs b/src/content/flow.rs index bf4104c..6f62901 100644 --- a/src/content/flow.rs +++ b/src/content/flow.rs @@ -19,15 +19,8 @@  //! *   [HTML (flow)][crate::construct::html_flow]  //! 
*   [Thematic break][crate::construct::thematic_break] -use crate::construct::{ -    blank_line::start as blank_line, code_fenced::start as code_fenced, -    code_indented::start as code_indented, definition::start as definition, -    heading_atx::start as heading_atx, heading_setext::start as heading_setext, -    html_flow::start as html_flow, paragraph::start as paragraph, -    thematic_break::start as thematic_break, -};  use crate::token::Token; -use crate::tokenizer::{State, Tokenizer}; +use crate::tokenizer::{State, StateName, Tokenizer};  /// Before flow.  /// @@ -42,9 +35,13 @@ use crate::tokenizer::{State, Tokenizer};  pub fn start(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          None => State::Ok, -        _ => tokenizer.attempt(blank_line, |ok| { -            Box::new(if ok { blank_line_after } else { initial_before }) -        })(tokenizer), +        _ => tokenizer.attempt(StateName::BlankLineStart, |ok| { +            State::Fn(if ok { +                StateName::FlowBlankLineAfter +            } else { +                StateName::FlowBefore +            }) +        }),      }  } @@ -60,21 +57,27 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {  /// |~~~js  /// |<div>  /// ``` -fn initial_before(tokenizer: &mut Tokenizer) -> State { +pub fn before(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          None => State::Ok,          _ => tokenizer.attempt_n(              vec![ -                Box::new(code_indented), -                Box::new(code_fenced), -                Box::new(html_flow), -                Box::new(heading_atx), -                Box::new(heading_setext), -                Box::new(thematic_break), -                Box::new(definition), +                StateName::CodeIndentedStart, +                StateName::CodeFencedStart, +                StateName::HtmlFlowStart, +                StateName::HeadingAtxStart, +                StateName::HeadingSetextStart, +                StateName::ThematicBreakStart, +                StateName::DefinitionStart,              ], -            |ok| Box::new(if ok { after } else { before_paragraph }), -        )(tokenizer), +            |ok| { +                State::Fn(if ok { +                    StateName::FlowAfter +                } else { +                    StateName::FlowBeforeParagraph +                }) +            }, +        ),      }  } @@ -85,7 +88,7 @@ fn initial_before(tokenizer: &mut Tokenizer) -> State {  /// ```markdown  /// ␠␠|  /// ``` -fn blank_line_after(tokenizer: &mut Tokenizer) -> State { +pub fn blank_line_after(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          None => State::Ok,          Some(b'\n') => { @@ -94,7 +97,7 @@ fn blank_line_after(tokenizer: &mut Tokenizer) -> State {              tokenizer.exit(Token::BlankLineEnding);              // Feel free to interrupt.              
tokenizer.interrupt = false; -            State::Fn(Box::new(start)) +            State::Fn(StateName::FlowStart)          }          _ => unreachable!("expected eol/eof"),      } @@ -109,14 +112,14 @@ fn blank_line_after(tokenizer: &mut Tokenizer) -> State {  /// asd  /// ~~~|  /// ``` -fn after(tokenizer: &mut Tokenizer) -> State { +pub fn after(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          None => State::Ok,          Some(b'\n') => {              tokenizer.enter(Token::LineEnding);              tokenizer.consume();              tokenizer.exit(Token::LineEnding); -            State::Fn(Box::new(start)) +            State::Fn(StateName::FlowStart)          }          _ => unreachable!("expected eol/eof"),      } @@ -127,6 +130,6 @@ fn after(tokenizer: &mut Tokenizer) -> State {  /// ```markdown  /// |asd  /// ``` -fn before_paragraph(tokenizer: &mut Tokenizer) -> State { -    tokenizer.go(paragraph, after)(tokenizer) +pub fn before_paragraph(tokenizer: &mut Tokenizer) -> State { +    tokenizer.go(StateName::ParagraphStart, StateName::FlowAfter)  } diff --git a/src/content/string.rs b/src/content/string.rs index 2e738fb..697ec2c 100644 --- a/src/content/string.rs +++ b/src/content/string.rs @@ -12,11 +12,8 @@  //!  //! [text]: crate::content::text -use crate::construct::{ -    character_escape::start as character_escape, character_reference::start as character_reference, -    partial_data::start as data, partial_whitespace::resolve_whitespace, -}; -use crate::tokenizer::{State, Tokenizer}; +use crate::construct::partial_whitespace::resolve_whitespace; +use crate::tokenizer::{State, StateName, Tokenizer};  const MARKERS: [u8; 2] = [b'&', b'\\']; @@ -28,19 +25,28 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {  }  /// Before string. -fn before(tokenizer: &mut Tokenizer) -> State { +pub fn before(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          None => State::Ok,          _ => tokenizer.attempt_n( -            vec![Box::new(character_reference), Box::new(character_escape)], -            |ok| Box::new(if ok { before } else { before_data }), -        )(tokenizer), +            vec![ +                StateName::CharacterReferenceStart, +                StateName::CharacterEscapeStart, +            ], +            |ok| { +                State::Fn(if ok { +                    StateName::StringBefore +                } else { +                    StateName::StringBeforeData +                }) +            }, +        ),      }  }  /// At data. -fn before_data(tokenizer: &mut Tokenizer) -> State { -    tokenizer.go(data, before)(tokenizer) +pub fn before_data(tokenizer: &mut Tokenizer) -> State { +    tokenizer.go(StateName::DataStart, StateName::StringBefore)  }  /// Resolve whitespace. diff --git a/src/content/text.rs b/src/content/text.rs index f4666d1..d8a2726 100644 --- a/src/content/text.rs +++ b/src/content/text.rs @@ -20,15 +20,8 @@  //! > 👉 **Note**: for performance reasons, hard break (trailing) is formed by  //! > [whitespace][crate::construct::partial_whitespace]. 
-use crate::construct::{ -    attention::start as attention, autolink::start as autolink, -    character_escape::start as character_escape, character_reference::start as character_reference, -    code_text::start as code_text, hard_break_escape::start as hard_break_escape, -    html_text::start as html_text, label_end::start as label_end, -    label_start_image::start as label_start_image, label_start_link::start as label_start_link, -    partial_data::start as data, partial_whitespace::resolve_whitespace, -}; -use crate::tokenizer::{State, Tokenizer}; +use crate::construct::partial_whitespace::resolve_whitespace; +use crate::tokenizer::{State, StateName, Tokenizer};  const MARKERS: [u8; 9] = [      b'!',  // `label_start_image` @@ -55,19 +48,25 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {          None => State::Ok,          _ => tokenizer.attempt_n(              vec![ -                Box::new(attention), -                Box::new(autolink), -                Box::new(character_escape), -                Box::new(character_reference), -                Box::new(code_text), -                Box::new(hard_break_escape), -                Box::new(html_text), -                Box::new(label_end), -                Box::new(label_start_image), -                Box::new(label_start_link), +                StateName::AttentionStart, +                StateName::AutolinkStart, +                StateName::CharacterEscapeStart, +                StateName::CharacterReferenceStart, +                StateName::CodeTextStart, +                StateName::HardBreakEscapeStart, +                StateName::HtmlTextStart, +                StateName::LabelEndStart, +                StateName::LabelStartImageStart, +                StateName::LabelStartLinkStart,              ], -            |ok| Box::new(if ok { before } else { before_data }), -        )(tokenizer), +            |ok| { +                State::Fn(if ok { +                    StateName::TextBefore +                } else { +                    StateName::TextBeforeData +                }) +            }, +        ),      }  } @@ -76,8 +75,8 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {  /// ```markdown  /// |qwe  /// ``` -fn before_data(tokenizer: &mut Tokenizer) -> State { -    tokenizer.go(data, before)(tokenizer) +pub fn before_data(tokenizer: &mut Tokenizer) -> State { +    tokenizer.go(StateName::DataStart, StateName::TextBefore)  }  /// Resolve whitespace. diff --git a/src/subtokenize.rs b/src/subtokenize.rs index c641419..b080b46 100644 --- a/src/subtokenize.rs +++ b/src/subtokenize.rs @@ -21,9 +21,8 @@  //! thus the whole document needs to be parsed up to the level of definitions,  //! before any level that can include references can be parsed. -use crate::content::{string::start as string, text::start as text};  use crate::parser::ParseState; -use crate::tokenizer::{ContentType, Event, EventType, State, Tokenizer}; +use crate::tokenizer::{ContentType, Event, EventType, State, StateName, Tokenizer};  use crate::util::edit_map::EditMap;  /// Create a link between two [`Event`][]s. @@ -79,11 +78,11 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool {                  // Subtokenizer.                  let mut tokenizer = Tokenizer::new(event.point.clone(), parse_state);                  // Substate. 
-                let mut state = State::Fn(Box::new(if link.content_type == ContentType::String { -                    string +                let mut state = State::Fn(if link.content_type == ContentType::String { +                    StateName::StringStart                  } else { -                    text -                })); +                    StateName::TextStart +                });                  // Loop through links to pass them in order to the subtokenizer.                  while let Some(index) = link_index { @@ -92,7 +91,7 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool {                      debug_assert_eq!(enter.event_type, EventType::Enter);                      if link_curr.previous != None { -                        tokenizer.define_skip(&enter.point); +                        tokenizer.define_skip(enter.point.clone());                      }                      state = tokenizer.push( diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 3068ddf..7d28b77 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -12,6 +12,8 @@  //! [`check`]: Tokenizer::check  use crate::constant::TAB_SIZE; +use crate::construct; +use crate::content;  use crate::parser::ParseState;  use crate::token::{Token, VOID_TOKENS};  use crate::util::edit_map::EditMap; @@ -19,10 +21,12 @@ use crate::util::edit_map::EditMap;  /// Embedded content type.  #[derive(Debug, Clone, PartialEq)]  pub enum ContentType { -    /// Represents [text content][crate::content::text]. -    Text, +    /// Represents [flow content][crate::content::flow]. +    Flow,      /// Represents [string content][crate::content::string].      String, +    /// Represents [text content][crate::content::text]. +    Text,  }  #[derive(Debug, PartialEq)] @@ -79,10 +83,9 @@ pub struct Event {      pub link: Option<Link>,  } -/// The essence of the state machine are functions: `StateFn`. -/// It’s responsible for dealing with the current byte. -/// It yields a [`State`][]. -pub type StateFn = dyn FnOnce(&mut Tokenizer) -> State; +pub struct Attempt { +    done: Box<dyn FnOnce(&mut Tokenizer, State) -> State + 'static>, +}  /// Callback that can be registered and is called when the tokenizer is done.  /// @@ -91,10 +94,619 @@ pub type StateFn = dyn FnOnce(&mut Tokenizer) -> State;  /// the compiler and other users.  
pub type Resolver = dyn FnOnce(&mut Tokenizer); +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum StateName { +    AttentionStart, +    AttentionInside, + +    AutolinkStart, +    AutolinkOpen, +    AutolinkSchemeOrEmailAtext, +    AutolinkSchemeInsideOrEmailAtext, +    AutolinkUrlInside, +    AutolinkEmailAtSignOrDot, +    AutolinkEmailAtext, +    AutolinkEmailValue, +    AutolinkEmailLabel, + +    BlankLineStart, +    BlankLineAfter, + +    BlockQuoteStart, +    BlockQuoteBefore, +    BlockQuoteContStart, +    BlockQuoteContBefore, +    BlockQuoteContAfter, + +    BomStart, +    BomInside, + +    CharacterEscapeStart, +    CharacterEscapeInside, + +    CharacterReferenceStart, +    CharacterReferenceOpen, +    CharacterReferenceNumeric, +    CharacterReferenceValue, + +    CodeFencedStart, +    CodeFencedBeforeSequenceOpen, +    CodeFencedSequenceOpen, +    CodeFencedInfoBefore, +    CodeFencedInfo, +    CodeFencedMetaBefore, +    CodeFencedMeta, +    CodeFencedAtNonLazyBreak, +    CodeFencedCloseBefore, +    CodeFencedCloseStart, +    CodeFencedBeforeSequenceClose, +    CodeFencedSequenceClose, +    CodeFencedAfterSequenceClose, +    CodeFencedContentBefore, +    CodeFencedContentStart, +    CodeFencedBeforeContentChunk, +    CodeFencedContentChunk, +    CodeFencedAfter, + +    CodeIndentedStart, +    CodeIndentedAtBreak, +    CodeIndentedAfter, +    CodeIndentedFurtherStart, +    CodeIndentedInside, +    CodeIndentedFurtherEnd, +    CodeIndentedFurtherBegin, +    CodeIndentedFurtherAfter, + +    CodeTextStart, +    CodeTextSequenceOpen, +    CodeTextBetween, +    CodeTextData, +    CodeTextSequenceClose, + +    DataStart, +    DataInside, +    DataAtBreak, + +    DefinitionStart, +    DefinitionBefore, +    DefinitionLabelAfter, +    DefinitionMarkerAfter, +    DefinitionDestinationBefore, +    DefinitionDestinationAfter, +    DefinitionDestinationMissing, +    DefinitionTitleBefore, +    DefinitionAfter, +    DefinitionAfterWhitespace, +    DefinitionTitleBeforeMarker, +    DefinitionTitleAfter, +    DefinitionTitleAfterOptionalWhitespace, + +    DestinationStart, +    DestinationEnclosedBefore, +    DestinationEnclosed, +    DestinationEnclosedEscape, +    DestinationRaw, +    DestinationRawEscape, + +    DocumentStart, +    DocumentLineStart, +    // DocumentContainerExistingBefore, +    DocumentContainerExistingAfter, +    DocumentContainerExistingMissing, +    // DocumentContainerNewBefore, +    DocumentContainerNewBeforeNotBlockQuote, +    DocumentContainerNewAfter, +    DocumentContainersAfter, +    DocumentFlowInside, +    DocumentFlowEnd, + +    FlowStart, +    FlowBefore, +    FlowAfter, +    FlowBlankLineAfter, +    FlowBeforeParagraph, + +    HardBreakEscapeStart, +    HardBreakEscapeAfter, + +    HeadingAtxStart, +    HeadingAtxBefore, +    HeadingAtxSequenceOpen, +    HeadingAtxAtBreak, +    HeadingAtxSequenceFurther, +    HeadingAtxData, + +    HeadingSetextStart, +    HeadingSetextBefore, +    HeadingSetextInside, +    HeadingSetextAfter, + +    HtmlFlowStart, +    HtmlFlowBefore, +    HtmlFlowOpen, +    HtmlFlowDeclarationOpen, +    HtmlFlowCommentOpenInside, +    HtmlFlowCdataOpenInside, +    HtmlFlowTagCloseStart, +    HtmlFlowTagName, +    HtmlFlowBasicSelfClosing, +    HtmlFlowCompleteClosingTagAfter, +    HtmlFlowCompleteEnd, +    HtmlFlowCompleteAttributeNameBefore, +    HtmlFlowCompleteAttributeName, +    HtmlFlowCompleteAttributeNameAfter, +    HtmlFlowCompleteAttributeValueBefore, +    HtmlFlowCompleteAttributeValueQuoted, +    
HtmlFlowCompleteAttributeValueQuotedAfter,
+    HtmlFlowCompleteAttributeValueUnquoted,
+    HtmlFlowCompleteAfter,
+    HtmlFlowBlankLineBefore,
+    HtmlFlowContinuation,
+    HtmlFlowContinuationDeclarationInside,
+    HtmlFlowContinuationAfter,
+    HtmlFlowContinuationStart,
+    HtmlFlowContinuationBefore,
+    HtmlFlowContinuationCommentInside,
+    HtmlFlowContinuationRawTagOpen,
+    HtmlFlowContinuationRawEndTag,
+    HtmlFlowContinuationClose,
+    HtmlFlowContinuationCdataInside,
+    HtmlFlowContinuationStartNonLazy,
+
+    HtmlTextStart,
+    HtmlTextOpen,
+    HtmlTextDeclarationOpen,
+    HtmlTextTagCloseStart,
+    HtmlTextTagClose,
+    HtmlTextTagCloseBetween,
+    HtmlTextTagOpen,
+    HtmlTextTagOpenBetween,
+    HtmlTextTagOpenAttributeName,
+    HtmlTextTagOpenAttributeNameAfter,
+    HtmlTextTagOpenAttributeValueBefore,
+    HtmlTextTagOpenAttributeValueQuoted,
+    HtmlTextTagOpenAttributeValueQuotedAfter,
+    HtmlTextTagOpenAttributeValueUnquoted,
+    HtmlTextCdata,
+    HtmlTextCdataOpenInside,
+    HtmlTextCdataClose,
+    HtmlTextCdataEnd,
+    HtmlTextCommentOpenInside,
+    HtmlTextCommentStart,
+    HtmlTextCommentStartDash,
+    HtmlTextComment,
+    HtmlTextCommentClose,
+    HtmlTextDeclaration,
+    HtmlTextEnd,
+    HtmlTextInstruction,
+    HtmlTextInstructionClose,
+    HtmlTextLineEndingAfter,
+    HtmlTextLineEndingAfterPrefix,
+
+    LabelStart,
+    LabelAtBreak,
+    LabelEolAfter,
+    LabelAtBlankLine,
+    LabelEscape,
+    LabelInside,
+
+    LabelEndStart,
+    LabelEndAfter,
+    LabelEndResourceStart,
+    LabelEndResourceBefore,
+    LabelEndResourceOpen,
+    LabelEndResourceDestinationAfter,
+    LabelEndResourceDestinationMissing,
+    LabelEndResourceBetween,
+    LabelEndResourceTitleAfter,
+    LabelEndResourceEnd,
+    LabelEndOk,
+    LabelEndNok,
+    LabelEndReferenceFull,
+    LabelEndReferenceFullAfter,
+    LabelEndReferenceNotFull,
+    LabelEndReferenceCollapsed,
+    LabelEndReferenceCollapsedOpen,
+
+    LabelStartImageStart,
+    LabelStartImageOpen,
+
+    LabelStartLinkStart,
+
+    ListStart,
+    ListBefore,
+    ListNok,
+    ListBeforeUnordered,
+    ListValue,
+    ListMarkerAfter,
+    ListAfter,
+    ListMarkerAfterFilled,
+    ListWhitespace,
+    ListPrefixOther,
+    ListWhitespaceAfter,
+    ListContStart,
+    ListContBlank,
+    ListContFilled,
+    ListOk,
+
+    NonLazyContinuationStart,
+    NonLazyContinuationAfter,
+
+    ParagraphStart,
+    ParagraphInside,
+
+    SpaceOrTabStart,
+    SpaceOrTabInside,
+
+    SpaceOrTabEolStart,
+    SpaceOrTabEolAfterFirst,
+    SpaceOrTabEolAfterEol,
+    SpaceOrTabEolAtEol,
+    SpaceOrTabEolAfterMore,
+
+    StringStart,
+    StringBefore,
+    StringBeforeData,
+
+    TextStart,
+    TextBefore,
+    TextBeforeData,
+
+    ThematicBreakStart,
+    ThematicBreakBefore,
+    ThematicBreakSequence,
+    ThematicBreakAtBreak,
+
+    TitleStart,
+    TitleBegin,
+    TitleAfterEol,
+    TitleAtBlankLine,
+    TitleEscape,
+    TitleInside,
+}
+
+impl StateName {
+    /// Map a state name to its state function.
+    #[allow(clippy::too_many_lines)] +    pub fn to_func(self) -> Box<dyn FnOnce(&mut Tokenizer) -> State + 'static> { +        let func = match self { +            StateName::AttentionStart => construct::attention::start, +            StateName::AttentionInside => construct::attention::inside, + +            StateName::AutolinkStart => construct::autolink::start, +            StateName::AutolinkOpen => construct::autolink::open, +            StateName::AutolinkSchemeOrEmailAtext => construct::autolink::scheme_or_email_atext, +            StateName::AutolinkSchemeInsideOrEmailAtext => { +                construct::autolink::scheme_inside_or_email_atext +            } +            StateName::AutolinkUrlInside => construct::autolink::url_inside, +            StateName::AutolinkEmailAtSignOrDot => construct::autolink::email_at_sign_or_dot, +            StateName::AutolinkEmailAtext => construct::autolink::email_atext, +            StateName::AutolinkEmailValue => construct::autolink::email_value, +            StateName::AutolinkEmailLabel => construct::autolink::email_label, + +            StateName::BlankLineStart => construct::blank_line::start, +            StateName::BlankLineAfter => construct::blank_line::after, + +            StateName::BlockQuoteStart => construct::block_quote::start, +            StateName::BlockQuoteBefore => construct::block_quote::before, +            StateName::BlockQuoteContStart => construct::block_quote::cont_start, +            StateName::BlockQuoteContBefore => construct::block_quote::cont_before, +            StateName::BlockQuoteContAfter => construct::block_quote::cont_after, + +            StateName::BomStart => construct::partial_bom::start, +            StateName::BomInside => construct::partial_bom::inside, + +            StateName::CharacterEscapeStart => construct::character_escape::start, +            StateName::CharacterEscapeInside => construct::character_escape::inside, + +            StateName::CharacterReferenceStart => construct::character_reference::start, +            StateName::CharacterReferenceOpen => construct::character_reference::open, +            StateName::CharacterReferenceNumeric => construct::character_reference::numeric, +            StateName::CharacterReferenceValue => construct::character_reference::value, + +            StateName::CodeFencedStart => construct::code_fenced::start, +            StateName::CodeFencedBeforeSequenceOpen => construct::code_fenced::before_sequence_open, +            StateName::CodeFencedSequenceOpen => construct::code_fenced::sequence_open, +            StateName::CodeFencedInfoBefore => construct::code_fenced::info_before, +            StateName::CodeFencedInfo => construct::code_fenced::info, +            StateName::CodeFencedMetaBefore => construct::code_fenced::meta_before, +            StateName::CodeFencedMeta => construct::code_fenced::meta, +            StateName::CodeFencedAtNonLazyBreak => construct::code_fenced::at_non_lazy_break, +            StateName::CodeFencedCloseBefore => construct::code_fenced::close_before, +            StateName::CodeFencedCloseStart => construct::code_fenced::close_start, +            StateName::CodeFencedBeforeSequenceClose => { +                construct::code_fenced::before_sequence_close +            } +            StateName::CodeFencedSequenceClose => construct::code_fenced::sequence_close, +            StateName::CodeFencedAfterSequenceClose => construct::code_fenced::sequence_close_after, +            StateName::CodeFencedContentBefore => 
construct::code_fenced::content_before, +            StateName::CodeFencedContentStart => construct::code_fenced::content_start, +            StateName::CodeFencedBeforeContentChunk => construct::code_fenced::before_content_chunk, +            StateName::CodeFencedContentChunk => construct::code_fenced::content_chunk, +            StateName::CodeFencedAfter => construct::code_fenced::after, + +            StateName::CodeIndentedStart => construct::code_indented::start, +            StateName::CodeIndentedAtBreak => construct::code_indented::at_break, +            StateName::CodeIndentedAfter => construct::code_indented::after, +            StateName::CodeIndentedFurtherStart => construct::code_indented::further_start, +            StateName::CodeIndentedInside => construct::code_indented::inside, +            StateName::CodeIndentedFurtherEnd => construct::code_indented::further_end, +            StateName::CodeIndentedFurtherBegin => construct::code_indented::further_begin, +            StateName::CodeIndentedFurtherAfter => construct::code_indented::further_after, + +            StateName::CodeTextStart => construct::code_text::start, +            StateName::CodeTextSequenceOpen => construct::code_text::sequence_open, +            StateName::CodeTextBetween => construct::code_text::between, +            StateName::CodeTextData => construct::code_text::data, +            StateName::CodeTextSequenceClose => construct::code_text::sequence_close, + +            StateName::DataStart => construct::partial_data::start, +            StateName::DataInside => construct::partial_data::inside, +            StateName::DataAtBreak => construct::partial_data::at_break, + +            StateName::DefinitionStart => construct::definition::start, +            StateName::DefinitionBefore => construct::definition::before, +            StateName::DefinitionLabelAfter => construct::definition::label_after, +            StateName::DefinitionMarkerAfter => construct::definition::marker_after, +            StateName::DefinitionDestinationBefore => construct::definition::destination_before, +            StateName::DefinitionDestinationAfter => construct::definition::destination_after, +            StateName::DefinitionDestinationMissing => construct::definition::destination_missing, +            StateName::DefinitionTitleBefore => construct::definition::title_before, +            StateName::DefinitionAfter => construct::definition::after, +            StateName::DefinitionAfterWhitespace => construct::definition::after_whitespace, +            StateName::DefinitionTitleBeforeMarker => construct::definition::title_before_marker, +            StateName::DefinitionTitleAfter => construct::definition::title_after, +            StateName::DefinitionTitleAfterOptionalWhitespace => { +                construct::definition::title_after_optional_whitespace +            } + +            StateName::DestinationStart => construct::partial_destination::start, +            StateName::DestinationEnclosedBefore => construct::partial_destination::enclosed_before, +            StateName::DestinationEnclosed => construct::partial_destination::enclosed, +            StateName::DestinationEnclosedEscape => construct::partial_destination::enclosed_escape, +            StateName::DestinationRaw => construct::partial_destination::raw, +            StateName::DestinationRawEscape => construct::partial_destination::raw_escape, + +            StateName::DocumentStart => content::document::start, +            StateName::DocumentLineStart => 
content::document::line_start, +            // StateName::DocumentContainerExistingBefore => content::document::container_existing_before, +            StateName::DocumentContainerExistingAfter => { +                content::document::container_existing_after +            } +            StateName::DocumentContainerExistingMissing => { +                content::document::container_existing_missing +            } +            // StateName::DocumentContainerNewBefore => content::document::container_new_before, +            StateName::DocumentContainerNewBeforeNotBlockQuote => { +                content::document::container_new_before_not_block_quote +            } +            StateName::DocumentContainerNewAfter => content::document::container_new_after, +            StateName::DocumentContainersAfter => content::document::containers_after, +            StateName::DocumentFlowEnd => content::document::flow_end, +            StateName::DocumentFlowInside => content::document::flow_inside, + +            StateName::FlowStart => content::flow::start, +            StateName::FlowBefore => content::flow::before, +            StateName::FlowAfter => content::flow::after, +            StateName::FlowBlankLineAfter => content::flow::blank_line_after, +            StateName::FlowBeforeParagraph => content::flow::before_paragraph, + +            StateName::HardBreakEscapeStart => construct::hard_break_escape::start, +            StateName::HardBreakEscapeAfter => construct::hard_break_escape::after, + +            StateName::HeadingAtxStart => construct::heading_atx::start, +            StateName::HeadingAtxBefore => construct::heading_atx::before, +            StateName::HeadingAtxSequenceOpen => construct::heading_atx::sequence_open, +            StateName::HeadingAtxAtBreak => construct::heading_atx::at_break, +            StateName::HeadingAtxSequenceFurther => construct::heading_atx::sequence_further, +            StateName::HeadingAtxData => construct::heading_atx::data, + +            StateName::HeadingSetextStart => construct::heading_setext::start, +            StateName::HeadingSetextBefore => construct::heading_setext::before, +            StateName::HeadingSetextInside => construct::heading_setext::inside, +            StateName::HeadingSetextAfter => construct::heading_setext::after, + +            StateName::HtmlFlowStart => construct::html_flow::start, +            StateName::HtmlFlowBefore => construct::html_flow::before, +            StateName::HtmlFlowOpen => construct::html_flow::open, +            StateName::HtmlFlowDeclarationOpen => construct::html_flow::declaration_open, +            StateName::HtmlFlowCommentOpenInside => construct::html_flow::comment_open_inside, +            StateName::HtmlFlowCdataOpenInside => construct::html_flow::cdata_open_inside, +            StateName::HtmlFlowTagCloseStart => construct::html_flow::tag_close_start, +            StateName::HtmlFlowTagName => construct::html_flow::tag_name, +            StateName::HtmlFlowBasicSelfClosing => construct::html_flow::basic_self_closing, +            StateName::HtmlFlowCompleteClosingTagAfter => { +                construct::html_flow::complete_closing_tag_after +            } +            StateName::HtmlFlowCompleteEnd => construct::html_flow::complete_end, +            StateName::HtmlFlowCompleteAttributeNameBefore => { +                construct::html_flow::complete_attribute_name_before +            } +            StateName::HtmlFlowCompleteAttributeName => { +                
construct::html_flow::complete_attribute_name +            } +            StateName::HtmlFlowCompleteAttributeNameAfter => { +                construct::html_flow::complete_attribute_name_after +            } +            StateName::HtmlFlowCompleteAttributeValueBefore => { +                construct::html_flow::complete_attribute_value_before +            } +            StateName::HtmlFlowCompleteAttributeValueQuoted => { +                construct::html_flow::complete_attribute_value_quoted +            } +            StateName::HtmlFlowCompleteAttributeValueQuotedAfter => { +                construct::html_flow::complete_attribute_value_quoted_after +            } +            StateName::HtmlFlowCompleteAttributeValueUnquoted => { +                construct::html_flow::complete_attribute_value_unquoted +            } +            StateName::HtmlFlowCompleteAfter => construct::html_flow::complete_after, +            StateName::HtmlFlowBlankLineBefore => construct::html_flow::blank_line_before, +            StateName::HtmlFlowContinuation => construct::html_flow::continuation, +            StateName::HtmlFlowContinuationDeclarationInside => { +                construct::html_flow::continuation_declaration_inside +            } +            StateName::HtmlFlowContinuationAfter => construct::html_flow::continuation_after, +            StateName::HtmlFlowContinuationStart => construct::html_flow::continuation_start, +            StateName::HtmlFlowContinuationBefore => construct::html_flow::continuation_before, +            StateName::HtmlFlowContinuationCommentInside => { +                construct::html_flow::continuation_comment_inside +            } +            StateName::HtmlFlowContinuationRawTagOpen => { +                construct::html_flow::continuation_raw_tag_open +            } +            StateName::HtmlFlowContinuationRawEndTag => { +                construct::html_flow::continuation_raw_end_tag +            } +            StateName::HtmlFlowContinuationClose => construct::html_flow::continuation_close, +            StateName::HtmlFlowContinuationCdataInside => { +                construct::html_flow::continuation_cdata_inside +            } +            StateName::HtmlFlowContinuationStartNonLazy => { +                construct::html_flow::continuation_start_non_lazy +            } + +            StateName::HtmlTextStart => construct::html_text::start, +            StateName::HtmlTextOpen => construct::html_text::open, +            StateName::HtmlTextDeclarationOpen => construct::html_text::declaration_open, +            StateName::HtmlTextTagCloseStart => construct::html_text::tag_close_start, +            StateName::HtmlTextTagClose => construct::html_text::tag_close, +            StateName::HtmlTextTagCloseBetween => construct::html_text::tag_close_between, +            StateName::HtmlTextTagOpen => construct::html_text::tag_open, +            StateName::HtmlTextTagOpenBetween => construct::html_text::tag_open_between, +            StateName::HtmlTextTagOpenAttributeName => { +                construct::html_text::tag_open_attribute_name +            } +            StateName::HtmlTextTagOpenAttributeNameAfter => { +                construct::html_text::tag_open_attribute_name_after +            } +            StateName::HtmlTextTagOpenAttributeValueBefore => { +                construct::html_text::tag_open_attribute_value_before +            } +            StateName::HtmlTextTagOpenAttributeValueQuoted => { +                
construct::html_text::tag_open_attribute_value_quoted +            } +            StateName::HtmlTextTagOpenAttributeValueQuotedAfter => { +                construct::html_text::tag_open_attribute_value_quoted_after +            } +            StateName::HtmlTextTagOpenAttributeValueUnquoted => { +                construct::html_text::tag_open_attribute_value_unquoted +            } +            StateName::HtmlTextCdata => construct::html_text::cdata, +            StateName::HtmlTextCdataOpenInside => construct::html_text::cdata_open_inside, +            StateName::HtmlTextCdataClose => construct::html_text::cdata_close, +            StateName::HtmlTextCdataEnd => construct::html_text::cdata_end, +            StateName::HtmlTextCommentOpenInside => construct::html_text::comment_open_inside, +            StateName::HtmlTextCommentStart => construct::html_text::comment_start, +            StateName::HtmlTextCommentStartDash => construct::html_text::comment_start_dash, +            StateName::HtmlTextComment => construct::html_text::comment, +            StateName::HtmlTextCommentClose => construct::html_text::comment_close, +            StateName::HtmlTextDeclaration => construct::html_text::declaration, +            StateName::HtmlTextEnd => construct::html_text::end, +            StateName::HtmlTextInstruction => construct::html_text::instruction, +            StateName::HtmlTextInstructionClose => construct::html_text::instruction_close, +            StateName::HtmlTextLineEndingAfter => construct::html_text::line_ending_after, +            StateName::HtmlTextLineEndingAfterPrefix => { +                construct::html_text::line_ending_after_prefix +            } + +            StateName::LabelStart => construct::partial_label::start, +            StateName::LabelAtBreak => construct::partial_label::at_break, +            StateName::LabelEolAfter => construct::partial_label::eol_after, +            StateName::LabelAtBlankLine => construct::partial_label::at_blank_line, +            StateName::LabelEscape => construct::partial_label::escape, +            StateName::LabelInside => construct::partial_label::inside, + +            StateName::LabelEndStart => construct::label_end::start, +            StateName::LabelEndAfter => construct::label_end::after, +            StateName::LabelEndResourceStart => construct::label_end::resource_start, +            StateName::LabelEndResourceBefore => construct::label_end::resource_before, +            StateName::LabelEndResourceOpen => construct::label_end::resource_open, +            StateName::LabelEndResourceDestinationAfter => { +                construct::label_end::resource_destination_after +            } +            StateName::LabelEndResourceDestinationMissing => { +                construct::label_end::resource_destination_missing +            } +            StateName::LabelEndResourceBetween => construct::label_end::resource_between, +            StateName::LabelEndResourceTitleAfter => construct::label_end::resource_title_after, +            StateName::LabelEndResourceEnd => construct::label_end::resource_end, +            StateName::LabelEndOk => construct::label_end::ok, +            StateName::LabelEndNok => construct::label_end::nok, +            StateName::LabelEndReferenceFull => construct::label_end::reference_full, +            StateName::LabelEndReferenceFullAfter => construct::label_end::reference_full_after, +            StateName::LabelEndReferenceNotFull => construct::label_end::reference_not_full, +            
StateName::LabelEndReferenceCollapsed => construct::label_end::reference_collapsed, +            StateName::LabelEndReferenceCollapsedOpen => { +                construct::label_end::reference_collapsed_open +            } + +            StateName::LabelStartImageStart => construct::label_start_image::start, +            StateName::LabelStartImageOpen => construct::label_start_image::open, +            StateName::LabelStartLinkStart => construct::label_start_link::start, + +            StateName::ListStart => construct::list::start, +            StateName::ListBefore => construct::list::before, +            StateName::ListNok => construct::list::nok, +            StateName::ListBeforeUnordered => construct::list::before_unordered, +            StateName::ListValue => construct::list::value, +            StateName::ListMarkerAfter => construct::list::marker_after, +            StateName::ListAfter => construct::list::after, +            StateName::ListMarkerAfterFilled => construct::list::marker_after_filled, +            StateName::ListWhitespace => construct::list::whitespace, +            StateName::ListWhitespaceAfter => construct::list::whitespace_after, +            StateName::ListPrefixOther => construct::list::prefix_other, +            StateName::ListContStart => construct::list::cont_start, +            StateName::ListContBlank => construct::list::cont_blank, +            StateName::ListContFilled => construct::list::cont_filled, +            StateName::ListOk => construct::list::ok, + +            StateName::NonLazyContinuationStart => construct::partial_non_lazy_continuation::start, +            StateName::NonLazyContinuationAfter => construct::partial_non_lazy_continuation::after, + +            StateName::ParagraphStart => construct::paragraph::start, +            StateName::ParagraphInside => construct::paragraph::inside, + +            StateName::SpaceOrTabStart => construct::partial_space_or_tab::start, +            StateName::SpaceOrTabInside => construct::partial_space_or_tab::inside, + +            StateName::SpaceOrTabEolStart => construct::partial_space_or_tab::eol_start, +            StateName::SpaceOrTabEolAfterFirst => construct::partial_space_or_tab::eol_after_first, +            StateName::SpaceOrTabEolAfterEol => construct::partial_space_or_tab::eol_after_eol, +            StateName::SpaceOrTabEolAtEol => construct::partial_space_or_tab::eol_at_eol, +            StateName::SpaceOrTabEolAfterMore => construct::partial_space_or_tab::eol_after_more, + +            StateName::StringStart => content::string::start, +            StateName::StringBefore => content::string::before, +            StateName::StringBeforeData => content::string::before_data, + +            StateName::TextStart => content::text::start, +            StateName::TextBefore => content::text::before, +            StateName::TextBeforeData => content::text::before_data, + +            StateName::ThematicBreakStart => construct::thematic_break::start, +            StateName::ThematicBreakBefore => construct::thematic_break::before, +            StateName::ThematicBreakSequence => construct::thematic_break::sequence, +            StateName::ThematicBreakAtBreak => construct::thematic_break::at_break, + +            StateName::TitleStart => construct::partial_title::start, +            StateName::TitleBegin => construct::partial_title::begin, +            StateName::TitleAfterEol => construct::partial_title::after_eol, +            StateName::TitleAtBlankLine => 
construct::partial_title::at_blank_line, +            StateName::TitleEscape => construct::partial_title::escape, +            StateName::TitleInside => construct::partial_title::inside, +        }; + +        Box::new(func) +    } +} +  /// The result of a state. +#[derive(Debug, PartialEq)]  pub enum State { -    /// There is a future state: a boxed [`StateFn`][] to pass the next code to. -    Fn(Box<StateFn>), +    /// There is a future state: a [`StateName`][] to pass the next code to. +    Fn(StateName),      /// The state is successful.      Ok,      /// The state is not successful. @@ -163,7 +775,7 @@ struct InternalState {  /// To do  #[allow(clippy::struct_excessive_bools)] -pub struct TokenizeState { +pub struct TokenizeState<'a> {      /// To do.      pub connect: bool,      /// To do. @@ -171,15 +783,15 @@ pub struct TokenizeState {      /// To do.      pub document_continued: usize,      /// To do. -    pub document_index: usize, -    /// To do. -    pub document_inject: Vec<(Vec<Event>, Vec<Event>)>, -    /// To do.      pub document_interrupt_before: bool,      /// To do.      pub document_paragraph_before: bool,      /// To do. -    pub document_next: Option<Box<StateFn>>, +    pub document_data_index: Option<usize>, +    /// To do. +    pub document_child_state: Option<State>, +    /// To do. +    pub child_tokenizer: Option<Box<Tokenizer<'a>>>,      /// To do.      pub marker: u8,      /// To do. @@ -187,7 +799,7 @@ pub struct TokenizeState {      /// To do.      pub prefix: usize,      /// To do. -    pub return_state: Option<Box<StateFn>>, +    pub return_state: Option<StateName>,      /// To do.      pub seen: bool,      /// To do. @@ -234,7 +846,7 @@ pub struct Tokenizer<'a> {      /// Track whether this tokenizer is done.      resolved: bool,      /// To do. -    attempt_balance: usize, +    attempts: Vec<Attempt>,      /// Current byte.      pub current: Option<u8>,      /// Previous byte. @@ -251,13 +863,13 @@ pub struct Tokenizer<'a> {      pub map: EditMap,      /// List of attached resolvers, which will be called when done feeding,      /// to clean events. -    resolvers: Vec<Box<Resolver>>, +    pub resolvers: Vec<Box<Resolver>>,      /// List of names associated with attached resolvers. -    resolver_ids: Vec<String>, +    pub resolver_ids: Vec<String>,      /// Shared parsing state across tokenizers.      pub parse_state: &'a ParseState<'a>,      /// To do. -    pub tokenize_state: TokenizeState, +    pub tokenize_state: TokenizeState<'a>,      /// Stack of label (start) that could form images and links.      ///      /// Used when tokenizing [text content][crate::content::text]. 
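Stepping back, the new `State::Fn(StateName)` above is the heart of this change: a state name is a tiny `Copy` value that is compared and dispatched through a single `match` (`StateName::to_func`), where the previous design allocated a boxed closure for every step. A minimal, self-contained sketch of that dispatch pattern follows; the two states and the driver loop are illustrative only, not the crate’s real API.

```rust
/// Illustrative state names; the crate’s enum has many more variants.
#[derive(Debug, Clone, Copy, PartialEq)]
enum StateName {
    Start,
    Inside,
}

/// Result of a state, mirroring the shape of the new `State` enum.
#[derive(Debug, PartialEq)]
enum State {
    Fn(StateName),
    Ok,
}

struct Tokenizer {
    index: usize,
    bytes: Vec<u8>,
}

fn start(tokenizer: &mut Tokenizer) -> State {
    if tokenizer.index < tokenizer.bytes.len() {
        State::Fn(StateName::Inside)
    } else {
        State::Ok
    }
}

fn inside(tokenizer: &mut Tokenizer) -> State {
    tokenizer.index += 1;
    State::Fn(StateName::Start)
}

/// The single dispatch point, playing the role of `StateName::to_func`.
fn call(tokenizer: &mut Tokenizer, name: StateName) -> State {
    match name {
        StateName::Start => start(tokenizer),
        StateName::Inside => inside(tokenizer),
    }
}

fn main() {
    let mut tokenizer = Tokenizer {
        index: 0,
        bytes: b"abc".to_vec(),
    };
    let mut state = State::Fn(StateName::Start);

    // Drive the machine to completion without any per-step allocation.
    while let State::Fn(name) = state {
        state = call(&mut tokenizer, name);
    }

    assert_eq!(state, State::Ok);
}
```

Because a state name is `Copy` and `PartialEq`, states can also be stored, compared, and resumed cheaply, which is what makes fields like `document_child_state: Option<State>` above possible.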
@@ -299,7 +911,7 @@ impl<'a> Tokenizer<'a> {              line_start: point.clone(),              consumed: true,              resolved: false, -            attempt_balance: 0, +            attempts: vec![],              point,              stack: vec![],              events: vec![], @@ -308,11 +920,11 @@ impl<'a> Tokenizer<'a> {                  connect: false,                  document_container_stack: vec![],                  document_continued: 0, -                document_index: 0, -                document_inject: vec![],                  document_interrupt_before: false,                  document_paragraph_before: false, -                document_next: None, +                document_data_index: None, +                document_child_state: None, +                child_tokenizer: None,                  marker: 0,                  marker_other: 0,                  prefix: 0, @@ -369,13 +981,22 @@ impl<'a> Tokenizer<'a> {      }      /// Define a jump between two places. -    pub fn define_skip(&mut self, point: &Point) { -        define_skip_impl(self, point.line, (point.index, point.vs)); -    } +    /// +    /// This defines to which future index we move after a line ending. +    pub fn define_skip(&mut self, mut point: Point) { +        move_point_back(self, &mut point); + +        let info = (point.index, point.vs); +        log::debug!("position: define skip: {:?} -> ({:?})", point.line, info); +        let at = point.line - self.first_line; + +        if at >= self.column_start.len() { +            self.column_start.push(info); +        } else { +            self.column_start[at] = info; +        } -    /// Define the current place as a jump between two places. -    pub fn define_skip_current(&mut self) { -        define_skip_impl(self, self.point.line, (self.point.index, self.point.vs)); +        self.account_for_potential_skip();      }      /// Increment the current positional info if we’re right after a line @@ -396,8 +1017,8 @@ impl<'a> Tokenizer<'a> {      }      /// Consume the current byte. -    /// Each [`StateFn`][] is expected to call this to signal that this code is -    /// used, or call a next `StateFn`. +    /// Each state function is expected to call this to signal that this code is +    /// used, or call a next function.      pub fn consume(&mut self) {          log::debug!("consume: `{:?}` ({:?})", self.current, self.point);          debug_assert!(!self.consumed, "expected code to not have been consumed: this might be because `x(code)` instead of `x` was returned"); @@ -473,16 +1094,7 @@ impl<'a> Tokenizer<'a> {      pub fn enter_with_link(&mut self, token_type: Token, link: Option<Link>) {          let mut point = self.point.clone(); - -        // Move back past ignored bytes. -        while point.index > 0 { -            point.index -= 1; -            let action = byte_action(self.parse_state.bytes, &point); -            if !matches!(action, ByteAction::Ignore) { -                point.index += 1; -                break; -            } -        } +        move_point_back(self, &mut point);          log::debug!("enter: `{:?}` ({:?})", token_type, point);          self.events.push(Event { @@ -527,15 +1139,7 @@ impl<'a> Tokenizer<'a> {          if matches!(self.previous, Some(b'\n')) {              point = self.line_start.clone();          } else { -            // Move back past ignored bytes. 
-            while point.index > 0 {
-                point.index -= 1;
-                let action = byte_action(self.parse_state.bytes, &point);
-                if !matches!(action, ByteAction::Ignore) {
-                    point.index += 1;
-                    break;
-                }
-            }
+            move_point_back(self, &mut point);
         }
 
         log::debug!("exit: `{:?}` ({:?})", token_type, point);
@@ -575,29 +1179,20 @@ impl<'a> Tokenizer<'a> {
         self.stack.truncate(previous.stack_len);
     }
 
-    /// Parse with `state_fn` and its future states, switching to `ok` when
+    /// Parse with `state_name` and its future states, switching to `ok` when
     /// successful, and passing [`State::Nok`][] back up if it occurs.
     ///
     /// This function does not capture the current state, in case of
     /// `State::Nok`, as it is assumed that this `go` is itself wrapped in
     /// another `attempt`.
     #[allow(clippy::unused_self)]
-    pub fn go(
-        &mut self,
-        state_fn: impl FnOnce(&mut Tokenizer) -> State + 'static,
-        after: impl FnOnce(&mut Tokenizer) -> State + 'static,
-    ) -> Box<StateFn> {
-        self.attempt_balance += 1;
+    pub fn go(&mut self, state_name: StateName, after: StateName) -> State {
         attempt_impl(
-            state_fn,
-            None,
-            self.point.index,
-            |tokenizer: &mut Tokenizer, state| {
-                tokenizer.attempt_balance -= 1;
-
+            self,
+            state_name,
+            Box::new(move |_tokenizer: &mut Tokenizer, state| {
                 if matches!(state, State::Ok) {
-                    tokenizer.consumed = true;
-                    State::Fn(Box::new(after))
+                    State::Fn(after)
                 } else {
                     // Must be `Nok`.
                     // We don’t capture/free state because it is assumed that
@@ -605,132 +1200,122 @@ impl<'a> Tokenizer<'a> {
                     // if it can occur.
                     state
                 }
-            },
-        )
-    }
-
-    /// Like `go`, but this lets you *hijack* back to some other state after a
-    /// certain code.
-    #[allow(clippy::unused_self)]
-    pub fn go_until(
-        &mut self,
-        state_fn: impl FnOnce(&mut Tokenizer) -> State + 'static,
-        until: impl Fn(Option<u8>) -> bool + 'static,
-        done: impl FnOnce(State) -> Box<StateFn> + 'static,
-    ) -> Box<StateFn> {
-        self.attempt_balance += 1;
-        attempt_impl(
-            state_fn,
-            Some(Box::new(until)),
-            self.point.index,
-            |tokenizer: &mut Tokenizer, state| {
-                tokenizer.attempt_balance -= 1;
-                tokenizer.consumed = true;
-                // We don’t capture/free state because it is assumed that
-                // `go_until` itself is wrapped in another attempt that does
-                // that if it can occur.
-                State::Fn(done(state))
-            },
+            }),
         )
     }
 
-    /// Parse with `state_fn` and its future states, to check if it result in
+    /// Parse with `state_name` and its future states, to check if it results in
     /// [`State::Ok`][] or [`State::Nok`][], revert on both cases, and then
     /// call `done` with whether it was successful or not.
     ///
     /// This captures the current state of the tokenizer, returns a wrapped
-    /// state that captures all codes and feeds them to `state_fn` and its
+    /// state that captures all codes and feeds them to `state_name` and its
     /// future states until it yields `State::Ok` or `State::Nok`.
     /// It then applies the captured state, calls `done`, and feeds all
     /// captured codes to its future states.
     pub fn check(
         &mut self,
-        state_fn: impl FnOnce(&mut Tokenizer) -> State + 'static,
-        done: impl FnOnce(bool) -> Box<StateFn> + 'static,
-    ) -> Box<StateFn> {
-        self.attempt_balance += 1;
+        state_name: StateName,
+        done: impl FnOnce(bool) -> State + 'static,
+    ) -> State {
         let previous = self.capture();
         attempt_impl(
-            state_fn,
-            None,
-            self.point.index,
-            |tokenizer: &mut Tokenizer, state| {
-                tokenizer.attempt_balance -= 1;
+            self,
+            state_name,
+            Box::new(|tokenizer: &mut Tokenizer, state| {
                 tokenizer.free(previous);
                 tokenizer.consumed = true;
-                State::Fn(done(matches!(state, State::Ok)))
-            },
+                done(matches!(state, State::Ok))
+            }),
         )
     }
 
-    /// Parse with `state_fn` and its future states, to check if it results in
+    /// Parse with `state_name` and its future states, to check if it results in
     /// [`State::Ok`][] or [`State::Nok`][], revert on the case of
     /// `State::Nok`, and then call `done` with whether it was successful or
     /// not.
     ///
     /// This captures the current state of the tokenizer, returns a wrapped
-    /// state that captures all codes and feeds them to `state_fn` and its
+    /// state that captures all codes and feeds them to `state_name` and its
    /// future states until it yields `State::Ok`, at which point it calls
     /// `done` and yields its result.
     /// If instead `State::Nok` was yielded, the captured state is applied,
     /// `done` is called, and all captured codes are fed to its future states.
     pub fn attempt(
         &mut self,
-        state_fn: impl FnOnce(&mut Tokenizer) -> State + 'static,
-        done: impl FnOnce(bool) -> Box<StateFn> + 'static,
-    ) -> Box<StateFn> {
-        self.attempt_balance += 1;
+        state_name: StateName,
+        done: impl FnOnce(bool) -> State + 'static,
+    ) -> State {
         let previous = self.capture();
+        log::debug!("attempting: {:?}", state_name);
+        // self.consumed = false;
         attempt_impl(
-            state_fn,
-            None,
-            self.point.index,
-            |tokenizer: &mut Tokenizer, state| {
-                tokenizer.attempt_balance -= 1;
+            self,
+            state_name,
+            Box::new(move |tokenizer: &mut Tokenizer, state| {
                 let ok = matches!(state, State::Ok);
 
                 if !ok {
                     tokenizer.free(previous);
+                    tokenizer.consumed = true;
                 }
 
-                log::debug!("attempt: {:?}, at {:?}", ok, tokenizer.point);
+                log::debug!(
+                    "attempted {:?}: {:?}, at {:?}",
+                    state_name,
+                    ok,
+                    tokenizer.point
+                );
 
-                tokenizer.consumed = true;
-                State::Fn(done(ok))
-            },
+                done(ok)
+            }),
         )
     }
 
     /// Just like [`attempt`][Tokenizer::attempt], but many.
     pub fn attempt_n(
         &mut self,
-        mut state_fns: Vec<Box<StateFn>>,
-        done: impl FnOnce(bool) -> Box<StateFn> + 'static,
-    ) -> Box<StateFn> {
-        if state_fns.is_empty() {
+        mut state_names: Vec<StateName>,
+        done: impl FnOnce(bool) -> State + 'static,
+    ) -> State {
+        if state_names.is_empty() {
             done(false)
         } else {
-            let state_fn = state_fns.remove(0);
-            self.attempt(state_fn, move |ok| {
-                if ok {
-                    done(ok)
-                } else {
-                    Box::new(|t| t.attempt_n(state_fns, done)(t))
-                }
-            })
+            let previous = self.capture();
+            let state_name = state_names.remove(0);
+            self.consumed = false;
+            log::debug!("attempting (n): {:?}", state_name);
+            attempt_impl(
+                self,
+                state_name,
+                Box::new(move |tokenizer: &mut Tokenizer, state| {
+                    let ok = matches!(state, State::Ok);
+
+                    log::debug!(
+                        "attempted (n) {:?}: {:?}, at {:?}",
+                        state_name,
+                        ok,
+                        tokenizer.point
+                    );
+
+                    if ok {
+                        done(true)
+                    } else {
+                        tokenizer.free(previous);
+                        tokenizer.consumed = true;
+                        tokenizer.attempt_n(state_names, done)
+                    }
+                }),
+            )
        }
     }
 
     /// Just like [`attempt`][Tokenizer::attempt], but for when you don’t care
     /// about `ok`.
-    pub fn attempt_opt(
-        &mut self,
-        state_fn: impl FnOnce(&mut Tokenizer) -> State + 'static,
-        after: impl FnOnce(&mut Tokenizer) -> State + 'static,
-    ) -> Box<StateFn> {
-        self.attempt(state_fn, |_ok| Box::new(after))
+    pub fn attempt_opt(&mut self, state_name: StateName, after: StateName) -> State {
+        self.attempt(state_name, move |_ok| State::Fn(after))
     }
 
     /// Feed a list of `codes` into `start`.
@@ -738,30 +1323,40 @@ impl<'a> Tokenizer<'a> {
     /// This is set up to support repeatedly calling `feed`, and thus streaming
     /// markdown into the state machine, and normally pauses after feeding.
     // Note: if needed: accept `vs`?
-    pub fn push(
-        &mut self,
-        min: usize,
-        max: usize,
-        start: impl FnOnce(&mut Tokenizer) -> State + 'static,
-    ) -> State {
+    pub fn push(&mut self, min: usize, max: usize, state_name: StateName) -> State {
         debug_assert!(!self.resolved, "cannot feed after drain");
-        debug_assert!(min >= self.point.index, "cannot move backwards");
-        self.move_to((min, 0));
+        // debug_assert!(min >= self.point.index, "cannot move backwards");
+        if min > self.point.index {
+            self.move_to((min, 0));
+        }
 
-        let mut state = State::Fn(Box::new(start));
+        let mut state = State::Fn(state_name);
 
         while self.point.index < max {
             match state {
-                State::Ok | State::Nok => break,
-                State::Fn(func) => match byte_action(self.parse_state.bytes, &self.point) {
+                State::Ok | State::Nok => {
+                    if let Some(attempt) = self.attempts.pop() {
+                        let done = attempt.done;
+                        self.consumed = true;
+                        state = done(self, state);
+                    } else {
+                        break;
+                    }
+                }
+                State::Fn(state_name) => match byte_action(self.parse_state.bytes, &self.point) {
                     ByteAction::Ignore => {
-                        state = State::Fn(Box::new(func));
+                        state = State::Fn(state_name);
                        self.move_one();
                    }
                     ByteAction::Insert(byte) | ByteAction::Normal(byte) => {
-                        log::debug!("main: passing: `{:?}` ({:?})", byte, self.point);
+                        log::debug!(
+                            "main: passing: `{:?}` ({:?}) to {:?}",
+                            byte,
+                            self.point,
+                            state_name
+                        );
                         self.expect(Some(byte));
-                        state = func(self);
+                        state = call_impl(self, state_name);
                     }
                 },
             }
@@ -778,8 +1373,16 @@ impl<'a> Tokenizer<'a> {
 
         loop {
             match state {
-                State::Ok | State::Nok => break,
-                State::Fn(func) => {
+                State::Ok | State::Nok => {
+                    if let Some(attempt) = self.attempts.pop() {
+                        let done = attempt.done;
+                        self.consumed = true;
+                        state = done(self, state);
+                    } else {
+                        break;
+                    }
+                }
+                State::Fn(state_name) => {
                     // We sometimes move back when flushing, so then we use those codes.
                     let action = if self.point.index == max {
                         None
@@ -788,7 +1391,7 @@ impl<'a> Tokenizer<'a> {
                     };
 
                     if let Some(ByteAction::Ignore) = action {
-                        state = State::Fn(Box::new(func));
+                        state = State::Fn(state_name);
                         self.move_one();
                     } else {
                         let byte =
@@ -800,14 +1403,20 @@ impl<'a> Tokenizer<'a> {
                                 None
                             };
 
-                        log::debug!("main: flushing: `{:?}` ({:?})", byte, self.point);
+                        log::debug!(
+                            "main: flushing: `{:?}` ({:?}) to {:?}",
+                            byte,
+                            self.point,
+                            state_name
+                        );
                         self.expect(byte);
-                        state = func(self);
+                        state = call_impl(self, state_name);
                     }
                 }
             }
         }
 
+        self.consumed = true;
         debug_assert!(matches!(state, State::Ok), "must be ok");
 
         if resolve {
@@ -869,80 +1478,29 @@ fn byte_action(bytes: &[u8], point: &Point) -> ByteAction {
 /// Recurses into itself.
 /// Used in [`Tokenizer::attempt`][Tokenizer::attempt] and
 /// [`Tokenizer::check`][Tokenizer::check].
 fn attempt_impl(
-    state: impl FnOnce(&mut Tokenizer) -> State + 'static,
-    pause: Option<Box<dyn Fn(Option<u8>) -> bool + 'static>>,
-    start: usize,
-    done: impl FnOnce(&mut Tokenizer, State) -> State + 'static,
-) -> Box<StateFn> {
-    Box::new(move |tokenizer| {
-        if let Some(ref func) = pause {
-            if tokenizer.point.index > start && func(tokenizer.previous) {
-                return done(tokenizer, State::Fn(Box::new(state)));
-            }
-        }
+    tokenizer: &mut Tokenizer,
+    state_name: StateName,
+    done: Box<impl FnOnce(&mut Tokenizer, State) -> State + 'static>,
+) -> State {
+    tokenizer.attempts.push(Attempt { done });
+    call_impl(tokenizer, state_name)
+}
 
-        let state = state(tokenizer);
-
-        match state {
-            State::Ok | State::Nok => {
-                if tokenizer.attempt_balance == 0 {
-                    debug_assert!(!tokenizer.tokenize_state.connect);
-                    debug_assert_eq!(tokenizer.tokenize_state.document_continued, 0);
-                    debug_assert_eq!(tokenizer.tokenize_state.document_index, 0);
-                    debug_assert!(!tokenizer.tokenize_state.document_interrupt_before);
-                    debug_assert!(!tokenizer.tokenize_state.document_paragraph_before);
-                    debug_assert_eq!(tokenizer.tokenize_state.marker, 0);
-                    debug_assert_eq!(tokenizer.tokenize_state.marker_other, 0);
-                    debug_assert_eq!(tokenizer.tokenize_state.prefix, 0);
-                    debug_assert!(!tokenizer.tokenize_state.seen);
-                    debug_assert_eq!(tokenizer.tokenize_state.size, 0);
-                    debug_assert_eq!(tokenizer.tokenize_state.size_other, 0);
-                    debug_assert_eq!(tokenizer.tokenize_state.stop.len(), 0);
-                    debug_assert_eq!(tokenizer.tokenize_state.start, 0);
-                    debug_assert_eq!(tokenizer.tokenize_state.end, 0);
-                    debug_assert!(tokenizer.tokenize_state.return_state.is_none());
-                    debug_assert!(!tokenizer.tokenize_state.space_or_tab_eol_connect);
-                    debug_assert!(!tokenizer.tokenize_state.space_or_tab_eol_ok);
-                    debug_assert!(tokenizer
-                        .tokenize_state
-                        .space_or_tab_eol_content_type
-                        .is_none());
-                    debug_assert!(!tokenizer.tokenize_state.space_or_tab_connect);
-                    debug_assert!(tokenizer.tokenize_state.space_or_tab_content_type.is_none());
-                    debug_assert_eq!(tokenizer.tokenize_state.space_or_tab_min, 0);
-                    debug_assert_eq!(tokenizer.tokenize_state.space_or_tab_max, 0);
-                    debug_assert_eq!(tokenizer.tokenize_state.space_or_tab_size, 0);
-                    debug_assert_eq!(
-                        tokenizer.tokenize_state.space_or_tab_token,
-                        Token::SpaceOrTab
-                    );
-                    debug_assert_eq!(tokenizer.tokenize_state.token_1, Token::Data);
-                    debug_assert_eq!(tokenizer.tokenize_state.token_2, Token::Data);
-                    debug_assert_eq!(tokenizer.tokenize_state.token_3, Token::Data);
-                    debug_assert_eq!(tokenizer.tokenize_state.token_4, Token::Data);
-                    debug_assert_eq!(tokenizer.tokenize_state.token_5, Token::Data);
-                }
+#[allow(clippy::too_many_lines)]
+fn call_impl(tokenizer: &mut Tokenizer, state_name: StateName) -> State {
+    let func = state_name.to_func();
 
-                done(tokenizer, state)
-            }
-            State::Fn(func) => State::Fn(attempt_impl(func, pause, start, done)),
-        }
-    })
+    func(tokenizer)
 }
 
-/// Flush `start`: pass `eof`s to it until done.
-/// Define a jump between two places.
-///
-/// This defines to which future index we move after a line ending.
-fn define_skip_impl(tokenizer: &mut Tokenizer, line: usize, info: (usize, usize)) {
-    log::debug!("position: define skip: {:?} -> ({:?})", line, info);
-    let at = line - tokenizer.first_line;
-
-    if at >= tokenizer.column_start.len() {
-        tokenizer.column_start.push(info);
-    } else {
-        tokenizer.column_start[at] = info;
+fn move_point_back(tokenizer: &mut Tokenizer, point: &mut Point) {
+    // Move back past ignored bytes.
+    while point.index > 0 {
+        point.index -= 1;
+        let action = byte_action(tokenizer.parse_state.bytes, point);
+        if !matches!(action, ByteAction::Ignore) {
+            point.index += 1;
+            break;
+        }
     }
-
-    tokenizer.account_for_potential_skip();
 }
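The net effect of the tokenizer hunks above: a state is now identified by a plain `StateName` value that `call_impl` maps to a function only at call time, and `attempt`/`check` no longer wrap states in nested boxed closures but push their `done` callback onto the `attempts` stack, which the `push` and flush loops pop whenever a state settles to `Ok` or `Nok`. A minimal, self-contained sketch of that pattern follows; the names (`Sm`, `start`, `inside`, `run`) are illustrative only, not this crate's actual API:

```rust
/// Named states: small `Copy` values instead of boxed `FnOnce` trait objects.
#[derive(Clone, Copy, Debug)]
enum StateName {
    Start,
    Inside,
}

enum State {
    Fn(StateName),
    Ok,
    Nok,
}

struct Sm {
    input: Vec<u8>,
    index: usize,
    /// Stack of `done` callbacks; replaces wrapping states in nested closures.
    attempts: Vec<Box<dyn FnOnce(&mut Sm, State) -> State>>,
}

impl Sm {
    /// Central dispatch: a name is turned into a function only when called.
    fn call(&mut self, name: StateName) -> State {
        match name {
            StateName::Start => start(self),
            StateName::Inside => inside(self),
        }
    }

    /// Push `done` onto the stack, then enter the attempted state.
    fn attempt(
        &mut self,
        name: StateName,
        done: impl FnOnce(&mut Sm, State) -> State + 'static,
    ) -> State {
        self.attempts.push(Box::new(done));
        self.call(name)
    }

    /// Drive the machine: call named states while there are more to call,
    /// and pop a `done` callback each time a state settles to `Ok`/`Nok`.
    fn run(&mut self, mut state: State) -> State {
        loop {
            if let State::Fn(name) = state {
                state = self.call(name);
            } else if let Some(done) = self.attempts.pop() {
                state = done(self, state);
            } else {
                return state;
            }
        }
    }
}

/// `Start`: expect a `*`, then hand off to `Inside`.
fn start(sm: &mut Sm) -> State {
    if sm.input.get(sm.index) == Some(&b'*') {
        sm.index += 1;
        State::Fn(StateName::Inside)
    } else {
        State::Nok
    }
}

/// `Inside`: consume one byte per call until the input ends.
fn inside(sm: &mut Sm) -> State {
    if sm.index < sm.input.len() {
        sm.index += 1;
        State::Fn(StateName::Inside)
    } else {
        State::Ok
    }
}

fn main() {
    let mut sm = Sm {
        input: b"*emphasis".to_vec(),
        index: 0,
        attempts: vec![],
    };
    // The `done` callback fires once `Start` and its future states settle,
    // mirroring how the feed loops above pop `attempts` on `Ok`/`Nok`.
    let first = sm.attempt(StateName::Start, |_sm, state| state);
    assert!(matches!(sm.run(first), State::Ok));
}
```

Driving everything through one explicit `attempts` stack is also what lets `push` and the flush loop stay plain iterative loops: each `Ok`/`Nok` pops at most one `done`, so deeply nested attempts no longer build up chains of recursively wrapped `Box<StateFn>` closures.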
