From 8f8d72a749a39845fd03ae259533abe73dc7dcdf Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Tue, 9 Aug 2022 14:04:27 +0200 Subject: Add support for passing `ok`, `nok` as separate states to attempts --- src/construct/blank_line.rs | 6 +- src/construct/block_quote.rs | 12 +- src/construct/code_fenced.rs | 60 ++++++---- src/construct/code_indented.rs | 36 +++--- src/construct/definition.rs | 59 +++++++--- src/construct/heading_atx.rs | 18 ++- src/construct/heading_setext.rs | 12 +- src/construct/html_flow.rs | 26 ++--- src/construct/html_text.rs | 6 +- src/construct/label_end.rs | 84 ++++++++------ src/construct/list.rs | 60 +++++----- src/construct/partial_label.rs | 12 +- src/construct/partial_space_or_tab.rs | 18 +-- src/construct/partial_title.rs | 12 +- src/construct/thematic_break.rs | 12 +- src/content/document.rs | 42 ++++--- src/content/flow.rs | 97 ++++++++++++----- src/content/string.rs | 28 ++--- src/content/text.rs | 85 +++++++++++---- src/tokenizer.rs | 200 +++++++++++++--------------------- 20 files changed, 509 insertions(+), 376 deletions(-) diff --git a/src/construct/blank_line.rs b/src/construct/blank_line.rs index b12c2c4..22dfdc0 100644 --- a/src/construct/blank_line.rs +++ b/src/construct/blank_line.rs @@ -47,7 +47,11 @@ use crate::tokenizer::{State, StateName, Tokenizer}; /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { let state_name = space_or_tab(tokenizer); - tokenizer.attempt_opt(state_name, StateName::BlankLineAfter) + tokenizer.attempt( + state_name, + State::Fn(StateName::BlankLineAfter), + State::Fn(StateName::BlankLineAfter), + ) } /// After zero or more spaces or tabs, before a line ending or EOF. diff --git a/src/construct/block_quote.rs b/src/construct/block_quote.rs index df58d62..cec3dce 100644 --- a/src/construct/block_quote.rs +++ b/src/construct/block_quote.rs @@ -55,7 +55,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { usize::MAX }, ); - tokenizer.go(state_name, StateName::BlockQuoteBefore) + tokenizer.attempt( + state_name, + State::Fn(StateName::BlockQuoteBefore), + State::Nok, + ) } else { State::Nok } @@ -94,7 +98,11 @@ pub fn cont_start(tokenizer: &mut Tokenizer) -> State { usize::MAX }, ); - tokenizer.go(state_name, StateName::BlockQuoteContBefore) + tokenizer.attempt( + state_name, + State::Fn(StateName::BlockQuoteContBefore), + State::Nok, + ) } /// After whitespace, before `>`. diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs index 46c5f9f..91fd8e4 100644 --- a/src/construct/code_fenced.rs +++ b/src/construct/code_fenced.rs @@ -128,7 +128,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { usize::MAX }, ); - tokenizer.go(state_name, StateName::CodeFencedBeforeSequenceOpen) + tokenizer.attempt( + state_name, + State::Fn(StateName::CodeFencedBeforeSequenceOpen), + State::Nok, + ) } else { State::Nok } @@ -184,7 +188,11 @@ pub fn sequence_open(tokenizer: &mut Tokenizer) -> State { _ if tokenizer.tokenize_state.size >= CODE_FENCED_SEQUENCE_SIZE_MIN => { tokenizer.exit(Token::CodeFencedFenceSequence); let state_name = space_or_tab(tokenizer); - tokenizer.attempt_opt(state_name, StateName::CodeFencedInfoBefore) + tokenizer.attempt( + state_name, + State::Fn(StateName::CodeFencedInfoBefore), + State::Fn(StateName::CodeFencedInfoBefore), + ) } _ => { tokenizer.tokenize_state.marker = 0; @@ -241,7 +249,11 @@ pub fn info(tokenizer: &mut Tokenizer) -> State { tokenizer.exit(Token::Data); tokenizer.exit(Token::CodeFencedFenceInfo); let state_name = space_or_tab(tokenizer); - tokenizer.attempt_opt(state_name, StateName::CodeFencedMetaBefore) + tokenizer.attempt( + state_name, + State::Fn(StateName::CodeFencedMetaBefore), + State::Fn(StateName::CodeFencedMetaBefore), + ) } Some(b'`') if tokenizer.tokenize_state.marker == b'`' => { tokenizer.concrete = false; @@ -323,13 +335,11 @@ pub fn meta(tokenizer: &mut Tokenizer) -> State { /// | ~~~ /// ``` pub fn at_break(tokenizer: &mut Tokenizer) -> State { - tokenizer.check(StateName::NonLazyContinuationStart, |ok| { - State::Fn(if ok { - StateName::CodeFencedAtNonLazyBreak - } else { - StateName::CodeFencedAfter - }) - }) + tokenizer.check( + StateName::NonLazyContinuationStart, + State::Fn(StateName::CodeFencedAtNonLazyBreak), + State::Fn(StateName::CodeFencedAfter), + ) } /// At an eol/eof in code, before a non-lazy closing fence or content. @@ -342,13 +352,11 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { /// | ~~~ /// ``` pub fn at_non_lazy_break(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt(StateName::CodeFencedCloseBefore, |ok| { - State::Fn(if ok { - StateName::CodeFencedAfter - } else { - StateName::CodeFencedContentBefore - }) - }) + tokenizer.attempt( + StateName::CodeFencedCloseBefore, + State::Fn(StateName::CodeFencedAfter), + State::Fn(StateName::CodeFencedContentBefore), + ) } /// Before a closing fence, at the line ending. @@ -390,7 +398,11 @@ pub fn close_start(tokenizer: &mut Tokenizer) -> State { usize::MAX }, ); - tokenizer.go(state_name, StateName::CodeFencedBeforeSequenceClose) + tokenizer.attempt( + state_name, + State::Fn(StateName::CodeFencedBeforeSequenceClose), + State::Nok, + ) } /// In a closing fence, after optional whitespace, before sequence. @@ -432,7 +444,11 @@ pub fn sequence_close(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.size_other = 0; tokenizer.exit(Token::CodeFencedFenceSequence); let state_name = space_or_tab(tokenizer); - tokenizer.attempt_opt(state_name, StateName::CodeFencedAfterSequenceClose) + tokenizer.attempt( + state_name, + State::Fn(StateName::CodeFencedAfterSequenceClose), + State::Fn(StateName::CodeFencedAfterSequenceClose), + ) } _ => { tokenizer.tokenize_state.size_other = 0; @@ -483,7 +499,11 @@ pub fn content_before(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn content_start(tokenizer: &mut Tokenizer) -> State { let state_name = space_or_tab_min_max(tokenizer, 0, tokenizer.tokenize_state.prefix); - tokenizer.go(state_name, StateName::CodeFencedBeforeContentChunk) + tokenizer.attempt( + state_name, + State::Fn(StateName::CodeFencedBeforeContentChunk), + State::Nok, + ) } /// Before code content, after a prefix. diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs index 516b493..0906e5f 100644 --- a/src/construct/code_indented.rs +++ b/src/construct/code_indented.rs @@ -65,7 +65,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { if !tokenizer.interrupt && tokenizer.parse_state.constructs.code_indented { tokenizer.enter(Token::CodeIndented); let state_name = space_or_tab_min_max(tokenizer, TAB_SIZE, TAB_SIZE); - tokenizer.go(state_name, StateName::CodeIndentedAtBreak) + tokenizer.attempt( + state_name, + State::Fn(StateName::CodeIndentedAtBreak), + State::Nok, + ) } else { State::Nok } @@ -80,13 +84,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { pub fn at_break(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => after(tokenizer), - Some(b'\n') => tokenizer.attempt(StateName::CodeIndentedFurtherStart, |ok| { - State::Fn(if ok { - StateName::CodeIndentedAtBreak - } else { - StateName::CodeIndentedAfter - }) - }), + Some(b'\n') => tokenizer.attempt( + StateName::CodeIndentedFurtherStart, + State::Fn(StateName::CodeIndentedAtBreak), + State::Fn(StateName::CodeIndentedAfter), + ), _ => { tokenizer.enter(Token::CodeFlowChunk); inside(tokenizer) @@ -143,13 +145,11 @@ pub fn further_start(tokenizer: &mut Tokenizer) -> State { } _ if !tokenizer.lazy => { let state_name = space_or_tab_min_max(tokenizer, TAB_SIZE, TAB_SIZE); - tokenizer.attempt(state_name, |ok| { - State::Fn(if ok { - StateName::CodeIndentedFurtherEnd - } else { - StateName::CodeIndentedFurtherBegin - }) - }) + tokenizer.attempt( + state_name, + State::Fn(StateName::CodeIndentedFurtherEnd), + State::Fn(StateName::CodeIndentedFurtherBegin), + ) } _ => State::Nok, } @@ -175,7 +175,11 @@ pub fn further_end(_tokenizer: &mut Tokenizer) -> State { /// ``` pub fn further_begin(tokenizer: &mut Tokenizer) -> State { let state_name = space_or_tab(tokenizer); - tokenizer.attempt_opt(state_name, StateName::CodeIndentedFurtherAfter) + tokenizer.attempt( + state_name, + State::Fn(StateName::CodeIndentedFurtherAfter), + State::Fn(StateName::CodeIndentedFurtherAfter), + ) } /// After whitespace, not indented enough. diff --git a/src/construct/definition.rs b/src/construct/definition.rs index fbad99d..350992b 100644 --- a/src/construct/definition.rs +++ b/src/construct/definition.rs @@ -120,7 +120,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(Token::Definition); // Note: arbitrary whitespace allowed even if code (indented) is on. let state_name = space_or_tab(tokenizer); - tokenizer.attempt_opt(state_name, StateName::DefinitionBefore) + tokenizer.attempt( + state_name, + State::Fn(StateName::DefinitionBefore), + State::Fn(StateName::DefinitionBefore), + ) } else { State::Nok } @@ -138,7 +142,11 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_1 = Token::DefinitionLabel; tokenizer.tokenize_state.token_2 = Token::DefinitionLabelMarker; tokenizer.tokenize_state.token_3 = Token::DefinitionLabelString; - tokenizer.go(StateName::LabelStart, StateName::DefinitionLabelAfter) + tokenizer.attempt( + StateName::LabelStart, + State::Fn(StateName::DefinitionLabelAfter), + State::Nok, + ) } _ => State::Nok, } @@ -169,7 +177,11 @@ pub fn label_after(tokenizer: &mut Tokenizer) -> State { /// To do. pub fn marker_after(tokenizer: &mut Tokenizer) -> State { let state_name = space_or_tab_eol(tokenizer); - tokenizer.attempt_opt(state_name, StateName::DefinitionDestinationBefore) + tokenizer.attempt( + state_name, + State::Fn(StateName::DefinitionDestinationBefore), + State::Fn(StateName::DefinitionDestinationBefore), + ) } /// Before a destination. @@ -185,13 +197,11 @@ pub fn destination_before(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_4 = Token::DefinitionDestinationRaw; tokenizer.tokenize_state.token_5 = Token::DefinitionDestinationString; tokenizer.tokenize_state.size_other = usize::MAX; - tokenizer.attempt(StateName::DestinationStart, |ok| { - State::Fn(if ok { - StateName::DefinitionDestinationAfter - } else { - StateName::DefinitionDestinationMissing - }) - }) + tokenizer.attempt( + StateName::DestinationStart, + State::Fn(StateName::DefinitionDestinationAfter), + State::Fn(StateName::DefinitionDestinationMissing), + ) } /// After a destination. @@ -207,7 +217,11 @@ pub fn destination_after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_4 = Token::Data; tokenizer.tokenize_state.token_5 = Token::Data; tokenizer.tokenize_state.size_other = 0; - tokenizer.attempt_opt(StateName::DefinitionTitleBefore, StateName::DefinitionAfter) + tokenizer.attempt( + StateName::DefinitionTitleBefore, + State::Fn(StateName::DefinitionAfter), + State::Fn(StateName::DefinitionAfter), + ) } /// Without destination. @@ -231,7 +245,11 @@ pub fn destination_missing(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn after(tokenizer: &mut Tokenizer) -> State { let state_name = space_or_tab(tokenizer); - tokenizer.attempt_opt(state_name, StateName::DefinitionAfterWhitespace) + tokenizer.attempt( + state_name, + State::Fn(StateName::DefinitionAfterWhitespace), + State::Fn(StateName::DefinitionAfterWhitespace), + ) } /// After a definition, after optional whitespace. @@ -264,7 +282,11 @@ pub fn after_whitespace(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn title_before(tokenizer: &mut Tokenizer) -> State { let state_name = space_or_tab_eol(tokenizer); - tokenizer.go(state_name, StateName::DefinitionTitleBeforeMarker) + tokenizer.attempt( + state_name, + State::Fn(StateName::DefinitionTitleBeforeMarker), + State::Nok, + ) } /// Before a title, after a line ending. @@ -278,7 +300,11 @@ pub fn title_before_marker(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_1 = Token::DefinitionTitle; tokenizer.tokenize_state.token_2 = Token::DefinitionTitleMarker; tokenizer.tokenize_state.token_3 = Token::DefinitionTitleString; - tokenizer.go(StateName::TitleStart, StateName::DefinitionTitleAfter) + tokenizer.attempt( + StateName::TitleStart, + State::Fn(StateName::DefinitionTitleAfter), + State::Nok, + ) } /// After a title. @@ -292,9 +318,10 @@ pub fn title_after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_2 = Token::Data; tokenizer.tokenize_state.token_3 = Token::Data; let state_name = space_or_tab(tokenizer); - tokenizer.attempt_opt( + tokenizer.attempt( state_name, - StateName::DefinitionTitleAfterOptionalWhitespace, + State::Fn(StateName::DefinitionTitleAfterOptionalWhitespace), + State::Fn(StateName::DefinitionTitleAfterOptionalWhitespace), ) } diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs index 45c4758..e5bc3bd 100644 --- a/src/construct/heading_atx.rs +++ b/src/construct/heading_atx.rs @@ -77,7 +77,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { usize::MAX }, ); - tokenizer.go(state_name, StateName::HeadingAtxBefore) + tokenizer.attempt( + state_name, + State::Fn(StateName::HeadingAtxBefore), + State::Nok, + ) } else { State::Nok } @@ -120,7 +124,11 @@ pub fn sequence_open(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.size = 0; tokenizer.exit(Token::HeadingAtxSequence); let state_name = space_or_tab(tokenizer); - tokenizer.go(state_name, StateName::HeadingAtxAtBreak) + tokenizer.attempt( + state_name, + State::Fn(StateName::HeadingAtxAtBreak), + State::Nok, + ) } _ => { tokenizer.tokenize_state.size = 0; @@ -146,7 +154,11 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { } Some(b'\t' | b' ') => { let state_name = space_or_tab(tokenizer); - tokenizer.go(state_name, StateName::HeadingAtxAtBreak) + tokenizer.attempt( + state_name, + State::Fn(StateName::HeadingAtxAtBreak), + State::Nok, + ) } Some(b'#') => { tokenizer.enter(Token::HeadingAtxSequence); diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs index 50feba4..a8c707a 100644 --- a/src/construct/heading_setext.rs +++ b/src/construct/heading_setext.rs @@ -93,7 +93,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { }, ); - tokenizer.go(state_name, StateName::HeadingSetextBefore) + tokenizer.attempt( + state_name, + State::Fn(StateName::HeadingSetextBefore), + State::Nok, + ) } else { State::Nok } @@ -134,7 +138,11 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.marker = 0; tokenizer.exit(Token::HeadingSetextUnderline); let state_name = space_or_tab(tokenizer); - tokenizer.attempt_opt(state_name, StateName::HeadingSetextAfter) + tokenizer.attempt( + state_name, + State::Fn(StateName::HeadingSetextAfter), + State::Fn(StateName::HeadingSetextAfter), + ) } } } diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs index 779146c..128fd2e 100644 --- a/src/construct/html_flow.rs +++ b/src/construct/html_flow.rs @@ -147,7 +147,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { }, ); - tokenizer.go(state_name, StateName::HtmlFlowBefore) + tokenizer.attempt(state_name, State::Fn(StateName::HtmlFlowBefore), State::Nok) } else { State::Nok } @@ -632,13 +632,11 @@ pub fn continuation(tokenizer: &mut Tokenizer) -> State { || tokenizer.tokenize_state.marker == COMPLETE => { tokenizer.exit(Token::HtmlFlowData); - tokenizer.check(StateName::HtmlFlowBlankLineBefore, |ok| { - State::Fn(if ok { - StateName::HtmlFlowContinuationAfter - } else { - StateName::HtmlFlowContinuationStart - }) - }) + tokenizer.check( + StateName::HtmlFlowBlankLineBefore, + State::Fn(StateName::HtmlFlowContinuationAfter), + State::Fn(StateName::HtmlFlowContinuationStart), + ) } // Note: important that this is after the basic/complete case. None | Some(b'\n') => { @@ -680,13 +678,11 @@ pub fn continuation(tokenizer: &mut Tokenizer) -> State { /// | asd /// ``` pub fn continuation_start(tokenizer: &mut Tokenizer) -> State { - tokenizer.check(StateName::NonLazyContinuationStart, |ok| { - State::Fn(if ok { - StateName::HtmlFlowContinuationStartNonLazy - } else { - StateName::HtmlFlowContinuationAfter - }) - }) + tokenizer.check( + StateName::NonLazyContinuationStart, + State::Fn(StateName::HtmlFlowContinuationStartNonLazy), + State::Fn(StateName::HtmlFlowContinuationAfter), + ) } /// In continuation, at an eol, before non-lazy content. diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs index 1c1f9e6..c1dfaca 100644 --- a/src/construct/html_text.rs +++ b/src/construct/html_text.rs @@ -659,7 +659,11 @@ pub fn line_ending_before(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn line_ending_after(tokenizer: &mut Tokenizer) -> State { let state_name = space_or_tab(tokenizer); - tokenizer.attempt_opt(state_name, StateName::HtmlTextLineEndingAfterPrefix) + tokenizer.attempt( + state_name, + State::Fn(StateName::HtmlTextLineEndingAfterPrefix), + State::Fn(StateName::HtmlTextLineEndingAfterPrefix), + ) } /// After a line ending, after indent. diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs index ae9fe77..47ded36 100644 --- a/src/construct/label_end.rs +++ b/src/construct/label_end.rs @@ -237,25 +237,28 @@ pub fn after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // Resource (`[asd](fgh)`)? - Some(b'(') => tokenizer.attempt(StateName::LabelEndResourceStart, move |is_ok| { - State::Fn(if is_ok || defined { + Some(b'(') => tokenizer.attempt( + StateName::LabelEndResourceStart, + State::Fn(StateName::LabelEndOk), + State::Fn(if defined { StateName::LabelEndOk } else { StateName::LabelEndNok - }) - }), + }), + ), // Full (`[asd][fgh]`) or collapsed (`[asd][]`) reference? - Some(b'[') => tokenizer.attempt(StateName::LabelEndReferenceFull, move |is_ok| { - State::Fn(if is_ok { - StateName::LabelEndOk - } else if defined { + Some(b'[') => tokenizer.attempt( + StateName::LabelEndReferenceFull, + State::Fn(StateName::LabelEndOk), + State::Fn(if defined { StateName::LabelEndReferenceNotFull } else { StateName::LabelEndNok - }) - }), + }), + ), // Shortcut (`[asd]`) reference? _ => { + // To do: use state names? let func = if defined { ok } else { nok }; func(tokenizer) } @@ -273,13 +276,11 @@ pub fn after(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn reference_not_full(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt(StateName::LabelEndReferenceCollapsed, |is_ok| { - State::Fn(if is_ok { - StateName::LabelEndOk - } else { - StateName::LabelEndNok - }) - }) + tokenizer.attempt( + StateName::LabelEndReferenceCollapsed, + State::Fn(StateName::LabelEndOk), + State::Fn(StateName::LabelEndNok), + ) } /// Done, we found something. @@ -375,7 +376,11 @@ pub fn resource_start(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn resource_before(tokenizer: &mut Tokenizer) -> State { let state_name = space_or_tab_eol(tokenizer); - tokenizer.attempt_opt(state_name, StateName::LabelEndResourceOpen) + tokenizer.attempt( + state_name, + State::Fn(StateName::LabelEndResourceOpen), + State::Fn(StateName::LabelEndResourceOpen), + ) } /// At the start of a resource, after optional whitespace. @@ -395,13 +400,11 @@ pub fn resource_open(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_5 = Token::ResourceDestinationString; tokenizer.tokenize_state.size_other = RESOURCE_DESTINATION_BALANCE_MAX; - tokenizer.attempt(StateName::DestinationStart, |ok| { - State::Fn(if ok { - StateName::LabelEndResourceDestinationAfter - } else { - StateName::LabelEndResourceDestinationMissing - }) - }) + tokenizer.attempt( + StateName::DestinationStart, + State::Fn(StateName::LabelEndResourceDestinationAfter), + State::Fn(StateName::LabelEndResourceDestinationMissing), + ) } } @@ -419,14 +422,11 @@ pub fn resource_destination_after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_5 = Token::Data; tokenizer.tokenize_state.size_other = 0; let state_name = space_or_tab_eol(tokenizer); - - tokenizer.attempt(state_name, |ok| { - State::Fn(if ok { - StateName::LabelEndResourceBetween - } else { - StateName::LabelEndResourceEnd - }) - }) + tokenizer.attempt( + state_name, + State::Fn(StateName::LabelEndResourceBetween), + State::Fn(StateName::LabelEndResourceEnd), + ) } /// Without destination. @@ -452,7 +452,11 @@ pub fn resource_between(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_1 = Token::ResourceTitle; tokenizer.tokenize_state.token_2 = Token::ResourceTitleMarker; tokenizer.tokenize_state.token_3 = Token::ResourceTitleString; - tokenizer.go(StateName::TitleStart, StateName::LabelEndResourceTitleAfter) + tokenizer.attempt( + StateName::TitleStart, + State::Fn(StateName::LabelEndResourceTitleAfter), + State::Nok, + ) } _ => resource_end(tokenizer), } @@ -469,7 +473,11 @@ pub fn resource_title_after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_2 = Token::Data; tokenizer.tokenize_state.token_3 = Token::Data; let state_name = space_or_tab_eol(tokenizer); - tokenizer.attempt_opt(state_name, StateName::LabelEndResourceEnd) + tokenizer.attempt( + state_name, + State::Fn(StateName::LabelEndResourceEnd), + State::Fn(StateName::LabelEndResourceEnd), + ) } /// In a resource, at the `)`. @@ -503,7 +511,11 @@ pub fn reference_full(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_1 = Token::Reference; tokenizer.tokenize_state.token_2 = Token::ReferenceMarker; tokenizer.tokenize_state.token_3 = Token::ReferenceString; - tokenizer.go(StateName::LabelStart, StateName::LabelEndReferenceFullAfter) + tokenizer.attempt( + StateName::LabelStart, + State::Fn(StateName::LabelEndReferenceFullAfter), + State::Nok, + ) } _ => unreachable!("expected `[`"), } diff --git a/src/construct/list.rs b/src/construct/list.rs index 6ecfb04..9e4b105 100644 --- a/src/construct/list.rs +++ b/src/construct/list.rs @@ -71,7 +71,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { usize::MAX }, ); - tokenizer.go(state_name, StateName::ListBefore) + tokenizer.attempt(state_name, State::Fn(StateName::ListBefore), State::Nok) } else { State::Nok } @@ -86,13 +86,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // Unordered. - Some(b'*' | b'-') => tokenizer.check(StateName::ThematicBreakStart, |ok| { - State::Fn(if ok { - StateName::ListNok - } else { - StateName::ListBeforeUnordered - }) - }), + Some(b'*' | b'-') => tokenizer.check( + StateName::ThematicBreakStart, + State::Fn(StateName::ListNok), + State::Fn(StateName::ListBeforeUnordered), + ), Some(b'+') => before_unordered(tokenizer), // Ordered. Some(b'0'..=b'9') if !tokenizer.interrupt => before_ordered(tokenizer), @@ -175,13 +173,11 @@ pub fn marker(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn marker_after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.size = 1; - tokenizer.check(StateName::BlankLineStart, |ok| { - State::Fn(if ok { - StateName::ListAfter - } else { - StateName::ListMarkerAfterFilled - }) - }) + tokenizer.check( + StateName::BlankLineStart, + State::Fn(StateName::ListAfter), + State::Fn(StateName::ListMarkerAfterFilled), + ) } /// After a list item marker, not followed by a blank line. @@ -194,13 +190,11 @@ pub fn marker_after_filled(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.size = 0; // Attempt to parse up to the largest allowed indent, `nok` if there is more whitespace. - tokenizer.attempt(StateName::ListWhitespace, |ok| { - State::Fn(if ok { - StateName::ListAfter - } else { - StateName::ListPrefixOther - }) - }) + tokenizer.attempt( + StateName::ListWhitespace, + State::Fn(StateName::ListAfter), + State::Fn(StateName::ListPrefixOther), + ) } /// In whitespace after a marker. @@ -211,7 +205,11 @@ pub fn marker_after_filled(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn whitespace(tokenizer: &mut Tokenizer) -> State { let state_name = space_or_tab_min_max(tokenizer, 1, TAB_SIZE); - tokenizer.go(state_name, StateName::ListWhitespaceAfter) + tokenizer.attempt( + state_name, + State::Fn(StateName::ListWhitespaceAfter), + State::Nok, + ) } /// After acceptable whitespace. @@ -295,13 +293,11 @@ pub fn after(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn cont_start(tokenizer: &mut Tokenizer) -> State { - tokenizer.check(StateName::BlankLineStart, |ok| { - State::Fn(if ok { - StateName::ListContBlank - } else { - StateName::ListContFilled - }) - }) + tokenizer.check( + StateName::BlankLineStart, + State::Fn(StateName::ListContBlank), + State::Fn(StateName::ListContFilled), + ) } /// Start of blank list item continuation. @@ -321,7 +317,7 @@ pub fn cont_blank(tokenizer: &mut Tokenizer) -> State { } else { let state_name = space_or_tab_min_max(tokenizer, 0, size); // Consume, optionally, at most `size`. - tokenizer.go(state_name, StateName::ListOk) + tokenizer.attempt(state_name, State::Fn(StateName::ListOk), State::Nok) } } @@ -340,7 +336,7 @@ pub fn cont_filled(tokenizer: &mut Tokenizer) -> State { // Consume exactly `size`. let state_name = space_or_tab_min_max(tokenizer, size, size); - tokenizer.go(state_name, StateName::ListOk) + tokenizer.attempt(state_name, State::Fn(StateName::ListOk), State::Nok) } /// A state fn to yield [`State::Ok`]. diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs index 0c8366e..070bdc0 100644 --- a/src/construct/partial_label.rs +++ b/src/construct/partial_label.rs @@ -109,13 +109,11 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { connect: tokenizer.tokenize_state.connect, }, ); - tokenizer.attempt(state_name, |ok| { - State::Fn(if ok { - StateName::LabelEolAfter - } else { - StateName::LabelAtBlankLine - }) - }) + tokenizer.attempt( + state_name, + State::Fn(StateName::LabelEolAfter), + State::Fn(StateName::LabelAtBlankLine), + ) } Some(b']') => { tokenizer.exit(tokenizer.tokenize_state.token_3.clone()); diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs index b0b35a6..0b63b0e 100644 --- a/src/construct/partial_space_or_tab.rs +++ b/src/construct/partial_space_or_tab.rs @@ -180,13 +180,11 @@ pub fn eol_start(tokenizer: &mut Tokenizer) -> State { }, ); - tokenizer.attempt(state_name, move |ok| { - State::Fn(if ok { - StateName::SpaceOrTabEolAfterFirst - } else { - StateName::SpaceOrTabEolAtEol - }) - }) + tokenizer.attempt( + state_name, + State::Fn(StateName::SpaceOrTabEolAfterFirst), + State::Fn(StateName::SpaceOrTabEolAtEol), + ) } pub fn eol_after_first(tokenizer: &mut Tokenizer) -> State { @@ -269,7 +267,11 @@ pub fn eol_after_eol(tokenizer: &mut Tokenizer) -> State { connect: tokenizer.tokenize_state.space_or_tab_eol_connect, }, ); - tokenizer.attempt_opt(state_name, StateName::SpaceOrTabEolAfterMore) + tokenizer.attempt( + state_name, + State::Fn(StateName::SpaceOrTabEolAfterMore), + State::Fn(StateName::SpaceOrTabEolAfterMore), + ) } /// `space_or_tab_eol`: after more (optional) `space_or_tab`. diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs index 8b72608..1d3e9b2 100644 --- a/src/construct/partial_title.rs +++ b/src/construct/partial_title.rs @@ -106,13 +106,11 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { }, ); - tokenizer.attempt(state_name, |ok| { - State::Fn(if ok { - StateName::TitleAfterEol - } else { - StateName::TitleAtBlankLine - }) - }) + tokenizer.attempt( + state_name, + State::Fn(StateName::TitleAfterEol), + State::Fn(StateName::TitleAtBlankLine), + ) } Some(b'"' | b'\'' | b')') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs index 4ed25b6..30fabe4 100644 --- a/src/construct/thematic_break.rs +++ b/src/construct/thematic_break.rs @@ -72,7 +72,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { }, ); - tokenizer.go(state_name, StateName::ThematicBreakBefore) + tokenizer.attempt( + state_name, + State::Fn(StateName::ThematicBreakBefore), + State::Nok, + ) } else { State::Nok } @@ -142,7 +146,11 @@ pub fn sequence(tokenizer: &mut Tokenizer) -> State { _ => { tokenizer.exit(Token::ThematicBreakSequence); let state_name = space_or_tab(tokenizer); - tokenizer.attempt_opt(state_name, StateName::ThematicBreakAtBreak) + tokenizer.attempt( + state_name, + State::Fn(StateName::ThematicBreakAtBreak), + State::Fn(StateName::ThematicBreakAtBreak), + ) } } } diff --git a/src/content/document.rs b/src/content/document.rs index 7a43d48..d02021a 100644 --- a/src/content/document.rs +++ b/src/content/document.rs @@ -106,7 +106,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.parse_state, ))); tokenizer.tokenize_state.document_child_state = Some(State::Fn(StateName::FlowStart)); - tokenizer.attempt_opt(StateName::BomStart, StateName::DocumentLineStart) + tokenizer.attempt( + StateName::BomStart, + State::Fn(StateName::DocumentLineStart), + State::Fn(StateName::DocumentLineStart), + ) } /// Start of a line. @@ -146,13 +150,11 @@ pub fn container_existing_before(tokenizer: &mut Tokenizer) -> State { }; tokenizer.container = Some(container); - tokenizer.attempt(state_name, |ok| { - State::Fn(if ok { - StateName::DocumentContainerExistingAfter - } else { - StateName::DocumentContainerExistingMissing - }) - }) + tokenizer.attempt( + state_name, + State::Fn(StateName::DocumentContainerExistingAfter), + State::Fn(StateName::DocumentContainerExistingMissing), + ) } // Otherwise, check new containers. else { @@ -235,13 +237,11 @@ pub fn container_new_before(tokenizer: &mut Tokenizer) -> State { size: 0, }); - tokenizer.attempt(StateName::BlockQuoteStart, |ok| { - State::Fn(if ok { - StateName::DocumentContainerNewAfter - } else { - StateName::DocumentContainerNewBeforeNotBlockQuote - }) - }) + tokenizer.attempt( + StateName::BlockQuoteStart, + State::Fn(StateName::DocumentContainerNewAfter), + State::Fn(StateName::DocumentContainerNewBeforeNotBlockQuote), + ) } /// To do. @@ -253,13 +253,11 @@ pub fn container_new_before_not_block_quote(tokenizer: &mut Tokenizer) -> State size: 0, }); - tokenizer.attempt(StateName::ListStart, |ok| { - State::Fn(if ok { - StateName::DocumentContainerNewAfter - } else { - StateName::DocumentContainersAfter - }) - }) + tokenizer.attempt( + StateName::ListStart, + State::Fn(StateName::DocumentContainerNewAfter), + State::Fn(StateName::DocumentContainersAfter), + ) } /// After a new container. diff --git a/src/content/flow.rs b/src/content/flow.rs index 6f62901..bfaf5e9 100644 --- a/src/content/flow.rs +++ b/src/content/flow.rs @@ -35,13 +35,11 @@ use crate::tokenizer::{State, StateName, Tokenizer}; pub fn start(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => State::Ok, - _ => tokenizer.attempt(StateName::BlankLineStart, |ok| { - State::Fn(if ok { - StateName::FlowBlankLineAfter - } else { - StateName::FlowBefore - }) - }), + _ => tokenizer.attempt( + StateName::BlankLineStart, + State::Fn(StateName::FlowBlankLineAfter), + State::Fn(StateName::FlowBefore), + ), } } @@ -58,27 +56,64 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// |
/// ``` pub fn before(tokenizer: &mut Tokenizer) -> State { - match tokenizer.current { - None => State::Ok, - _ => tokenizer.attempt_n( - vec![ - StateName::CodeIndentedStart, - StateName::CodeFencedStart, - StateName::HtmlFlowStart, - StateName::HeadingAtxStart, - StateName::HeadingSetextStart, - StateName::ThematicBreakStart, - StateName::DefinitionStart, - ], - |ok| { - State::Fn(if ok { - StateName::FlowAfter - } else { - StateName::FlowBeforeParagraph - }) - }, - ), - } + // match tokenizer.current { + // None => State::Ok, + // _ => { + tokenizer.attempt( + StateName::CodeIndentedStart, + State::Fn(StateName::FlowAfter), + State::Fn(StateName::FlowBeforeCodeFenced), + ) + // } + // } +} + +pub fn before_code_fenced(tokenizer: &mut Tokenizer) -> State { + tokenizer.attempt( + StateName::CodeFencedStart, + State::Fn(StateName::FlowAfter), + State::Fn(StateName::FlowBeforeHtml), + ) +} + +pub fn before_html(tokenizer: &mut Tokenizer) -> State { + tokenizer.attempt( + StateName::HtmlFlowStart, + State::Fn(StateName::FlowAfter), + State::Fn(StateName::FlowBeforeHeadingAtx), + ) +} + +pub fn before_heading_atx(tokenizer: &mut Tokenizer) -> State { + tokenizer.attempt( + StateName::HeadingAtxStart, + State::Fn(StateName::FlowAfter), + State::Fn(StateName::FlowBeforeHeadingSetext), + ) +} + +pub fn before_heading_setext(tokenizer: &mut Tokenizer) -> State { + tokenizer.attempt( + StateName::HeadingSetextStart, + State::Fn(StateName::FlowAfter), + State::Fn(StateName::FlowBeforeThematicBreak), + ) +} + +pub fn before_thematic_break(tokenizer: &mut Tokenizer) -> State { + tokenizer.attempt( + StateName::ThematicBreakStart, + State::Fn(StateName::FlowAfter), + State::Fn(StateName::FlowBeforeDefinition), + ) +} + +pub fn before_definition(tokenizer: &mut Tokenizer) -> State { + tokenizer.attempt( + StateName::DefinitionStart, + State::Fn(StateName::FlowAfter), + State::Fn(StateName::FlowBeforeParagraph), + ) } /// After a blank line. @@ -131,5 +166,9 @@ pub fn after(tokenizer: &mut Tokenizer) -> State { /// |asd /// ``` pub fn before_paragraph(tokenizer: &mut Tokenizer) -> State { - tokenizer.go(StateName::ParagraphStart, StateName::FlowAfter) + tokenizer.attempt( + StateName::ParagraphStart, + State::Fn(StateName::FlowAfter), + State::Nok, + ) } diff --git a/src/content/string.rs b/src/content/string.rs index 697ec2c..b3df6ec 100644 --- a/src/content/string.rs +++ b/src/content/string.rs @@ -28,25 +28,27 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => State::Ok, - _ => tokenizer.attempt_n( - vec![ - StateName::CharacterReferenceStart, - StateName::CharacterEscapeStart, - ], - |ok| { - State::Fn(if ok { - StateName::StringBefore - } else { - StateName::StringBeforeData - }) - }, + Some(b'&') => tokenizer.attempt( + StateName::CharacterReferenceStart, + State::Fn(StateName::StringBefore), + State::Fn(StateName::StringBeforeData), ), + Some(b'\\') => tokenizer.attempt( + StateName::CharacterEscapeStart, + State::Fn(StateName::StringBefore), + State::Fn(StateName::StringBeforeData), + ), + _ => before_data(tokenizer), } } /// At data. pub fn before_data(tokenizer: &mut Tokenizer) -> State { - tokenizer.go(StateName::DataStart, StateName::StringBefore) + tokenizer.attempt( + StateName::DataStart, + State::Fn(StateName::StringBefore), + State::Nok, + ) } /// Resolve whitespace. diff --git a/src/content/text.rs b/src/content/text.rs index d8a2726..ff8c9eb 100644 --- a/src/content/text.rs +++ b/src/content/text.rs @@ -46,37 +46,80 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => State::Ok, - _ => tokenizer.attempt_n( - vec![ - StateName::AttentionStart, - StateName::AutolinkStart, - StateName::CharacterEscapeStart, - StateName::CharacterReferenceStart, - StateName::CodeTextStart, - StateName::HardBreakEscapeStart, - StateName::HtmlTextStart, - StateName::LabelEndStart, - StateName::LabelStartImageStart, - StateName::LabelStartLinkStart, - ], - |ok| { - State::Fn(if ok { - StateName::TextBefore - } else { - StateName::TextBeforeData - }) - }, + Some(b'!') => tokenizer.attempt( + StateName::LabelStartImageStart, + State::Fn(StateName::TextBefore), + State::Fn(StateName::TextBeforeData), ), + Some(b'&') => tokenizer.attempt( + StateName::CharacterReferenceStart, + State::Fn(StateName::TextBefore), + State::Fn(StateName::TextBeforeData), + ), + Some(b'*' | b'_') => tokenizer.attempt( + StateName::AttentionStart, + State::Fn(StateName::TextBefore), + State::Fn(StateName::TextBeforeData), + ), + // `autolink`, `html_text` (order does not matter) + Some(b'<') => tokenizer.attempt( + StateName::AutolinkStart, + State::Fn(StateName::TextBefore), + State::Fn(StateName::TextBeforeHtml), + ), + Some(b'[') => tokenizer.attempt( + StateName::LabelStartLinkStart, + State::Fn(StateName::TextBefore), + State::Fn(StateName::TextBeforeData), + ), + Some(b'\\') => tokenizer.attempt( + StateName::CharacterEscapeStart, + State::Fn(StateName::TextBefore), + State::Fn(StateName::TextBeforeHardBreakEscape), + ), + Some(b']') => tokenizer.attempt( + StateName::LabelEndStart, + State::Fn(StateName::TextBefore), + State::Fn(StateName::TextBeforeData), + ), + Some(b'`') => tokenizer.attempt( + StateName::CodeTextStart, + State::Fn(StateName::TextBefore), + State::Fn(StateName::TextBeforeData), + ), + _ => before_data(tokenizer), } } +/// To do. +pub fn before_html(tokenizer: &mut Tokenizer) -> State { + tokenizer.attempt( + StateName::HtmlTextStart, + State::Fn(StateName::TextBefore), + State::Fn(StateName::TextBeforeData), + ) +} + +/// To do. +pub fn before_hard_break_escape(tokenizer: &mut Tokenizer) -> State { + tokenizer.attempt( + StateName::HardBreakEscapeStart, + State::Fn(StateName::TextBefore), + State::Fn(StateName::TextBeforeData), + ) +} + /// At data. /// /// ```markdown /// |qwe /// ``` pub fn before_data(tokenizer: &mut Tokenizer) -> State { - tokenizer.go(StateName::DataStart, StateName::TextBefore) + tokenizer.attempt( + StateName::DataStart, + State::Fn(StateName::TextBefore), + State::Nok, + ) } /// Resolve whitespace. diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 7d28b77..1d02d5a 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -29,6 +29,7 @@ pub enum ContentType { Text, } +/// To do. #[derive(Debug, PartialEq)] pub enum ByteAction { Normal(u8), @@ -83,8 +84,20 @@ pub struct Event { pub link: Option, } -pub struct Attempt { - done: Box State + 'static>, +#[derive(Debug, PartialEq)] +enum AttemptKind { + Attempt, + Check, +} + +/// To do. +#[derive(Debug)] +struct Attempt { + /// To do. + ok: State, + nok: State, + kind: AttemptKind, + state: Option, } /// Callback that can be registered and is called when the tokenizer is done. @@ -202,6 +215,12 @@ pub enum StateName { FlowStart, FlowBefore, + FlowBeforeCodeFenced, + FlowBeforeHtml, + FlowBeforeHeadingAtx, + FlowBeforeHeadingSetext, + FlowBeforeThematicBreak, + FlowBeforeDefinition, FlowAfter, FlowBlankLineAfter, FlowBeforeParagraph, @@ -350,6 +369,8 @@ pub enum StateName { TextStart, TextBefore, + TextBeforeHtml, + TextBeforeHardBreakEscape, TextBeforeData, ThematicBreakStart, @@ -488,6 +509,14 @@ impl StateName { StateName::FlowStart => content::flow::start, StateName::FlowBefore => content::flow::before, + + StateName::FlowBeforeCodeFenced => content::flow::before_code_fenced, + StateName::FlowBeforeHtml => content::flow::before_html, + StateName::FlowBeforeHeadingAtx => content::flow::before_heading_atx, + StateName::FlowBeforeHeadingSetext => content::flow::before_heading_setext, + StateName::FlowBeforeThematicBreak => content::flow::before_thematic_break, + StateName::FlowBeforeDefinition => content::flow::before_definition, + StateName::FlowAfter => content::flow::after, StateName::FlowBlankLineAfter => content::flow::blank_line_after, StateName::FlowBeforeParagraph => content::flow::before_paragraph, @@ -683,6 +712,8 @@ impl StateName { StateName::TextStart => content::text::start, StateName::TextBefore => content::text::before, + StateName::TextBeforeHtml => content::text::before_html, + StateName::TextBeforeHardBreakEscape => content::text::before_hard_break_escape, StateName::TextBeforeData => content::text::before_data, StateName::ThematicBreakStart => construct::thematic_break::start, @@ -1179,31 +1210,6 @@ impl<'a> Tokenizer<'a> { self.stack.truncate(previous.stack_len); } - /// Parse with `state_name` and its future states, switching to `ok` when - /// successful, and passing [`State::Nok`][] back up if it occurs. - /// - /// This function does not capture the current state, in case of - /// `State::Nok`, as it is assumed that this `go` is itself wrapped in - /// another `attempt`. - #[allow(clippy::unused_self)] - pub fn go(&mut self, state_name: StateName, after: StateName) -> State { - attempt_impl( - self, - state_name, - Box::new(move |_tokenizer: &mut Tokenizer, state| { - if matches!(state, State::Ok) { - State::Fn(after) - } else { - // Must be `Nok`. - // We don’t capture/free state because it is assumed that - // `go` itself is wrapped in another attempt that does that - // if it can occur. - state - } - }), - ) - } - /// Parse with `state_name` and its future states, to check if it result in /// [`State::Ok`][] or [`State::Nok`][], revert on both cases, and then /// call `done` with whether it was successful or not. @@ -1213,22 +1219,8 @@ impl<'a> Tokenizer<'a> { /// future states until it yields `State::Ok` or `State::Nok`. /// It then applies the captured state, calls `done`, and feeds all /// captured codes to its future states. - pub fn check( - &mut self, - state_name: StateName, - done: impl FnOnce(bool) -> State + 'static, - ) -> State { - let previous = self.capture(); - - attempt_impl( - self, - state_name, - Box::new(|tokenizer: &mut Tokenizer, state| { - tokenizer.free(previous); - tokenizer.consumed = true; - done(matches!(state, State::Ok)) - }), - ) + pub fn check(&mut self, state_name: StateName, ok: State, nok: State) -> State { + attempt_impl(self, state_name, ok, nok, AttemptKind::Check) } /// Parse with `state_name` and its future states, to check if it results in @@ -1242,80 +1234,8 @@ impl<'a> Tokenizer<'a> { /// `done` and yields its result. /// If instead `State::Nok` was yielded, the captured state is applied, /// `done` is called, and all captured codes are fed to its future states. - pub fn attempt( - &mut self, - state_name: StateName, - done: impl FnOnce(bool) -> State + 'static, - ) -> State { - let previous = self.capture(); - - log::debug!("attempting: {:?}", state_name); - // self.consumed = false; - attempt_impl( - self, - state_name, - Box::new(move |tokenizer: &mut Tokenizer, state| { - let ok = matches!(state, State::Ok); - - if !ok { - tokenizer.free(previous); - tokenizer.consumed = true; - } - - log::debug!( - "attempted {:?}: {:?}, at {:?}", - state_name, - ok, - tokenizer.point - ); - - done(ok) - }), - ) - } - - /// Just like [`attempt`][Tokenizer::attempt], but many. - pub fn attempt_n( - &mut self, - mut state_names: Vec, - done: impl FnOnce(bool) -> State + 'static, - ) -> State { - if state_names.is_empty() { - done(false) - } else { - let previous = self.capture(); - let state_name = state_names.remove(0); - self.consumed = false; - log::debug!("attempting (n): {:?}", state_name); - attempt_impl( - self, - state_name, - Box::new(move |tokenizer: &mut Tokenizer, state| { - let ok = matches!(state, State::Ok); - - log::debug!( - "attempted (n) {:?}: {:?}, at {:?}", - state_name, - ok, - tokenizer.point - ); - - if ok { - done(true) - } else { - tokenizer.free(previous); - tokenizer.consumed = true; - tokenizer.attempt_n(state_names, done) - } - }), - ) - } - } - - /// Just like [`attempt`][Tokenizer::attempt], but for when you don’t care - /// about `ok`. - pub fn attempt_opt(&mut self, state_name: StateName, after: StateName) -> State { - self.attempt(state_name, move |_ok| State::Fn(after)) + pub fn attempt(&mut self, state_name: StateName, ok: State, nok: State) -> State { + attempt_impl(self, state_name, ok, nok, AttemptKind::Attempt) } /// Feed a list of `codes` into `start`. @@ -1336,9 +1256,18 @@ impl<'a> Tokenizer<'a> { match state { State::Ok | State::Nok => { if let Some(attempt) = self.attempts.pop() { - let done = attempt.done; + if attempt.kind == AttemptKind::Check || state == State::Nok { + if let Some(state) = attempt.state { + self.free(state); + } + } + self.consumed = true; - state = done(self, state); + state = if state == State::Ok { + attempt.ok + } else { + attempt.nok + }; } else { break; } @@ -1375,9 +1304,18 @@ impl<'a> Tokenizer<'a> { match state { State::Ok | State::Nok => { if let Some(attempt) = self.attempts.pop() { - let done = attempt.done; + if attempt.kind == AttemptKind::Check || state == State::Nok { + if let Some(state) = attempt.state { + self.free(state); + } + } + self.consumed = true; - state = done(self, state); + state = if state == State::Ok { + attempt.ok + } else { + attempt.nok + }; } else { break; } @@ -1480,9 +1418,25 @@ fn byte_action(bytes: &[u8], point: &Point) -> ByteAction { fn attempt_impl( tokenizer: &mut Tokenizer, state_name: StateName, - done: Box State + 'static>, + ok: State, + nok: State, + kind: AttemptKind, ) -> State { - tokenizer.attempts.push(Attempt { done }); + // Always capture (and restore) when checking. + // No need to capture (and restore) when `nok` is `State::Nok`, because the + // parent attempt will do it. + let state = if kind == AttemptKind::Check || nok != State::Nok { + Some(tokenizer.capture()) + } else { + None + }; + + tokenizer.attempts.push(Attempt { + ok, + nok, + kind, + state, + }); call_impl(tokenizer, state_name) } -- cgit