diff options
-rw-r--r-- | src/construct/blank_line.rs | 6 | ||||
-rw-r--r-- | src/construct/block_quote.rs | 16 | ||||
-rw-r--r-- | src/construct/code_fenced.rs | 75 | ||||
-rw-r--r-- | src/construct/code_indented.rs | 30 | ||||
-rw-r--r-- | src/construct/definition.rs | 51 | ||||
-rw-r--r-- | src/construct/heading_atx.rs | 14 | ||||
-rw-r--r-- | src/construct/heading_setext.rs | 16 | ||||
-rw-r--r-- | src/construct/html_flow.rs | 15 | ||||
-rw-r--r-- | src/construct/html_text.rs | 95 | ||||
-rw-r--r-- | src/construct/label_end.rs | 71 | ||||
-rw-r--r-- | src/construct/list.rs | 46 | ||||
-rw-r--r-- | src/construct/partial_label.rs | 13 | ||||
-rw-r--r-- | src/construct/partial_space_or_tab_eol.rs | 28 | ||||
-rw-r--r-- | src/construct/partial_title.rs | 14 | ||||
-rw-r--r-- | src/construct/thematic_break.rs | 16 | ||||
-rw-r--r-- | src/content/document.rs | 26 | ||||
-rw-r--r-- | src/content/flow.rs | 120 | ||||
-rw-r--r-- | src/content/string.rs | 31 | ||||
-rw-r--r-- | src/content/text.rs | 111 | ||||
-rw-r--r-- | src/tokenizer.rs | 19 |
20 files changed, 400 insertions, 413 deletions
diff --git a/src/construct/blank_line.rs b/src/construct/blank_line.rs index 2adc7a4..eeef637 100644 --- a/src/construct/blank_line.rs +++ b/src/construct/blank_line.rs @@ -47,12 +47,12 @@ use crate::tokenizer::Tokenizer; /// ^ /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { - let name = space_or_tab(tokenizer); tokenizer.attempt( - name, State::Next(StateName::BlankLineAfter), State::Next(StateName::BlankLineAfter), - ) + ); + + State::Retry(space_or_tab(tokenizer)) } /// After zero or more spaces or tabs, before a line ending or EOF. diff --git a/src/construct/block_quote.rs b/src/construct/block_quote.rs index f2b0179..1a32f7d 100644 --- a/src/construct/block_quote.rs +++ b/src/construct/block_quote.rs @@ -47,7 +47,8 @@ use crate::tokenizer::Tokenizer; /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { if tokenizer.parse_state.constructs.block_quote { - let name = space_or_tab_min_max( + tokenizer.attempt(State::Next(StateName::BlockQuoteBefore), State::Nok); + State::Retry(space_or_tab_min_max( tokenizer, 0, if tokenizer.parse_state.constructs.code_indented { @@ -55,8 +56,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } else { usize::MAX }, - ); - tokenizer.attempt(name, State::Next(StateName::BlockQuoteBefore), State::Nok) + )) } else { State::Nok } @@ -86,7 +86,8 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn cont_start(tokenizer: &mut Tokenizer) -> State { - let name = space_or_tab_min_max( + tokenizer.attempt(State::Next(StateName::BlockQuoteContBefore), State::Nok); + State::Retry(space_or_tab_min_max( tokenizer, 0, if tokenizer.parse_state.constructs.code_indented { @@ -94,12 +95,7 @@ pub fn cont_start(tokenizer: &mut Tokenizer) -> State { } else { usize::MAX }, - ); - tokenizer.attempt( - name, - State::Next(StateName::BlockQuoteContBefore), - State::Nok, - ) + )) } /// After whitespace, before `>`. diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs index 6e29010..ed39917 100644 --- a/src/construct/code_fenced.rs +++ b/src/construct/code_fenced.rs @@ -120,7 +120,12 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { if tokenizer.parse_state.constructs.code_fenced { tokenizer.enter(Name::CodeFenced); tokenizer.enter(Name::CodeFencedFence); - let name = space_or_tab_min_max( + + tokenizer.attempt( + State::Next(StateName::CodeFencedBeforeSequenceOpen), + State::Nok, + ); + State::Retry(space_or_tab_min_max( tokenizer, 0, if tokenizer.parse_state.constructs.code_indented { @@ -128,12 +133,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } else { usize::MAX }, - ); - tokenizer.attempt( - name, - State::Next(StateName::CodeFencedBeforeSequenceOpen), - State::Nok, - ) + )) } else { State::Nok } @@ -188,12 +188,13 @@ pub fn sequence_open(tokenizer: &mut Tokenizer) -> State { } _ if tokenizer.tokenize_state.size >= CODE_FENCED_SEQUENCE_SIZE_MIN => { tokenizer.exit(Name::CodeFencedFenceSequence); - let name = space_or_tab(tokenizer); + tokenizer.attempt( - name, State::Next(StateName::CodeFencedInfoBefore), State::Next(StateName::CodeFencedInfoBefore), - ) + ); + + State::Retry(space_or_tab(tokenizer)) } _ => { tokenizer.tokenize_state.marker = 0; @@ -219,10 +220,10 @@ pub fn info_before(tokenizer: &mut Tokenizer) -> State { // Do not form containers. tokenizer.concrete = true; tokenizer.check( - StateName::NonLazyContinuationStart, State::Next(StateName::CodeFencedAtNonLazyBreak), State::Next(StateName::CodeFencedAfter), - ) + ); + State::Retry(StateName::NonLazyContinuationStart) } _ => { tokenizer.enter(Name::CodeFencedFenceInfo); @@ -250,12 +251,11 @@ pub fn info(tokenizer: &mut Tokenizer) -> State { Some(b'\t' | b' ') => { tokenizer.exit(Name::Data); tokenizer.exit(Name::CodeFencedFenceInfo); - let name = space_or_tab(tokenizer); tokenizer.attempt( - name, State::Next(StateName::CodeFencedMetaBefore), State::Next(StateName::CodeFencedMetaBefore), - ) + ); + State::Retry(space_or_tab(tokenizer)) } Some(b'`') if tokenizer.tokenize_state.marker == b'`' => { tokenizer.concrete = false; @@ -330,10 +330,10 @@ pub fn meta(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn at_non_lazy_break(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - StateName::CodeFencedCloseBefore, State::Next(StateName::CodeFencedAfter), State::Next(StateName::CodeFencedContentBefore), - ) + ); + State::Retry(StateName::CodeFencedCloseBefore) } /// Before a closing fence, at the line ending. @@ -366,7 +366,13 @@ pub fn close_before(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn close_start(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(Name::CodeFencedFence); - let name = space_or_tab_min_max( + + tokenizer.attempt( + State::Next(StateName::CodeFencedBeforeSequenceClose), + State::Nok, + ); + + State::Retry(space_or_tab_min_max( tokenizer, 0, if tokenizer.parse_state.constructs.code_indented { @@ -374,12 +380,7 @@ pub fn close_start(tokenizer: &mut Tokenizer) -> State { } else { usize::MAX }, - ); - tokenizer.attempt( - name, - State::Next(StateName::CodeFencedBeforeSequenceClose), - State::Nok, - ) + )) } /// In a closing fence, after optional whitespace, before sequence. @@ -420,12 +421,11 @@ pub fn sequence_close(tokenizer: &mut Tokenizer) -> State { { tokenizer.tokenize_state.size_b = 0; tokenizer.exit(Name::CodeFencedFenceSequence); - let name = space_or_tab(tokenizer); tokenizer.attempt( - name, State::Next(StateName::CodeFencedAfterSequenceClose), State::Next(StateName::CodeFencedAfterSequenceClose), - ) + ); + State::Retry(space_or_tab(tokenizer)) } _ => { tokenizer.tokenize_state.size_b = 0; @@ -475,12 +475,15 @@ pub fn content_before(tokenizer: &mut Tokenizer) -> State { /// | ~~~ /// ``` pub fn content_start(tokenizer: &mut Tokenizer) -> State { - let name = space_or_tab_min_max(tokenizer, 0, tokenizer.tokenize_state.size_c); tokenizer.attempt( - name, State::Next(StateName::CodeFencedBeforeContentChunk), State::Nok, - ) + ); + State::Retry(space_or_tab_min_max( + tokenizer, + 0, + tokenizer.tokenize_state.size_c, + )) } /// Before code content, after a prefix. @@ -493,11 +496,13 @@ pub fn content_start(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn before_content_chunk(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - None | Some(b'\n') => tokenizer.check( - StateName::NonLazyContinuationStart, - State::Next(StateName::CodeFencedAtNonLazyBreak), - State::Next(StateName::CodeFencedAfter), - ), + None | Some(b'\n') => { + tokenizer.check( + State::Next(StateName::CodeFencedAtNonLazyBreak), + State::Next(StateName::CodeFencedAfter), + ); + State::Retry(StateName::NonLazyContinuationStart) + } _ => { tokenizer.enter(Name::CodeFlowChunk); State::Retry(StateName::CodeFencedContentChunk) diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs index 598d2b0..2ab117e 100644 --- a/src/construct/code_indented.rs +++ b/src/construct/code_indented.rs @@ -65,12 +65,8 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { // Do not interrupt paragraphs. if !tokenizer.interrupt && tokenizer.parse_state.constructs.code_indented { tokenizer.enter(Name::CodeIndented); - let name = space_or_tab_min_max(tokenizer, TAB_SIZE, TAB_SIZE); - tokenizer.attempt( - name, - State::Next(StateName::CodeIndentedAtBreak), - State::Nok, - ) + tokenizer.attempt(State::Next(StateName::CodeIndentedAtBreak), State::Nok); + State::Retry(space_or_tab_min_max(tokenizer, TAB_SIZE, TAB_SIZE)) } else { State::Nok } @@ -85,11 +81,13 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { pub fn at_break(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => State::Retry(StateName::CodeIndentedAfter), - Some(b'\n') => tokenizer.attempt( - StateName::CodeIndentedFurtherStart, - State::Next(StateName::CodeIndentedAtBreak), - State::Next(StateName::CodeIndentedAfter), - ), + Some(b'\n') => { + tokenizer.attempt( + State::Next(StateName::CodeIndentedAtBreak), + State::Next(StateName::CodeIndentedAfter), + ); + State::Retry(StateName::CodeIndentedFurtherStart) + } _ => { tokenizer.enter(Name::CodeFlowChunk); State::Retry(StateName::CodeIndentedInside) @@ -145,12 +143,11 @@ pub fn further_start(tokenizer: &mut Tokenizer) -> State { State::Next(StateName::CodeIndentedFurtherStart) } _ if !tokenizer.lazy => { - let name = space_or_tab_min_max(tokenizer, TAB_SIZE, TAB_SIZE); tokenizer.attempt( - name, State::Next(StateName::CodeIndentedFurtherEnd), State::Next(StateName::CodeIndentedFurtherBegin), - ) + ); + State::Retry(space_or_tab_min_max(tokenizer, TAB_SIZE, TAB_SIZE)) } _ => State::Nok, } @@ -175,12 +172,11 @@ pub fn further_end(_tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn further_begin(tokenizer: &mut Tokenizer) -> State { - let name = space_or_tab(tokenizer); tokenizer.attempt( - name, State::Next(StateName::CodeIndentedFurtherAfter), State::Next(StateName::CodeIndentedFurtherAfter), - ) + ); + State::Retry(space_or_tab(tokenizer)) } /// After whitespace, not indented enough. diff --git a/src/construct/definition.rs b/src/construct/definition.rs index ee930b1..11f1062 100644 --- a/src/construct/definition.rs +++ b/src/construct/definition.rs @@ -124,13 +124,12 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { if possible && tokenizer.parse_state.constructs.definition { tokenizer.enter(Name::Definition); - // Note: arbitrary whitespace allowed even if code (indented) is on. - let name = space_or_tab(tokenizer); tokenizer.attempt( - name, State::Next(StateName::DefinitionBefore), State::Next(StateName::DefinitionBefore), - ) + ); + // Note: arbitrary whitespace allowed even if code (indented) is on. + State::Retry(space_or_tab(tokenizer)) } else { State::Nok } @@ -148,11 +147,8 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_1 = Name::DefinitionLabel; tokenizer.tokenize_state.token_2 = Name::DefinitionLabelMarker; tokenizer.tokenize_state.token_3 = Name::DefinitionLabelString; - tokenizer.attempt( - StateName::LabelStart, - State::Next(StateName::DefinitionLabelAfter), - State::Nok, - ) + tokenizer.attempt(State::Next(StateName::DefinitionLabelAfter), State::Nok); + State::Retry(StateName::LabelStart) } _ => State::Nok, } @@ -193,12 +189,11 @@ pub fn label_after(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn marker_after(tokenizer: &mut Tokenizer) -> State { - let name = space_or_tab_eol(tokenizer); tokenizer.attempt( - name, State::Next(StateName::DefinitionDestinationBefore), State::Next(StateName::DefinitionDestinationBefore), - ) + ); + State::Retry(space_or_tab_eol(tokenizer)) } /// Before a destination. @@ -215,10 +210,10 @@ pub fn destination_before(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_5 = Name::DefinitionDestinationString; tokenizer.tokenize_state.size_b = usize::MAX; tokenizer.attempt( - StateName::DestinationStart, State::Next(StateName::DefinitionDestinationAfter), State::Next(StateName::DefinitionDestinationMissing), - ) + ); + State::Retry(StateName::DestinationStart) } /// After a destination. @@ -235,10 +230,10 @@ pub fn destination_after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_5 = Name::Data; tokenizer.tokenize_state.size_b = 0; tokenizer.attempt( - StateName::DefinitionTitleBefore, State::Next(StateName::DefinitionAfter), State::Next(StateName::DefinitionAfter), - ) + ); + State::Retry(StateName::DefinitionTitleBefore) } /// Without destination. @@ -262,12 +257,11 @@ pub fn destination_missing(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn after(tokenizer: &mut Tokenizer) -> State { - let name = space_or_tab(tokenizer); tokenizer.attempt( - name, State::Next(StateName::DefinitionAfterWhitespace), State::Next(StateName::DefinitionAfterWhitespace), - ) + ); + State::Retry(space_or_tab(tokenizer)) } /// After a definition, after optional whitespace. @@ -306,7 +300,7 @@ pub fn after_whitespace(tokenizer: &mut Tokenizer) -> State { _ => { tokenizer.tokenize_state.end = 0; State::Nok - }, + } } } @@ -319,12 +313,11 @@ pub fn after_whitespace(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn title_before(tokenizer: &mut Tokenizer) -> State { - let name = space_or_tab_eol(tokenizer); tokenizer.attempt( - name, State::Next(StateName::DefinitionTitleBeforeMarker), State::Nok, - ) + ); + State::Retry(space_or_tab_eol(tokenizer)) } /// Before a title, after a line ending. @@ -338,11 +331,8 @@ pub fn title_before_marker(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_1 = Name::DefinitionTitle; tokenizer.tokenize_state.token_2 = Name::DefinitionTitleMarker; tokenizer.tokenize_state.token_3 = Name::DefinitionTitleString; - tokenizer.attempt( - StateName::TitleStart, - State::Next(StateName::DefinitionTitleAfter), - State::Nok, - ) + tokenizer.attempt(State::Next(StateName::DefinitionTitleAfter), State::Nok); + State::Retry(StateName::TitleStart) } /// After a title. @@ -355,12 +345,11 @@ pub fn title_after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_1 = Name::Data; tokenizer.tokenize_state.token_2 = Name::Data; tokenizer.tokenize_state.token_3 = Name::Data; - let name = space_or_tab(tokenizer); tokenizer.attempt( - name, State::Next(StateName::DefinitionTitleAfterOptionalWhitespace), State::Next(StateName::DefinitionTitleAfterOptionalWhitespace), - ) + ); + State::Retry(space_or_tab(tokenizer)) } /// After a title, after optional whitespace. diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs index 4e656d4..17cf617 100644 --- a/src/construct/heading_atx.rs +++ b/src/construct/heading_atx.rs @@ -70,7 +70,8 @@ use crate::tokenizer::Tokenizer; pub fn start(tokenizer: &mut Tokenizer) -> State { if tokenizer.parse_state.constructs.heading_atx { tokenizer.enter(Name::HeadingAtx); - let name = space_or_tab_min_max( + tokenizer.attempt(State::Next(StateName::HeadingAtxBefore), State::Nok); + State::Retry(space_or_tab_min_max( tokenizer, 0, if tokenizer.parse_state.constructs.code_indented { @@ -78,8 +79,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } else { usize::MAX }, - ); - tokenizer.attempt(name, State::Next(StateName::HeadingAtxBefore), State::Nok) + )) } else { State::Nok } @@ -121,8 +121,8 @@ pub fn sequence_open(tokenizer: &mut Tokenizer) -> State { _ if tokenizer.tokenize_state.size > 0 => { tokenizer.tokenize_state.size = 0; tokenizer.exit(Name::HeadingAtxSequence); - let name = space_or_tab(tokenizer); - tokenizer.attempt(name, State::Next(StateName::HeadingAtxAtBreak), State::Nok) + tokenizer.attempt(State::Next(StateName::HeadingAtxAtBreak), State::Nok); + State::Retry(space_or_tab(tokenizer)) } _ => { tokenizer.tokenize_state.size = 0; @@ -147,8 +147,8 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { State::Ok } Some(b'\t' | b' ') => { - let name = space_or_tab(tokenizer); - tokenizer.attempt(name, State::Next(StateName::HeadingAtxAtBreak), State::Nok) + tokenizer.attempt(State::Next(StateName::HeadingAtxAtBreak), State::Nok); + State::Retry(space_or_tab(tokenizer)) } Some(b'#') => { tokenizer.enter(Name::HeadingAtxSequence); diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs index 91a40b3..e31ce76 100644 --- a/src/construct/heading_setext.rs +++ b/src/construct/heading_setext.rs @@ -85,7 +85,8 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { .name == Name::Paragraph) { - let name = space_or_tab_min_max( + tokenizer.attempt(State::Next(StateName::HeadingSetextBefore), State::Nok); + State::Retry(space_or_tab_min_max( tokenizer, 0, if tokenizer.parse_state.constructs.code_indented { @@ -93,13 +94,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } else { usize::MAX }, - ); - - tokenizer.attempt( - name, - State::Next(StateName::HeadingSetextBefore), - State::Nok, - ) + )) } else { State::Nok } @@ -139,12 +134,11 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State { _ => { tokenizer.tokenize_state.marker = 0; tokenizer.exit(Name::HeadingSetextUnderline); - let name = space_or_tab(tokenizer); tokenizer.attempt( - name, State::Next(StateName::HeadingSetextAfter), State::Next(StateName::HeadingSetextAfter), - ) + ); + State::Retry(space_or_tab(tokenizer)) } } } diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs index 9998797..b5e1815 100644 --- a/src/construct/html_flow.rs +++ b/src/construct/html_flow.rs @@ -133,7 +133,8 @@ const COMPLETE: u8 = 7; pub fn start(tokenizer: &mut Tokenizer) -> State { if tokenizer.parse_state.constructs.html_flow { tokenizer.enter(Name::HtmlFlow); - let name = space_or_tab_with_options( + tokenizer.attempt(State::Next(StateName::HtmlFlowBefore), State::Nok); + State::Retry(space_or_tab_with_options( tokenizer, SpaceOrTabOptions { kind: Name::HtmlFlowData, @@ -146,9 +147,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { connect: false, content_type: None, }, - ); - - tokenizer.attempt(name, State::Next(StateName::HtmlFlowBefore), State::Nok) + )) } else { State::Nok } @@ -632,10 +631,10 @@ pub fn continuation(tokenizer: &mut Tokenizer) -> State { { tokenizer.exit(Name::HtmlFlowData); tokenizer.check( - StateName::HtmlFlowBlankLineBefore, State::Next(StateName::HtmlFlowContinuationAfter), State::Next(StateName::HtmlFlowContinuationStart), - ) + ); + State::Retry(StateName::HtmlFlowBlankLineBefore) } // Note: important that this is after the basic/complete case. None | Some(b'\n') => { @@ -678,10 +677,10 @@ pub fn continuation(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn continuation_start(tokenizer: &mut Tokenizer) -> State { tokenizer.check( - StateName::NonLazyContinuationStart, State::Next(StateName::HtmlFlowContinuationStartNonLazy), State::Next(StateName::HtmlFlowContinuationAfter), - ) + ); + State::Retry(StateName::NonLazyContinuationStart) } /// In continuation, at an eol, before non-lazy content. diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs index 1b15956..b92b9fa 100644 --- a/src/construct/html_text.rs +++ b/src/construct/html_text.rs @@ -208,11 +208,10 @@ pub fn comment_start_dash(tokenizer: &mut Tokenizer) -> State { pub fn comment(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => State::Nok, - Some(b'\n') => tokenizer.attempt( - StateName::HtmlTextLineEndingBefore, - State::Next(StateName::HtmlTextComment), - State::Nok, - ), + Some(b'\n') => { + tokenizer.attempt(State::Next(StateName::HtmlTextComment), State::Nok); + State::Retry(StateName::HtmlTextLineEndingBefore) + } Some(b'-') => { tokenizer.consume(); State::Next(StateName::HtmlTextCommentClose) @@ -271,11 +270,10 @@ pub fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State { pub fn cdata(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => State::Nok, - Some(b'\n') => tokenizer.attempt( - StateName::HtmlTextLineEndingBefore, - State::Next(StateName::HtmlTextCdata), - State::Nok, - ), + Some(b'\n') => { + tokenizer.attempt(State::Next(StateName::HtmlTextCdata), State::Nok); + State::Retry(StateName::HtmlTextLineEndingBefore) + } Some(b']') => { tokenizer.consume(); State::Next(StateName::HtmlTextCdataClose) @@ -326,11 +324,10 @@ pub fn cdata_end(tokenizer: &mut Tokenizer) -> State { pub fn declaration(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'>') => State::Retry(StateName::HtmlTextEnd), - Some(b'\n') => tokenizer.attempt( - StateName::HtmlTextLineEndingBefore, - State::Next(StateName::HtmlTextDeclaration), - State::Nok, - ), + Some(b'\n') => { + tokenizer.attempt(State::Next(StateName::HtmlTextDeclaration), State::Nok); + State::Retry(StateName::HtmlTextLineEndingBefore) + } _ => { tokenizer.consume(); State::Next(StateName::HtmlTextDeclaration) @@ -347,11 +344,10 @@ pub fn declaration(tokenizer: &mut Tokenizer) -> State { pub fn instruction(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => State::Nok, - Some(b'\n') => tokenizer.attempt( - StateName::HtmlTextLineEndingBefore, - State::Next(StateName::HtmlTextInstruction), - State::Nok, - ), + Some(b'\n') => { + tokenizer.attempt(State::Next(StateName::HtmlTextInstruction), State::Nok); + State::Retry(StateName::HtmlTextLineEndingBefore) + } Some(b'?') => { tokenizer.consume(); State::Next(StateName::HtmlTextInstructionClose) @@ -418,11 +414,10 @@ pub fn tag_close(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn tag_close_between(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - Some(b'\n') => tokenizer.attempt( - StateName::HtmlTextLineEndingBefore, - State::Next(StateName::HtmlTextTagCloseBetween), - State::Nok, - ), + Some(b'\n') => { + tokenizer.attempt(State::Next(StateName::HtmlTextTagCloseBetween), State::Nok); + State::Retry(StateName::HtmlTextLineEndingBefore) + } Some(b'\t' | b' ') => { tokenizer.consume(); State::Next(StateName::HtmlTextTagCloseBetween) @@ -457,11 +452,10 @@ pub fn tag_open(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn tag_open_between(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - Some(b'\n') => tokenizer.attempt( - StateName::HtmlTextLineEndingBefore, - State::Next(StateName::HtmlTextTagOpenBetween), - State::Nok, - ), + Some(b'\n') => { + tokenizer.attempt(State::Next(StateName::HtmlTextTagOpenBetween), State::Nok); + State::Retry(StateName::HtmlTextLineEndingBefore) + } Some(b'\t' | b' ') => { tokenizer.consume(); State::Next(StateName::HtmlTextTagOpenBetween) @@ -505,11 +499,13 @@ pub fn tag_open_attribute_name(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - Some(b'\n') => tokenizer.attempt( - StateName::HtmlTextLineEndingBefore, - State::Next(StateName::HtmlTextTagOpenAttributeNameAfter), - State::Nok, - ), + Some(b'\n') => { + tokenizer.attempt( + State::Next(StateName::HtmlTextTagOpenAttributeNameAfter), + State::Nok, + ); + State::Retry(StateName::HtmlTextLineEndingBefore) + } Some(b'\t' | b' ') => { tokenizer.consume(); State::Next(StateName::HtmlTextTagOpenAttributeNameAfter) @@ -532,11 +528,13 @@ pub fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer) -> State { pub fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'<' | b'=' | b'>' | b'`') => State::Nok, - Some(b'\n') => tokenizer.attempt( - StateName::HtmlTextLineEndingBefore, - State::Next(StateName::HtmlTextTagOpenAttributeValueBefore), - State::Nok, - ), + Some(b'\n') => { + tokenizer.attempt( + State::Next(StateName::HtmlTextTagOpenAttributeValueBefore), + State::Nok, + ); + State::Retry(StateName::HtmlTextLineEndingBefore) + } Some(b'\t' | b' ') => { tokenizer.consume(); State::Next(StateName::HtmlTextTagOpenAttributeValueBefore) @@ -565,11 +563,13 @@ pub fn tag_open_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.marker = 0; State::Nok } - Some(b'\n') => tokenizer.attempt( - StateName::HtmlTextLineEndingBefore, - State::Next(StateName::HtmlTextTagOpenAttributeValueQuoted), - State::Nok, - ), + Some(b'\n') => { + tokenizer.attempt( + State::Next(StateName::HtmlTextTagOpenAttributeValueQuoted), + State::Nok, + ); + State::Retry(StateName::HtmlTextLineEndingBefore) + } Some(b'"' | b'\'') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => { tokenizer.tokenize_state.marker = 0; tokenizer.consume(); @@ -665,12 +665,11 @@ pub fn line_ending_before(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn line_ending_after(tokenizer: &mut Tokenizer) -> State { - let name = space_or_tab(tokenizer); tokenizer.attempt( - name, State::Next(StateName::HtmlTextLineEndingAfterPrefix), State::Next(StateName::HtmlTextLineEndingAfterPrefix), - ) + ); + State::Retry(space_or_tab(tokenizer)) } /// After a line ending, after indent. diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs index bfded05..da53125 100644 --- a/src/construct/label_end.rs +++ b/src/construct/label_end.rs @@ -223,25 +223,29 @@ pub fn after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // Resource (`[asd](fgh)`)? - Some(b'(') => tokenizer.attempt( - StateName::LabelEndResourceStart, - State::Next(StateName::LabelEndOk), - State::Next(if defined { - StateName::LabelEndOk - } else { - StateName::LabelEndNok - }), - ), + Some(b'(') => { + tokenizer.attempt( + State::Next(StateName::LabelEndOk), + State::Next(if defined { + StateName::LabelEndOk + } else { + StateName::LabelEndNok + }), + ); + State::Retry(StateName::LabelEndResourceStart) + } // Full (`[asd][fgh]`) or collapsed (`[asd][]`) reference? - Some(b'[') => tokenizer.attempt( - StateName::LabelEndReferenceFull, - State::Next(StateName::LabelEndOk), - State::Next(if defined { - StateName::LabelEndReferenceNotFull - } else { - StateName::LabelEndNok - }), - ), + Some(b'[') => { + tokenizer.attempt( + State::Next(StateName::LabelEndOk), + State::Next(if defined { + StateName::LabelEndReferenceNotFull + } else { + StateName::LabelEndNok + }), + ); + State::Retry(StateName::LabelEndReferenceFull) + } // Shortcut (`[asd]`) reference? _ => State::Retry(if defined { StateName::LabelEndOk @@ -263,10 +267,10 @@ pub fn after(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn reference_not_full(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - StateName::LabelEndReferenceCollapsed, State::Next(StateName::LabelEndOk), State::Next(StateName::LabelEndNok), - ) + ); + State::Retry(StateName::LabelEndReferenceCollapsed) } /// Done, we found something. @@ -354,12 +358,11 @@ pub fn resource_start(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn resource_before(tokenizer: &mut Tokenizer) -> State { - let name = space_or_tab_eol(tokenizer); tokenizer.attempt( - name, State::Next(StateName::LabelEndResourceOpen), State::Next(StateName::LabelEndResourceOpen), - ) + ); + State::Retry(space_or_tab_eol(tokenizer)) } /// At the start of a resource, after optional whitespace. @@ -380,10 +383,10 @@ pub fn resource_open(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.size_b = RESOURCE_DESTINATION_BALANCE_MAX; tokenizer.attempt( - StateName::DestinationStart, State::Next(StateName::LabelEndResourceDestinationAfter), State::Next(StateName::LabelEndResourceDestinationMissing), - ) + ); + State::Retry(StateName::DestinationStart) } } @@ -400,12 +403,11 @@ pub fn resource_destination_after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_4 = Name::Data; tokenizer.tokenize_state.token_5 = Name::Data; tokenizer.tokenize_state.size_b = 0; - let name = space_or_tab_eol(tokenizer); tokenizer.attempt( - name, State::Next(StateName::LabelEndResourceBetween), State::Next(StateName::LabelEndResourceEnd), - ) + ); + State::Retry(space_or_tab_eol(tokenizer)) } /// Without destination. @@ -432,10 +434,10 @@ pub fn resource_between(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_2 = Name::ResourceTitleMarker; tokenizer.tokenize_state.token_3 = Name::ResourceTitleString; tokenizer.attempt( - StateName::TitleStart, State::Next(StateName::LabelEndResourceTitleAfter), State::Nok, - ) + ); + State::Retry(StateName::TitleStart) } _ => State::Retry(StateName::LabelEndResourceEnd), } @@ -451,12 +453,11 @@ pub fn resource_title_after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_1 = Name::Data; tokenizer.tokenize_state.token_2 = Name::Data; tokenizer.tokenize_state.token_3 = Name::Data; - let name = space_or_tab_eol(tokenizer); tokenizer.attempt( - name, State::Next(StateName::LabelEndResourceEnd), State::Next(StateName::LabelEndResourceEnd), - ) + ); + State::Retry(space_or_tab_eol(tokenizer)) } /// In a resource, at the `)`. @@ -491,10 +492,10 @@ pub fn reference_full(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_2 = Name::ReferenceMarker; tokenizer.tokenize_state.token_3 = Name::ReferenceString; tokenizer.attempt( - StateName::LabelStart, State::Next(StateName::LabelEndReferenceFullAfter), State::Nok, - ) + ); + State::Retry(StateName::LabelStart) } _ => unreachable!("expected `[`"), } diff --git a/src/construct/list.rs b/src/construct/list.rs index 1da1f4e..20be73c 100644 --- a/src/construct/list.rs +++ b/src/construct/list.rs @@ -64,7 +64,8 @@ use crate::util::{ pub fn start(tokenizer: &mut Tokenizer) -> State { if tokenizer.parse_state.constructs.list { tokenizer.enter(Name::ListItem); - let name = space_or_tab_min_max( + tokenizer.attempt(State::Next(StateName::ListBefore), State::Nok); + State::Retry(space_or_tab_min_max( tokenizer, 0, if tokenizer.parse_state.constructs.code_indented { @@ -72,8 +73,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } else { usize::MAX }, - ); - tokenizer.attempt(name, State::Next(StateName::ListBefore), State::Nok) + )) } else { State::Nok } @@ -88,11 +88,13 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // Unordered. - Some(b'*' | b'-') => tokenizer.check( - StateName::ThematicBreakStart, - State::Next(StateName::ListNok), - State::Next(StateName::ListBeforeUnordered), - ), + Some(b'*' | b'-') => { + tokenizer.check( + State::Next(StateName::ListNok), + State::Next(StateName::ListBeforeUnordered), + ); + State::Retry(StateName::ThematicBreakStart) + } Some(b'+') => State::Retry(StateName::ListBeforeUnordered), // Ordered. Some(b'0'..=b'9') if !tokenizer.interrupt => State::Retry(StateName::ListBeforeOrdered), @@ -176,10 +178,10 @@ pub fn marker(tokenizer: &mut Tokenizer) -> State { pub fn marker_after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.size = 1; tokenizer.check( - StateName::BlankLineStart, State::Next(StateName::ListAfter), State::Next(StateName::ListMarkerAfterFilled), - ) + ); + State::Retry(StateName::BlankLineStart) } /// After a list item marker, not followed by a blank line. @@ -193,10 +195,10 @@ pub fn marker_after_filled(tokenizer: &mut Tokenizer) -> State { // Attempt to parse up to the largest allowed indent, `nok` if there is more whitespace. tokenizer.attempt( - StateName::ListWhitespace, State::Next(StateName::ListAfter), State::Next(StateName::ListPrefixOther), - ) + ); + State::Retry(StateName::ListWhitespace) } /// In whitespace after a marker. @@ -206,12 +208,8 @@ pub fn marker_after_filled(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn whitespace(tokenizer: &mut Tokenizer) -> State { - let name = space_or_tab_min_max(tokenizer, 1, TAB_SIZE); - tokenizer.attempt( - name, - State::Next(StateName::ListWhitespaceAfter), - State::Nok, - ) + tokenizer.attempt(State::Next(StateName::ListWhitespaceAfter), State::Nok); + State::Retry(space_or_tab_min_max(tokenizer, 1, TAB_SIZE)) } /// After acceptable whitespace. @@ -298,10 +296,10 @@ pub fn after(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn cont_start(tokenizer: &mut Tokenizer) -> State { tokenizer.check( - StateName::BlankLineStart, State::Next(StateName::ListContBlank), State::Next(StateName::ListContFilled), - ) + ); + State::Retry(StateName::BlankLineStart) } /// Start of blank list item continuation. @@ -320,9 +318,9 @@ pub fn cont_blank(tokenizer: &mut Tokenizer) -> State { if container.blank_initial { State::Nok } else { - let name = space_or_tab_min_max(tokenizer, 0, size); + tokenizer.attempt(State::Next(StateName::ListOk), State::Nok); // Consume, optionally, at most `size`. - tokenizer.attempt(name, State::Next(StateName::ListOk), State::Nok) + State::Retry(space_or_tab_min_max(tokenizer, 0, size)) } } @@ -340,9 +338,9 @@ pub fn cont_filled(tokenizer: &mut Tokenizer) -> State { container.blank_initial = false; + tokenizer.attempt(State::Next(StateName::ListOk), State::Nok); // Consume exactly `size`. - let name = space_or_tab_min_max(tokenizer, size, size); - tokenizer.attempt(name, State::Next(StateName::ListOk), State::Nok) + State::Retry(space_or_tab_min_max(tokenizer, size, size)) } /// A state fn to yield [`State::Ok`]. diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs index 762baaa..0f7aa00 100644 --- a/src/construct/partial_label.rs +++ b/src/construct/partial_label.rs @@ -103,18 +103,17 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { } else { match tokenizer.current { Some(b'\n') => { - let name = space_or_tab_eol_with_options( + tokenizer.attempt( + State::Next(StateName::LabelEolAfter), + State::Next(StateName::LabelAtBlankLine), + ); + State::Retry(space_or_tab_eol_with_options( tokenizer, Options { content_type: Some(Content::String), connect: tokenizer.tokenize_state.connect, }, - ); - tokenizer.attempt( - name, - State::Next(StateName::LabelEolAfter), - State::Next(StateName::LabelAtBlankLine), - ) + )) } Some(b']') => { tokenizer.exit(tokenizer.tokenize_state.token_3.clone()); diff --git a/src/construct/partial_space_or_tab_eol.rs b/src/construct/partial_space_or_tab_eol.rs index 0807a5f..2127fe6 100644 --- a/src/construct/partial_space_or_tab_eol.rs +++ b/src/construct/partial_space_or_tab_eol.rs @@ -45,7 +45,12 @@ pub fn space_or_tab_eol_with_options(tokenizer: &mut Tokenizer, options: Options } pub fn eol_start(tokenizer: &mut Tokenizer) -> State { - let name = space_or_tab_with_options( + tokenizer.attempt( + State::Next(StateName::SpaceOrTabEolAfterFirst), + State::Next(StateName::SpaceOrTabEolAtEol), + ); + + State::Retry(space_or_tab_with_options( tokenizer, SpaceOrTabOptions { kind: Name::SpaceOrTab, @@ -57,13 +62,7 @@ pub fn eol_start(tokenizer: &mut Tokenizer) -> State { .clone(), connect: tokenizer.tokenize_state.space_or_tab_eol_connect, }, - ); - - tokenizer.attempt( - name, - State::Next(StateName::SpaceOrTabEolAfterFirst), - State::Next(StateName::SpaceOrTabEolAtEol), - ) + )) } pub fn eol_after_first(tokenizer: &mut Tokenizer) -> State { @@ -133,7 +132,11 @@ pub fn eol_at_eol(tokenizer: &mut Tokenizer) -> State { /// ``` #[allow(clippy::needless_pass_by_value)] pub fn eol_after_eol(tokenizer: &mut Tokenizer) -> State { - let name = space_or_tab_with_options( + tokenizer.attempt( + State::Next(StateName::SpaceOrTabEolAfterMore), + State::Next(StateName::SpaceOrTabEolAfterMore), + ); + State::Retry(space_or_tab_with_options( tokenizer, SpaceOrTabOptions { kind: Name::SpaceOrTab, @@ -145,12 +148,7 @@ pub fn eol_after_eol(tokenizer: &mut Tokenizer) -> State { .clone(), connect: tokenizer.tokenize_state.space_or_tab_eol_connect, }, - ); - tokenizer.attempt( - name, - State::Next(StateName::SpaceOrTabEolAfterMore), - State::Next(StateName::SpaceOrTabEolAfterMore), - ) + )) } /// `space_or_tab_eol`: after more (optional) `space_or_tab`. diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs index 6421360..6f7a037 100644 --- a/src/construct/partial_title.rs +++ b/src/construct/partial_title.rs @@ -99,19 +99,17 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { State::Nok } Some(b'\n') => { - let name = space_or_tab_eol_with_options( + tokenizer.attempt( + State::Next(StateName::TitleAfterEol), + State::Next(StateName::TitleAtBlankLine), + ); + State::Retry(space_or_tab_eol_with_options( tokenizer, Options { content_type: Some(Content::String), connect: tokenizer.tokenize_state.connect, }, - ); - - tokenizer.attempt( - name, - State::Next(StateName::TitleAfterEol), - State::Next(StateName::TitleAtBlankLine), - ) + )) } Some(b'"' | b'\'' | b')') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs index 5969e77..f4d008d 100644 --- a/src/construct/thematic_break.rs +++ b/src/construct/thematic_break.rs @@ -63,7 +63,8 @@ use crate::tokenizer::Tokenizer; pub fn start(tokenizer: &mut Tokenizer) -> State { if tokenizer.parse_state.constructs.thematic_break { tokenizer.enter(Name::ThematicBreak); - let name = space_or_tab_min_max( + tokenizer.attempt(State::Next(StateName::ThematicBreakBefore), State::Nok); + State::Retry(space_or_tab_min_max( tokenizer, 0, if tokenizer.parse_state.constructs.code_indented { @@ -71,13 +72,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } else { usize::MAX }, - ); - - tokenizer.attempt( - name, - State::Next(StateName::ThematicBreakBefore), - State::Nok, - ) + )) } else { State::Nok } @@ -146,12 +141,11 @@ pub fn sequence(tokenizer: &mut Tokenizer) -> State { } _ => { tokenizer.exit(Name::ThematicBreakSequence); - let name = space_or_tab(tokenizer); tokenizer.attempt( - name, State::Next(StateName::ThematicBreakAtBreak), State::Next(StateName::ThematicBreakAtBreak), - ) + ); + State::Retry(space_or_tab(tokenizer)) } } } diff --git a/src/content/document.rs b/src/content/document.rs index f2890f3..04f9dc6 100644 --- a/src/content/document.rs +++ b/src/content/document.rs @@ -81,10 +81,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { ))); tokenizer.attempt( - StateName::BomStart, State::Next(StateName::DocumentContainerExistingBefore), State::Next(StateName::DocumentContainerExistingBefore), - ) + ); + + State::Retry(StateName::BomStart) } /// Before existing containers. @@ -102,14 +103,17 @@ pub fn container_existing_before(tokenizer: &mut Tokenizer) -> State { let container = &tokenizer.tokenize_state.document_container_stack [tokenizer.tokenize_state.document_continued]; + let name = match container.kind { + Container::BlockQuote => StateName::BlockQuoteContStart, + Container::ListItem => StateName::ListContStart, + }; + tokenizer.attempt( - match container.kind { - Container::BlockQuote => StateName::BlockQuoteContStart, - Container::ListItem => StateName::ListContStart, - }, State::Next(StateName::DocumentContainerExistingAfter), State::Next(StateName::DocumentContainerNewBefore), - ) + ); + + State::Retry(name) } // Otherwise, check new containers. else { @@ -173,10 +177,10 @@ pub fn container_new_before(tokenizer: &mut Tokenizer) -> State { .swap(tokenizer.tokenize_state.document_continued, tail); tokenizer.attempt( - StateName::BlockQuoteStart, State::Next(StateName::DocumentContainerNewAfter), State::Next(StateName::DocumentContainerNewBeforeNotBlockQuote), - ) + ); + State::Retry(StateName::BlockQuoteStart) } /// Maybe before a new container, but not a block quote. @@ -196,10 +200,10 @@ pub fn container_new_before_not_block_quote(tokenizer: &mut Tokenizer) -> State }; tokenizer.attempt( - StateName::ListStart, State::Next(StateName::DocumentContainerNewAfter), State::Next(StateName::DocumentContainerNewBeforeNotList), - ) + ); + State::Retry(StateName::ListStart) } /// Maybe before a new container, but not a list. diff --git a/src/content/flow.rs b/src/content/flow.rs index 7eb7b64..c6bd398 100644 --- a/src/content/flow.rs +++ b/src/content/flow.rs @@ -35,54 +35,65 @@ use crate::tokenizer::Tokenizer; /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - Some(b'`' | b'~') => tokenizer.attempt( - StateName::CodeFencedStart, - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), - ), - Some(b'<') => tokenizer.attempt( - StateName::HtmlFlowStart, - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), - ), - Some(b'#') => tokenizer.attempt( - StateName::HeadingAtxStart, - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), - ), + Some(b'`' | b'~') => { + tokenizer.attempt( + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), + ); + State::Retry(StateName::CodeFencedStart) + } + Some(b'<') => { + tokenizer.attempt( + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), + ); + State::Retry(StateName::HtmlFlowStart) + } + Some(b'#') => { + tokenizer.attempt( + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), + ); + State::Retry(StateName::HeadingAtxStart) + } // Note: `-` is also used in thematic breaks, so it’s not included here. - Some(b'=') => tokenizer.attempt( - StateName::HeadingSetextStart, - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), - ), - Some(b'*' | b'_') => tokenizer.attempt( - StateName::ThematicBreakStart, - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), - ), - Some(b'[') => tokenizer.attempt( - StateName::DefinitionStart, - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), - ), + Some(b'=') => { + tokenizer.attempt( + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), + ); + State::Retry(StateName::HeadingSetextStart) + } + Some(b'*' | b'_') => { + tokenizer.attempt( + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), + ); + State::Retry(StateName::ThematicBreakStart) + } + Some(b'[') => { + tokenizer.attempt( + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), + ); + State::Retry(StateName::DefinitionStart) + } // Actual parsing: blank line? Indented code? Indented anything? // Also includes `-` which can be a setext heading underline or a thematic break. None | Some(b'\t' | b'\n' | b' ' | b'-') => State::Retry(StateName::FlowBlankLineBefore), - Some(_) => tokenizer.attempt( - StateName::ParagraphStart, - State::Next(StateName::FlowAfter), - State::Nok, - ), + Some(_) => { + tokenizer.attempt(State::Next(StateName::FlowAfter), State::Nok); + State::Retry(StateName::ParagraphStart) + } } } pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - StateName::BlankLineStart, State::Next(StateName::FlowBlankLineAfter), State::Next(StateName::FlowBeforeCodeIndented), - ) + ); + State::Retry(StateName::BlankLineStart) } /// Before flow (initial). @@ -99,58 +110,58 @@ pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn before_code_indented(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - StateName::CodeIndentedStart, State::Next(StateName::FlowAfter), State::Next(StateName::FlowBeforeCodeFenced), - ) + ); + State::Retry(StateName::CodeIndentedStart) } pub fn before_code_fenced(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - StateName::CodeFencedStart, State::Next(StateName::FlowAfter), State::Next(StateName::FlowBeforeHtml), - ) + ); + State::Retry(StateName::CodeFencedStart) } pub fn before_html(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - StateName::HtmlFlowStart, State::Next(StateName::FlowAfter), State::Next(StateName::FlowBeforeHeadingAtx), - ) + ); + State::Retry(StateName::HtmlFlowStart) } pub fn before_heading_atx(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - StateName::HeadingAtxStart, State::Next(StateName::FlowAfter), State::Next(StateName::FlowBeforeHeadingSetext), - ) + ); + State::Retry(StateName::HeadingAtxStart) } pub fn before_heading_setext(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - StateName::HeadingSetextStart, State::Next(StateName::FlowAfter), State::Next(StateName::FlowBeforeThematicBreak), - ) + ); + State::Retry(StateName::HeadingSetextStart) } pub fn before_thematic_break(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - StateName::ThematicBreakStart, State::Next(StateName::FlowAfter), State::Next(StateName::FlowBeforeDefinition), - ) + ); + State::Retry(StateName::ThematicBreakStart) } pub fn before_definition(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - StateName::DefinitionStart, State::Next(StateName::FlowAfter), State::Next(StateName::FlowBeforeParagraph), - ) + ); + State::Retry(StateName::DefinitionStart) } /// After a blank line. @@ -203,9 +214,6 @@ pub fn after(tokenizer: &mut Tokenizer) -> State { /// |asd /// ``` pub fn before_paragraph(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt( - StateName::ParagraphStart, - State::Next(StateName::FlowAfter), - State::Nok, - ) + tokenizer.attempt(State::Next(StateName::FlowAfter), State::Nok); + State::Retry(StateName::ParagraphStart) } diff --git a/src/content/string.rs b/src/content/string.rs index ce850e7..1eefd30 100644 --- a/src/content/string.rs +++ b/src/content/string.rs @@ -30,27 +30,28 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => State::Ok, - Some(b'&') => tokenizer.attempt( - StateName::CharacterReferenceStart, - State::Next(StateName::StringBefore), - State::Next(StateName::StringBeforeData), - ), - Some(b'\\') => tokenizer.attempt( - StateName::CharacterEscapeStart, - State::Next(StateName::StringBefore), - State::Next(StateName::StringBeforeData), - ), + Some(b'&') => { + tokenizer.attempt( + State::Next(StateName::StringBefore), + State::Next(StateName::StringBeforeData), + ); + State::Retry(StateName::CharacterReferenceStart) + } + Some(b'\\') => { + tokenizer.attempt( + State::Next(StateName::StringBefore), + State::Next(StateName::StringBeforeData), + ); + State::Retry(StateName::CharacterEscapeStart) + } _ => State::Retry(StateName::StringBeforeData), } } /// At data. pub fn before_data(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt( - StateName::DataStart, - State::Next(StateName::StringBefore), - State::Nok, - ) + tokenizer.attempt(State::Next(StateName::StringBefore), State::Nok); + State::Retry(StateName::DataStart) } /// Resolve whitespace. diff --git a/src/content/text.rs b/src/content/text.rs index 570759d..6509d30 100644 --- a/src/content/text.rs +++ b/src/content/text.rs @@ -48,47 +48,63 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => State::Ok, - Some(b'!') => tokenizer.attempt( - StateName::LabelStartImageStart, - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeData), - ), - Some(b'&') => tokenizer.attempt( - StateName::CharacterReferenceStart, - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeData), - ), - Some(b'*' | b'_') => tokenizer.attempt( - StateName::AttentionStart, - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeData), - ), + Some(b'!') => { + tokenizer.attempt( + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeData), + ); + State::Retry(StateName::LabelStartImageStart) + } + Some(b'&') => { + tokenizer.attempt( + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeData), + ); + State::Retry(StateName::CharacterReferenceStart) + } + Some(b'*' | b'_') => { + tokenizer.attempt( + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeData), + ); + State::Retry(StateName::AttentionStart) + } // `autolink`, `html_text` (order does not matter) - Some(b'<') => tokenizer.attempt( - StateName::AutolinkStart, - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeHtml), - ), - Some(b'[') => tokenizer.attempt( - StateName::LabelStartLinkStart, - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeData), - ), - Some(b'\\') => tokenizer.attempt( - StateName::CharacterEscapeStart, - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeHardBreakEscape), - ), - Some(b']') => tokenizer.attempt( - StateName::LabelEndStart, - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeData), - ), - Some(b'`') => tokenizer.attempt( - StateName::CodeTextStart, - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeData), - ), + Some(b'<') => { + tokenizer.attempt( + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeHtml), + ); + State::Retry(StateName::AutolinkStart) + } + Some(b'[') => { + tokenizer.attempt( + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeData), + ); + State::Retry(StateName::LabelStartLinkStart) + } + Some(b'\\') => { + tokenizer.attempt( + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeHardBreakEscape), + ); + State::Retry(StateName::CharacterEscapeStart) + } + Some(b']') => { + tokenizer.attempt( + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeData), + ); + State::Retry(StateName::LabelEndStart) + } + Some(b'`') => { + tokenizer.attempt( + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeData), + ); + State::Retry(StateName::CodeTextStart) + } _ => State::Retry(StateName::TextBeforeData), } } @@ -96,28 +112,25 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { /// At `<`, which wasn’t an autolink: before HTML? pub fn before_html(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - StateName::HtmlTextStart, State::Next(StateName::TextBefore), State::Next(StateName::TextBeforeData), - ) + ); + State::Retry(StateName::HtmlTextStart) } /// At `\`, which wasn’t a character escape: before a hard break? pub fn before_hard_break_escape(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - StateName::HardBreakEscapeStart, State::Next(StateName::TextBefore), State::Next(StateName::TextBeforeData), - ) + ); + State::Retry(StateName::HardBreakEscapeStart) } /// At data. pub fn before_data(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt( - StateName::DataStart, - State::Next(StateName::TextBefore), - State::Nok, - ) + tokenizer.attempt(State::Next(StateName::TextBefore), State::Nok); + State::Retry(StateName::DataStart) } /// Resolve whitespace. diff --git a/src/tokenizer.rs b/src/tokenizer.rs index fdca6c5..0b51c48 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -15,7 +15,7 @@ use crate::constant::TAB_SIZE; use crate::event::{Content, Event, Kind, Link, Name, Point, VOID_EVENTS}; use crate::parser::ParseState; use crate::resolve::{call as call_resolve, Name as ResolveName}; -use crate::state::{call, Name as StateName, State}; +use crate::state::{call, State}; use crate::util::edit_map::EditMap; /// Info used to tokenize the current container. @@ -525,9 +525,9 @@ impl<'a> Tokenizer<'a> { self.stack.truncate(previous.stack_len); } - /// Parse with `name` and its future states, to see if that results in - /// [`State::Ok`][] or [`State::Nok`][], then revert in both cases. - pub fn check(&mut self, name: StateName, ok: State, nok: State) -> State { + /// Stack an attempt, moving to `ok` on [`State::Ok`][] and `nok` on + /// [`State::Nok`][], reverting in both cases. + pub fn check(&mut self, ok: State, nok: State) { // Always capture (and restore) when checking. // No need to capture (and restore) when `nok` is `State::Nok`, because the // parent attempt will do it. @@ -539,14 +539,11 @@ impl<'a> Tokenizer<'a> { ok, nok, }); - - call(self, name) } - /// Parse with `name` and its future states, to see if that results in - /// [`State::Ok`][] or [`State::Nok`][], revert in the case of - /// `State::Nok`. - pub fn attempt(&mut self, name: StateName, ok: State, nok: State) -> State { + /// Stack an attempt, moving to `ok` on [`State::Ok`][] and `nok` on + /// [`State::Nok`][], reverting in the latter case. + pub fn attempt(&mut self, ok: State, nok: State) { // Always capture (and restore) when checking. // No need to capture (and restore) when `nok` is `State::Nok`, because the // parent attempt will do it. @@ -562,8 +559,6 @@ impl<'a> Tokenizer<'a> { ok, nok, }); - - call(self, name) } /// Tokenize. |