diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-08-12 19:04:31 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-08-12 19:04:31 +0200 |
commit | 395b13daf6dd6da0204302d344caa710ea891d62 (patch) | |
tree | 4a7c688af7a70c7e3b694d87ba66e01dd0670cf6 | |
parent | 6dc2011d69c85820feddf6799142d304cc2eeb29 (diff) | |
download | markdown-rs-395b13daf6dd6da0204302d344caa710ea891d62.tar.gz markdown-rs-395b13daf6dd6da0204302d344caa710ea891d62.tar.bz2 markdown-rs-395b13daf6dd6da0204302d344caa710ea891d62.zip |
Refactor to attempt less if never needed
Diffstat (limited to '')
-rw-r--r-- | src/construct/blank_line.rs | 12 | ||||
-rw-r--r-- | src/construct/block_quote.rs | 24 | ||||
-rw-r--r-- | src/construct/code_fenced.rs | 131 | ||||
-rw-r--r-- | src/construct/code_indented.rs | 16 | ||||
-rw-r--r-- | src/construct/definition.rs | 92 | ||||
-rw-r--r-- | src/construct/heading_atx.rs | 24 | ||||
-rw-r--r-- | src/construct/heading_setext.rs | 36 | ||||
-rw-r--r-- | src/construct/html_flow.rs | 33 | ||||
-rw-r--r-- | src/construct/html_text.rs | 14 | ||||
-rw-r--r-- | src/construct/label_end.rs | 44 | ||||
-rw-r--r-- | src/construct/list.rs | 37 | ||||
-rw-r--r-- | src/construct/partial_space_or_tab_eol.rs | 61 | ||||
-rw-r--r-- | src/construct/partial_title.rs | 84 | ||||
-rw-r--r-- | src/construct/thematic_break.rs | 52 |
14 files changed, 367 insertions, 293 deletions
diff --git a/src/construct/blank_line.rs b/src/construct/blank_line.rs index 81b58fc..7f1d023 100644 --- a/src/construct/blank_line.rs +++ b/src/construct/blank_line.rs @@ -47,12 +47,12 @@ use crate::tokenizer::Tokenizer; /// ^ /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt( - State::Next(StateName::BlankLineAfter), - State::Next(StateName::BlankLineAfter), - ); - - State::Retry(space_or_tab(tokenizer)) + if matches!(tokenizer.current, Some(b'\t' | b' ')) { + tokenizer.attempt(State::Next(StateName::BlankLineAfter), State::Nok); + State::Retry(space_or_tab(tokenizer)) + } else { + State::Retry(StateName::BlankLineAfter) + } } /// At eof/eol, after optional whitespace. diff --git a/src/construct/block_quote.rs b/src/construct/block_quote.rs index 6e660cb..4f0870f 100644 --- a/src/construct/block_quote.rs +++ b/src/construct/block_quote.rs @@ -64,16 +64,20 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn cont_start(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt(State::Next(StateName::BlockQuoteContBefore), State::Nok); - State::Retry(space_or_tab_min_max( - tokenizer, - 0, - if tokenizer.parse_state.constructs.code_indented { - TAB_SIZE - 1 - } else { - usize::MAX - }, - )) + if matches!(tokenizer.current, Some(b'\t' | b' ')) { + tokenizer.attempt(State::Next(StateName::BlockQuoteContBefore), State::Nok); + State::Retry(space_or_tab_min_max( + tokenizer, + 1, + if tokenizer.parse_state.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }, + )) + } else { + State::Retry(StateName::BlockQuoteContBefore) + } } /// At `>`, after optional whitespace. diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs index be0542a..74d6fe1 100644 --- a/src/construct/code_fenced.rs +++ b/src/construct/code_fenced.rs @@ -118,25 +118,32 @@ use crate::util::slice::{Position, Slice}; /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { if tokenizer.parse_state.constructs.code_fenced { - tokenizer.enter(Name::CodeFenced); - tokenizer.enter(Name::CodeFencedFence); + if matches!(tokenizer.current, Some(b'\t' | b' ')) { + tokenizer.enter(Name::CodeFenced); + tokenizer.enter(Name::CodeFencedFence); + tokenizer.attempt( + State::Next(StateName::CodeFencedBeforeSequenceOpen), + State::Nok, + ); + return State::Retry(space_or_tab_min_max( + tokenizer, + 0, + if tokenizer.parse_state.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }, + )); + } - tokenizer.attempt( - State::Next(StateName::CodeFencedBeforeSequenceOpen), - State::Nok, - ); - State::Retry(space_or_tab_min_max( - tokenizer, - 0, - if tokenizer.parse_state.constructs.code_indented { - TAB_SIZE - 1 - } else { - usize::MAX - }, - )) - } else { - State::Nok + if matches!(tokenizer.current, Some(b'`' | b'~')) { + tokenizer.enter(Name::CodeFenced); + tokenizer.enter(Name::CodeFencedFence); + return State::Retry(StateName::CodeFencedBeforeSequenceOpen); + } } + + State::Nok } /// In opening fence, after prefix, at sequence. @@ -184,20 +191,18 @@ pub fn sequence_open(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.size += 1; tokenizer.consume(); State::Next(StateName::CodeFencedSequenceOpen) - } else if tokenizer.tokenize_state.size >= CODE_FENCED_SEQUENCE_SIZE_MIN { - tokenizer.exit(Name::CodeFencedFenceSequence); - - tokenizer.attempt( - State::Next(StateName::CodeFencedInfoBefore), - State::Next(StateName::CodeFencedInfoBefore), - ); - - State::Retry(space_or_tab(tokenizer)) - } else { + } else if tokenizer.tokenize_state.size < CODE_FENCED_SEQUENCE_SIZE_MIN { tokenizer.tokenize_state.marker = 0; tokenizer.tokenize_state.size_c = 0; tokenizer.tokenize_state.size = 0; State::Nok + } else if matches!(tokenizer.current, Some(b'\t' | b' ')) { + tokenizer.exit(Name::CodeFencedFenceSequence); + tokenizer.attempt(State::Next(StateName::CodeFencedInfoBefore), State::Nok); + State::Retry(space_or_tab(tokenizer)) + } else { + tokenizer.exit(Name::CodeFencedFenceSequence); + State::Retry(StateName::CodeFencedInfoBefore) } } @@ -254,10 +259,7 @@ pub fn info(tokenizer: &mut Tokenizer) -> State { Some(b'\t' | b' ') => { tokenizer.exit(Name::Data); tokenizer.exit(Name::CodeFencedFenceInfo); - tokenizer.attempt( - State::Next(StateName::CodeFencedMetaBefore), - State::Next(StateName::CodeFencedMetaBefore), - ); + tokenizer.attempt(State::Next(StateName::CodeFencedMetaBefore), State::Nok); State::Retry(space_or_tab(tokenizer)) } Some(byte) => { @@ -362,20 +364,24 @@ pub fn at_non_lazy_break(tokenizer: &mut Tokenizer) -> State { pub fn close_start(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(Name::CodeFencedFence); - tokenizer.attempt( - State::Next(StateName::CodeFencedBeforeSequenceClose), - State::Nok, - ); + if matches!(tokenizer.current, Some(b'\t' | b' ')) { + tokenizer.attempt( + State::Next(StateName::CodeFencedBeforeSequenceClose), + State::Nok, + ); - State::Retry(space_or_tab_min_max( - tokenizer, - 0, - if tokenizer.parse_state.constructs.code_indented { - TAB_SIZE - 1 - } else { - usize::MAX - }, - )) + State::Retry(space_or_tab_min_max( + tokenizer, + 0, + if tokenizer.parse_state.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }, + )) + } else { + State::Retry(StateName::CodeFencedBeforeSequenceClose) + } } /// In closing fence, after optional whitespace, at sequence. @@ -413,11 +419,16 @@ pub fn sequence_close(tokenizer: &mut Tokenizer) -> State { { tokenizer.tokenize_state.size_b = 0; tokenizer.exit(Name::CodeFencedFenceSequence); - tokenizer.attempt( - State::Next(StateName::CodeFencedAfterSequenceClose), - State::Next(StateName::CodeFencedAfterSequenceClose), - ); - State::Retry(space_or_tab(tokenizer)) + + if matches!(tokenizer.current, Some(b'\t' | b' ')) { + tokenizer.attempt( + State::Next(StateName::CodeFencedAfterSequenceClose), + State::Nok, + ); + State::Retry(space_or_tab(tokenizer)) + } else { + State::Retry(StateName::CodeFencedAfterSequenceClose) + } } else { tokenizer.tokenize_state.size_b = 0; State::Nok @@ -466,15 +477,19 @@ pub fn content_before(tokenizer: &mut Tokenizer) -> State { /// | ~~~ /// ``` pub fn content_start(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt( - State::Next(StateName::CodeFencedBeforeContentChunk), - State::Nok, - ); - State::Retry(space_or_tab_min_max( - tokenizer, - 0, - tokenizer.tokenize_state.size_c, - )) + if matches!(tokenizer.current, Some(b'\t' | b' ')) { + tokenizer.attempt( + State::Next(StateName::CodeFencedBeforeContentChunk), + State::Nok, + ); + State::Retry(space_or_tab_min_max( + tokenizer, + 0, + tokenizer.tokenize_state.size_c, + )) + } else { + State::Retry(StateName::CodeFencedBeforeContentChunk) + } } /// Before code content, after optional prefix. diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs index 5805346..cf111f4 100644 --- a/src/construct/code_indented.rs +++ b/src/construct/code_indented.rs @@ -63,7 +63,10 @@ use crate::tokenizer::Tokenizer; /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { // Do not interrupt paragraphs. - if !tokenizer.interrupt && tokenizer.parse_state.constructs.code_indented { + if !tokenizer.interrupt + && tokenizer.parse_state.constructs.code_indented + && matches!(tokenizer.current, Some(b'\t' | b' ')) + { tokenizer.enter(Name::CodeIndented); tokenizer.attempt(State::Next(StateName::CodeIndentedAtBreak), State::Nok); State::Retry(space_or_tab_min_max(tokenizer, TAB_SIZE, TAB_SIZE)) @@ -158,11 +161,12 @@ pub fn further_start(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn further_begin(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt( - State::Next(StateName::CodeIndentedFurtherAfter), - State::Next(StateName::CodeIndentedFurtherAfter), - ); - State::Retry(space_or_tab(tokenizer)) + if matches!(tokenizer.current, Some(b'\t' | b' ')) { + tokenizer.attempt(State::Next(StateName::CodeIndentedFurtherAfter), State::Nok); + State::Retry(space_or_tab(tokenizer)) + } else { + State::Nok + } } /// After whitespace, not indented enough. diff --git a/src/construct/definition.rs b/src/construct/definition.rs index 2378c48..e242e23 100644 --- a/src/construct/definition.rs +++ b/src/construct/definition.rs @@ -112,24 +112,26 @@ use crate::util::{ /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { // Do not interrupt paragraphs (but do follow definitions). - let possible = !tokenizer.interrupt - || (!tokenizer.events.is_empty() - && tokenizer.events[skip::opt_back( - &tokenizer.events, - tokenizer.events.len() - 1, - &[Name::LineEnding, Name::SpaceOrTab], - )] - .name - == Name::Definition); - - if possible && tokenizer.parse_state.constructs.definition { + if tokenizer.parse_state.constructs.definition + && (!tokenizer.interrupt + || (!tokenizer.events.is_empty() + && tokenizer.events[skip::opt_back( + &tokenizer.events, + tokenizer.events.len() - 1, + &[Name::LineEnding, Name::SpaceOrTab], + )] + .name + == Name::Definition)) + { tokenizer.enter(Name::Definition); - tokenizer.attempt( - State::Next(StateName::DefinitionBefore), - State::Next(StateName::DefinitionBefore), - ); - // Note: arbitrary whitespace allowed even if code (indented) is on. - State::Retry(space_or_tab(tokenizer)) + + if matches!(tokenizer.current, Some(b'\t' | b' ')) { + // Note: arbitrary whitespace allowed even if code (indented) is on. + tokenizer.attempt(State::Next(StateName::DefinitionBefore), State::Nok); + State::Retry(space_or_tab(tokenizer)) + } else { + State::Retry(StateName::DefinitionBefore) + } } else { State::Nok } @@ -189,11 +191,15 @@ pub fn label_after(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn marker_after(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt( - State::Next(StateName::DefinitionDestinationBefore), - State::Next(StateName::DefinitionDestinationBefore), - ); - State::Retry(space_or_tab_eol(tokenizer)) + if matches!(tokenizer.current, Some(b'\t' | b'\n' | b' ')) { + tokenizer.attempt( + State::Next(StateName::DefinitionDestinationBefore), + State::Next(StateName::DefinitionDestinationBefore), + ); + State::Retry(space_or_tab_eol(tokenizer)) + } else { + State::Retry(StateName::DefinitionDestinationBefore) + } } /// Before destination. @@ -257,11 +263,15 @@ pub fn destination_missing(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn after(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt( - State::Next(StateName::DefinitionAfterWhitespace), - State::Next(StateName::DefinitionAfterWhitespace), - ); - State::Retry(space_or_tab(tokenizer)) + if matches!(tokenizer.current, Some(b'\t' | b' ')) { + tokenizer.attempt( + State::Next(StateName::DefinitionAfterWhitespace), + State::Nok, + ); + State::Retry(space_or_tab(tokenizer)) + } else { + State::Retry(StateName::DefinitionAfterWhitespace) + } } /// After definition, after optional whitespace. @@ -313,11 +323,15 @@ pub fn after_whitespace(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn title_before(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt( - State::Next(StateName::DefinitionTitleBeforeMarker), - State::Nok, - ); - State::Retry(space_or_tab_eol(tokenizer)) + if matches!(tokenizer.current, Some(b'\t' | b'\n' | b' ')) { + tokenizer.attempt( + State::Next(StateName::DefinitionTitleBeforeMarker), + State::Nok, + ); + State::Retry(space_or_tab_eol(tokenizer)) + } else { + State::Nok + } } /// At title. @@ -345,11 +359,15 @@ pub fn title_after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_1 = Name::Data; tokenizer.tokenize_state.token_2 = Name::Data; tokenizer.tokenize_state.token_3 = Name::Data; - tokenizer.attempt( - State::Next(StateName::DefinitionTitleAfterOptionalWhitespace), - State::Next(StateName::DefinitionTitleAfterOptionalWhitespace), - ); - State::Retry(space_or_tab(tokenizer)) + if matches!(tokenizer.current, Some(b'\t' | b' ')) { + tokenizer.attempt( + State::Next(StateName::DefinitionTitleAfterOptionalWhitespace), + State::Nok, + ); + State::Retry(space_or_tab(tokenizer)) + } else { + State::Retry(StateName::DefinitionTitleAfterOptionalWhitespace) + } } /// After title, after optional whitespace. diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs index 22b93db..30c22f3 100644 --- a/src/construct/heading_atx.rs +++ b/src/construct/heading_atx.rs @@ -70,16 +70,20 @@ use crate::tokenizer::Tokenizer; pub fn start(tokenizer: &mut Tokenizer) -> State { if tokenizer.parse_state.constructs.heading_atx { tokenizer.enter(Name::HeadingAtx); - tokenizer.attempt(State::Next(StateName::HeadingAtxBefore), State::Nok); - State::Retry(space_or_tab_min_max( - tokenizer, - 0, - if tokenizer.parse_state.constructs.code_indented { - TAB_SIZE - 1 - } else { - usize::MAX - }, - )) + if matches!(tokenizer.current, Some(b'\t' | b' ')) { + tokenizer.attempt(State::Next(StateName::HeadingAtxBefore), State::Nok); + State::Retry(space_or_tab_min_max( + tokenizer, + 0, + if tokenizer.parse_state.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }, + )) + } else { + State::Retry(StateName::HeadingAtxBefore) + } } else { State::Nok } diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs index 4e6345a..1f6270a 100644 --- a/src/construct/heading_setext.rs +++ b/src/construct/heading_setext.rs @@ -85,16 +85,20 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { .name == Name::Paragraph) { - tokenizer.attempt(State::Next(StateName::HeadingSetextBefore), State::Nok); - State::Retry(space_or_tab_min_max( - tokenizer, - 0, - if tokenizer.parse_state.constructs.code_indented { - TAB_SIZE - 1 - } else { - usize::MAX - }, - )) + if matches!(tokenizer.current, Some(b'\t' | b' ')) { + tokenizer.attempt(State::Next(StateName::HeadingSetextBefore), State::Nok); + State::Retry(space_or_tab_min_max( + tokenizer, + 0, + if tokenizer.parse_state.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }, + )) + } else { + State::Retry(StateName::HeadingSetextBefore) + } } else { State::Nok } @@ -132,11 +136,13 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State { } else { tokenizer.tokenize_state.marker = 0; tokenizer.exit(Name::HeadingSetextUnderline); - tokenizer.attempt( - State::Next(StateName::HeadingSetextAfter), - State::Next(StateName::HeadingSetextAfter), - ); - State::Retry(space_or_tab(tokenizer)) + + if matches!(tokenizer.current, Some(b'\t' | b' ')) { + tokenizer.attempt(State::Next(StateName::HeadingSetextAfter), State::Nok); + State::Retry(space_or_tab(tokenizer)) + } else { + State::Retry(StateName::HeadingSetextAfter) + } } } diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs index 123e1a3..e90abc4 100644 --- a/src/construct/html_flow.rs +++ b/src/construct/html_flow.rs @@ -133,21 +133,26 @@ const COMPLETE: u8 = 7; pub fn start(tokenizer: &mut Tokenizer) -> State { if tokenizer.parse_state.constructs.html_flow { tokenizer.enter(Name::HtmlFlow); - tokenizer.attempt(State::Next(StateName::HtmlFlowBefore), State::Nok); - State::Retry(space_or_tab_with_options( - tokenizer, - SpaceOrTabOptions { - kind: Name::HtmlFlowData, - min: 0, - max: if tokenizer.parse_state.constructs.code_indented { - TAB_SIZE - 1 - } else { - usize::MAX + + if matches!(tokenizer.current, Some(b'\t' | b' ')) { + tokenizer.attempt(State::Next(StateName::HtmlFlowBefore), State::Nok); + State::Retry(space_or_tab_with_options( + tokenizer, + SpaceOrTabOptions { + kind: Name::HtmlFlowData, + min: 0, + max: if tokenizer.parse_state.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }, + connect: false, + content: None, }, - connect: false, - content: None, - }, - )) + )) + } else { + State::Retry(StateName::HtmlFlowBefore) + } } else { State::Nok } diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs index ffbc768..c3b0a65 100644 --- a/src/construct/html_text.rs +++ b/src/construct/html_text.rs @@ -666,11 +666,15 @@ pub fn line_ending_before(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn line_ending_after(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt( - State::Next(StateName::HtmlTextLineEndingAfterPrefix), - State::Next(StateName::HtmlTextLineEndingAfterPrefix), - ); - State::Retry(space_or_tab(tokenizer)) + if matches!(tokenizer.current, Some(b'\t' | b' ')) { + tokenizer.attempt( + State::Next(StateName::HtmlTextLineEndingAfterPrefix), + State::Nok, + ); + State::Retry(space_or_tab(tokenizer)) + } else { + State::Retry(StateName::HtmlTextLineEndingAfterPrefix) + } } /// After eol, after optional whitespace. diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs index 7f80415..8921fcc 100644 --- a/src/construct/label_end.rs +++ b/src/construct/label_end.rs @@ -362,11 +362,15 @@ pub fn resource_start(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn resource_before(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt( - State::Next(StateName::LabelEndResourceOpen), - State::Next(StateName::LabelEndResourceOpen), - ); - State::Retry(space_or_tab_eol(tokenizer)) + if matches!(tokenizer.current, Some(b'\t' | b'\n' | b' ')) { + tokenizer.attempt( + State::Next(StateName::LabelEndResourceOpen), + State::Next(StateName::LabelEndResourceOpen), + ); + State::Retry(space_or_tab_eol(tokenizer)) + } else { + State::Retry(StateName::LabelEndResourceOpen) + } } /// In resource, after optional whitespace, at `)` or a destination. @@ -407,11 +411,16 @@ pub fn resource_destination_after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_4 = Name::Data; tokenizer.tokenize_state.token_5 = Name::Data; tokenizer.tokenize_state.size_b = 0; - tokenizer.attempt( - State::Next(StateName::LabelEndResourceBetween), - State::Next(StateName::LabelEndResourceEnd), - ); - State::Retry(space_or_tab_eol(tokenizer)) + + if matches!(tokenizer.current, Some(b'\t' | b'\n' | b' ')) { + tokenizer.attempt( + State::Next(StateName::LabelEndResourceBetween), + State::Next(StateName::LabelEndResourceEnd), + ); + State::Retry(space_or_tab_eol(tokenizer)) + } else { + State::Retry(StateName::LabelEndResourceEnd) + } } /// At invalid destination. @@ -462,11 +471,16 @@ pub fn resource_title_after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_1 = Name::Data; tokenizer.tokenize_state.token_2 = Name::Data; tokenizer.tokenize_state.token_3 = Name::Data; - tokenizer.attempt( - State::Next(StateName::LabelEndResourceEnd), - State::Next(StateName::LabelEndResourceEnd), - ); - State::Retry(space_or_tab_eol(tokenizer)) + + if matches!(tokenizer.current, Some(b'\t' | b'\n' | b' ')) { + tokenizer.attempt( + State::Next(StateName::LabelEndResourceBetween), + State::Next(StateName::LabelEndResourceEnd), + ); + State::Retry(space_or_tab_eol(tokenizer)) + } else { + State::Retry(StateName::LabelEndResourceEnd) + } } /// In resource, at `)`. diff --git a/src/construct/list.rs b/src/construct/list.rs index 076ff58..596330c 100644 --- a/src/construct/list.rs +++ b/src/construct/list.rs @@ -64,16 +64,21 @@ use crate::util::{ pub fn start(tokenizer: &mut Tokenizer) -> State { if tokenizer.parse_state.constructs.list { tokenizer.enter(Name::ListItem); - tokenizer.attempt(State::Next(StateName::ListBefore), State::Nok); - State::Retry(space_or_tab_min_max( - tokenizer, - 0, - if tokenizer.parse_state.constructs.code_indented { - TAB_SIZE - 1 - } else { - usize::MAX - }, - )) + + if matches!(tokenizer.current, Some(b'\t' | b' ')) { + tokenizer.attempt(State::Next(StateName::ListBefore), State::Nok); + State::Retry(space_or_tab_min_max( + tokenizer, + 0, + if tokenizer.parse_state.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }, + )) + } else { + State::Retry(StateName::ListBefore) + } } else { State::Nok } @@ -319,9 +324,11 @@ pub fn cont_blank(tokenizer: &mut Tokenizer) -> State { if container.blank_initial { State::Nok - } else { + } else if matches!(tokenizer.current, Some(b'\t' | b' ')) { // Consume, optionally, at most `size`. State::Retry(space_or_tab_min_max(tokenizer, 0, size)) + } else { + State::Ok } } @@ -339,8 +346,12 @@ pub fn cont_filled(tokenizer: &mut Tokenizer) -> State { container.blank_initial = false; - // Consume exactly `size`. - State::Retry(space_or_tab_min_max(tokenizer, size, size)) + if matches!(tokenizer.current, Some(b'\t' | b' ')) { + // Consume exactly `size`. + State::Retry(space_or_tab_min_max(tokenizer, size, size)) + } else { + State::Nok + } } /// Find adjacent list items with the same marker. diff --git a/src/construct/partial_space_or_tab_eol.rs b/src/construct/partial_space_or_tab_eol.rs index b38bc64..427cb11 100644 --- a/src/construct/partial_space_or_tab_eol.rs +++ b/src/construct/partial_space_or_tab_eol.rs @@ -54,21 +54,25 @@ pub fn space_or_tab_eol_with_options(tokenizer: &mut Tokenizer, options: Options /// | ␠␠b /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt( - State::Next(StateName::SpaceOrTabEolAfterFirst), - State::Next(StateName::SpaceOrTabEolAtEol), - ); + if matches!(tokenizer.current, Some(b'\t' | b'\n' | b' ')) { + tokenizer.attempt( + State::Next(StateName::SpaceOrTabEolAfterFirst), + State::Next(StateName::SpaceOrTabEolAtEol), + ); - State::Retry(space_or_tab_with_options( - tokenizer, - SpaceOrTabOptions { - kind: Name::SpaceOrTab, - min: 1, - max: usize::MAX, - content: tokenizer.tokenize_state.space_or_tab_eol_content.clone(), - connect: tokenizer.tokenize_state.space_or_tab_eol_connect, - }, - )) + State::Retry(space_or_tab_with_options( + tokenizer, + SpaceOrTabOptions { + kind: Name::SpaceOrTab, + min: 1, + max: usize::MAX, + content: tokenizer.tokenize_state.space_or_tab_eol_content.clone(), + connect: tokenizer.tokenize_state.space_or_tab_eol_connect, + }, + )) + } else { + State::Nok + } } /// After initial whitespace, at optional eol. @@ -151,20 +155,21 @@ pub fn at_eol(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn after_eol(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt( - State::Next(StateName::SpaceOrTabEolAfterMore), - State::Next(StateName::SpaceOrTabEolAfterMore), - ); - State::Retry(space_or_tab_with_options( - tokenizer, - SpaceOrTabOptions { - kind: Name::SpaceOrTab, - min: 1, - max: usize::MAX, - content: tokenizer.tokenize_state.space_or_tab_eol_content.clone(), - connect: tokenizer.tokenize_state.space_or_tab_eol_connect, - }, - )) + if matches!(tokenizer.current, Some(b'\t' | b' ')) { + tokenizer.attempt(State::Next(StateName::SpaceOrTabEolAfterMore), State::Nok); + State::Retry(space_or_tab_with_options( + tokenizer, + SpaceOrTabOptions { + kind: Name::SpaceOrTab, + min: 1, + max: usize::MAX, + content: tokenizer.tokenize_state.space_or_tab_eol_content.clone(), + connect: tokenizer.tokenize_state.space_or_tab_eol_connect, + }, + )) + } else { + State::Retry(StateName::SpaceOrTabEolAfterMore) + } } /// After optional final whitespace. diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs index 93dbd28..f0c4931 100644 --- a/src/construct/partial_title.rs +++ b/src/construct/partial_title.rs @@ -66,22 +66,17 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn begin(tokenizer: &mut Tokenizer) -> State { - match tokenizer.current { - Some(b'"' | b'\'' | b')') - if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => - { - tokenizer.enter(tokenizer.tokenize_state.token_2.clone()); - tokenizer.consume(); - tokenizer.exit(tokenizer.tokenize_state.token_2.clone()); - tokenizer.exit(tokenizer.tokenize_state.token_1.clone()); - tokenizer.tokenize_state.marker = 0; - tokenizer.tokenize_state.connect = false; - State::Ok - } - _ => { - tokenizer.enter(tokenizer.tokenize_state.token_3.clone()); - State::Retry(StateName::TitleAtBreak) - } + if tokenizer.current == Some(tokenizer.tokenize_state.marker) { + tokenizer.enter(tokenizer.tokenize_state.token_2.clone()); + tokenizer.consume(); + tokenizer.exit(tokenizer.tokenize_state.token_2.clone()); + tokenizer.exit(tokenizer.tokenize_state.token_1.clone()); + tokenizer.tokenize_state.marker = 0; + tokenizer.tokenize_state.connect = false; + State::Ok + } else { + tokenizer.enter(tokenizer.tokenize_state.token_3.clone()); + State::Retry(StateName::TitleAtBreak) } } @@ -92,13 +87,11 @@ pub fn begin(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn at_break(tokenizer: &mut Tokenizer) -> State { - match tokenizer.current { - None => { - tokenizer.tokenize_state.marker = 0; - tokenizer.tokenize_state.connect = false; - State::Nok - } - Some(b'\n') => { + if let Some(byte) = tokenizer.current { + if byte == tokenizer.tokenize_state.marker { + tokenizer.exit(tokenizer.tokenize_state.token_3.clone()); + State::Retry(StateName::TitleBegin) + } else if byte == b'\n' { tokenizer.attempt( State::Next(StateName::TitleAfterEol), State::Next(StateName::TitleAtBlankLine), @@ -110,14 +103,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { connect: tokenizer.tokenize_state.connect, }, )) - } - Some(b'"' | b'\'' | b')') - if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => - { - tokenizer.exit(tokenizer.tokenize_state.token_3.clone()); - State::Retry(StateName::TitleBegin) - } - Some(_) => { + } else { tokenizer.enter_link( Name::Data, Link { @@ -136,6 +122,10 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::TitleInside) } + } else { + tokenizer.tokenize_state.marker = 0; + tokenizer.tokenize_state.connect = false; + State::Nok } } @@ -172,25 +162,19 @@ pub fn at_blank_line(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn inside(tokenizer: &mut Tokenizer) -> State { - match tokenizer.current { - None | Some(b'\n') => { - tokenizer.exit(Name::Data); - State::Retry(StateName::TitleAtBreak) - } - Some(b'"' | b'\'' | b')') - if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => - { - tokenizer.exit(Name::Data); - State::Retry(StateName::TitleAtBreak) - } - Some(byte) => { - tokenizer.consume(); - State::Next(if matches!(byte, b'\\') { - StateName::TitleEscape - } else { - StateName::TitleInside - }) - } + if tokenizer.current == Some(tokenizer.tokenize_state.marker) + || matches!(tokenizer.current, None | Some(b'\n')) + { + tokenizer.exit(Name::Data); + State::Retry(StateName::TitleAtBreak) + } else { + let name = if tokenizer.current == Some(b'\\') { + StateName::TitleEscape + } else { + StateName::TitleInside + }; + tokenizer.consume(); + State::Next(name) } } diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs index af8206e..f493b96 100644 --- a/src/construct/thematic_break.rs +++ b/src/construct/thematic_break.rs @@ -63,16 +63,21 @@ use crate::tokenizer::Tokenizer; pub fn start(tokenizer: &mut Tokenizer) -> State { if tokenizer.parse_state.constructs.thematic_break { tokenizer.enter(Name::ThematicBreak); - tokenizer.attempt(State::Next(StateName::ThematicBreakBefore), State::Nok); - State::Retry(space_or_tab_min_max( - tokenizer, - 0, - if tokenizer.parse_state.constructs.code_indented { - TAB_SIZE - 1 - } else { - usize::MAX - }, - )) + + if matches!(tokenizer.current, Some(b'\t' | b' ')) { + tokenizer.attempt(State::Next(StateName::ThematicBreakBefore), State::Nok); + State::Retry(space_or_tab_min_max( + tokenizer, + 0, + if tokenizer.parse_state.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }, + )) + } else { + State::Retry(StateName::ThematicBreakBefore) + } } else { State::Nok } @@ -127,21 +132,16 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn sequence(tokenizer: &mut Tokenizer) -> State { - match tokenizer.current { - Some(b'*' | b'-' | b'_') - if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => - { - tokenizer.consume(); - tokenizer.tokenize_state.size += 1; - State::Next(StateName::ThematicBreakSequence) - } - _ => { - tokenizer.exit(Name::ThematicBreakSequence); - tokenizer.attempt( - State::Next(StateName::ThematicBreakAtBreak), - State::Next(StateName::ThematicBreakAtBreak), - ); - State::Retry(space_or_tab(tokenizer)) - } + if tokenizer.current == Some(tokenizer.tokenize_state.marker) { + tokenizer.consume(); + tokenizer.tokenize_state.size += 1; + State::Next(StateName::ThematicBreakSequence) + } else if matches!(tokenizer.current, Some(b'\t' | b' ')) { + tokenizer.exit(Name::ThematicBreakSequence); + tokenizer.attempt(State::Next(StateName::ThematicBreakAtBreak), State::Nok); + State::Retry(space_or_tab(tokenizer)) + } else { + tokenizer.exit(Name::ThematicBreakSequence); + State::Retry(StateName::ThematicBreakAtBreak) } } |