author     Titus Wormer <tituswormer@gmail.com>  2022-08-10 10:54:43 +0200
committer  Titus Wormer <tituswormer@gmail.com>  2022-08-10 10:54:43 +0200
commit     3a90a49518bbc53876d3f46d8763b2fe0f03d789 (patch)
tree       eecaaf9586cf8632b3b6fe22794dae1f492849f5
parent     8162222295d71ea7fd9270c7b3b9497b91db3f1f (diff)
Add `State::Retry`
26 files changed, 205 insertions(+), 213 deletions(-)
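The gist of the change: a state function that wanted another state to look at the same, not-yet-consumed byte used to tail-call it directly (for example `inside(tokenizer)`), so that transition never went through the tokenizer's driver loop. Returning `State::Retry(StateName::…)` instead makes "re-dispatch this same byte to that named state" an explicit result the loop handles, exactly as `State::Next` already does for the consuming case. Below is a minimal, self-contained sketch of that pattern; the tokenizer, state names, and input handling are simplified stand-ins for illustration, not the crate's actual definitions:

```rust
// A minimal sketch of the pattern this commit adopts; simplified stand-ins,
// not markdown-rs's actual types.
#[derive(Debug, Clone, Copy)]
enum StateName {
    Start,
    Inside,
}

#[derive(Debug)]
enum State {
    /// Consume the current byte, then continue in the named state.
    Next(StateName),
    /// Re-dispatch the *same* byte to the named state; nothing is consumed.
    Retry(StateName),
    Ok,
    Nok,
}

struct Tokenizer<'a> {
    bytes: &'a [u8],
    index: usize,
}

impl<'a> Tokenizer<'a> {
    fn current(&self) -> Option<u8> {
        self.bytes.get(self.index).copied()
    }

    /// Map a name to its state function (the role `StateName::to_func` plays).
    fn call(&mut self, name: StateName) -> State {
        match name {
            StateName::Start => start(self),
            StateName::Inside => inside(self),
        }
    }

    /// The driver loop: the only place that consumes input or dispatches states.
    fn run(&mut self, mut name: StateName) -> State {
        loop {
            match self.call(name) {
                State::Next(next) => {
                    self.index += 1; // `Next` consumes the current byte.
                    name = next;
                }
                State::Retry(retry) => name = retry, // same byte, new state.
                done => return done,
            }
        }
    }
}

fn start(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current() {
        // Previously this would tail-call `inside(tokenizer)` directly;
        // returning `Retry` hands the same byte back to the driver instead.
        Some(b'*') => State::Retry(StateName::Inside),
        _ => State::Nok,
    }
}

fn inside(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current() {
        Some(b'*') => State::Next(StateName::Inside),
        _ => State::Ok,
    }
}

fn main() {
    let mut tokenizer = Tokenizer { bytes: b"***", index: 0 };
    println!("{:?}", tokenizer.run(StateName::Start)); // prints `Ok`
}
```

In the diff below, this is what the two new `State::Retry(name)` arms dispatching through `call_impl` in `src/tokenizer.rs` do, and it is why previously commented-out `StateName` variants such as `DocumentContainerExistingBefore` could be enabled: every transition now has a name the driver can dispatch.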
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
index ff33f97..d61813d 100644
--- a/src/construct/attention.rs
+++ b/src/construct/attention.rs
@@ -120,7 +120,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
         Some(b'*' | b'_') if tokenizer.parse_state.constructs.attention => {
             tokenizer.tokenize_state.marker = tokenizer.current.unwrap();
             tokenizer.enter(Token::AttentionSequence);
-            inside(tokenizer)
+            State::Retry(StateName::AttentionInside)
         }
         _ => State::Nok,
     }
diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs
index f1b92d9..eef3840 100644
--- a/src/construct/autolink.rs
+++ b/src/construct/autolink.rs
@@ -142,7 +142,7 @@ pub fn open(tokenizer: &mut Tokenizer) -> State {
             tokenizer.consume();
             State::Next(StateName::AutolinkSchemeOrEmailAtext)
         }
-        _ => email_atext(tokenizer),
+        _ => State::Retry(StateName::AutolinkEmailAtext),
     }
 }
@@ -160,9 +160,9 @@ pub fn scheme_or_email_atext(tokenizer: &mut Tokenizer) -> State {
         Some(b'+' | b'-' | b'.' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => {
             // Count the previous alphabetical from `open` too.
             tokenizer.tokenize_state.size = 1;
-            scheme_inside_or_email_atext(tokenizer)
+            State::Retry(StateName::AutolinkSchemeInsideOrEmailAtext)
         }
-        _ => email_atext(tokenizer),
+        _ => State::Retry(StateName::AutolinkEmailAtext),
     }
 }
@@ -191,7 +191,7 @@ pub fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer) -> State {
         }
         _ => {
             tokenizer.tokenize_state.size = 0;
-            email_atext(tokenizer)
+            State::Retry(StateName::AutolinkEmailAtext)
         }
     }
 }
@@ -206,7 +206,11 @@ pub fn url_inside(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'>') => {
             tokenizer.exit(Token::AutolinkProtocol);
-            end(tokenizer)
+            tokenizer.enter(Token::AutolinkMarker);
+            tokenizer.consume();
+            tokenizer.exit(Token::AutolinkMarker);
+            tokenizer.exit(Token::Autolink);
+            State::Ok
         }
         // ASCII control, space, or `<`.
         None | Some(b'\0'..=0x1F | b' ' | b'<' | 0x7F) => State::Nok,
@@ -265,7 +269,9 @@ pub fn email_atext(tokenizer: &mut Tokenizer) -> State {
 pub fn email_at_sign_or_dot(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         // ASCII alphanumeric.
-        Some(b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => email_value(tokenizer),
+        Some(b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => {
+            State::Retry(StateName::AutolinkEmailValue)
+        }
         _ => State::Nok,
     }
 }
@@ -290,9 +296,13 @@ pub fn email_label(tokenizer: &mut Tokenizer) -> State {
             // Change the token type.
             tokenizer.events[index - 1].token_type = Token::AutolinkEmail;
             tokenizer.events[index].token_type = Token::AutolinkEmail;
-            end(tokenizer)
+            tokenizer.enter(Token::AutolinkMarker);
+            tokenizer.consume();
+            tokenizer.exit(Token::AutolinkMarker);
+            tokenizer.exit(Token::Autolink);
+            State::Ok
         }
-        _ => email_value(tokenizer),
+        _ => State::Retry(StateName::AutolinkEmailValue),
     }
 }
@@ -325,24 +335,3 @@ pub fn email_value(tokenizer: &mut Tokenizer) -> State {
         }
     }
 }
-
-/// At the `>`.
-///
-/// ```markdown
-/// > | a<https://example.com>b
-///                           ^
-/// > | a<user@example.com>b
-///                        ^
-/// ```
-pub fn end(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        Some(b'>') => {
-            tokenizer.enter(Token::AutolinkMarker);
-            tokenizer.consume();
-            tokenizer.exit(Token::AutolinkMarker);
-            tokenizer.exit(Token::Autolink);
-            State::Ok
-        }
-        _ => unreachable!("expected `>`"),
-    }
-}
diff --git a/src/construct/block_quote.rs b/src/construct/block_quote.rs
index 0e02be9..bbfad5b 100644
--- a/src/construct/block_quote.rs
+++ b/src/construct/block_quote.rs
@@ -71,9 +71,9 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'>') => {
             tokenizer.enter(Token::BlockQuote);
-            cont_before(tokenizer)
+            State::Retry(StateName::BlockQuoteContBefore)
         }
-        _ => cont_before(tokenizer),
+        _ => State::Retry(StateName::BlockQuoteContBefore),
     }
 }
diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs
index 5a0e15c..e1c7e79 100644
--- a/src/construct/character_reference.rs
+++ b/src/construct/character_reference.rs
@@ -113,7 +113,7 @@ pub fn open(tokenizer: &mut Tokenizer) -> State {
     } else {
         tokenizer.tokenize_state.marker = b'&';
         tokenizer.enter(Token::CharacterReferenceValue);
-        value(tokenizer)
+        State::Retry(StateName::CharacterReferenceValue)
     }
 }
@@ -138,7 +138,7 @@ pub fn numeric(tokenizer: &mut Tokenizer) -> State {
     } else {
         tokenizer.enter(Token::CharacterReferenceValue);
         tokenizer.tokenize_state.marker = b'#';
-        value(tokenizer)
+        State::Retry(StateName::CharacterReferenceValue)
     }
 }
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index 17c7566..0d4345a 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -164,7 +164,7 @@ pub fn before_sequence_open(tokenizer: &mut Tokenizer) -> State {
         tokenizer.tokenize_state.marker = tokenizer.current.unwrap();
         tokenizer.tokenize_state.prefix = prefix;
         tokenizer.enter(Token::CodeFencedFenceSequence);
-        sequence_open(tokenizer)
+        State::Retry(StateName::CodeFencedSequenceOpen)
     } else {
         State::Nok
     }
@@ -217,12 +217,16 @@ pub fn info_before(tokenizer: &mut Tokenizer) -> State {
             tokenizer.exit(Token::CodeFencedFence);
             // Do not form containers.
             tokenizer.concrete = true;
-            at_break(tokenizer)
+            tokenizer.check(
+                StateName::NonLazyContinuationStart,
+                State::Next(StateName::CodeFencedAtNonLazyBreak),
+                State::Next(StateName::CodeFencedAfter),
+            )
         }
         _ => {
             tokenizer.enter(Token::CodeFencedFenceInfo);
             tokenizer.enter_with_content(Token::Data, Some(ContentType::String));
-            info(tokenizer)
+            State::Retry(StateName::CodeFencedInfo)
         }
     }
 }
@@ -240,10 +244,7 @@ pub fn info(tokenizer: &mut Tokenizer) -> State {
         None | Some(b'\n') => {
             tokenizer.exit(Token::Data);
             tokenizer.exit(Token::CodeFencedFenceInfo);
-            tokenizer.exit(Token::CodeFencedFence);
-            // Do not form containers.
-            tokenizer.concrete = true;
-            at_break(tokenizer)
+            State::Retry(StateName::CodeFencedInfoBefore)
         }
         Some(b'\t' | b' ') => {
             tokenizer.exit(Token::Data);
@@ -279,16 +280,11 @@ pub fn meta_before(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        None | Some(b'\n') => {
-            tokenizer.exit(Token::CodeFencedFence);
-            // Do not form containers.
-            tokenizer.concrete = true;
-            at_break(tokenizer)
-        }
+        None | Some(b'\n') => State::Retry(StateName::CodeFencedInfoBefore),
         _ => {
             tokenizer.enter(Token::CodeFencedFenceMeta);
             tokenizer.enter_with_content(Token::Data, Some(ContentType::String));
-            meta(tokenizer)
+            State::Retry(StateName::CodeFencedMeta)
         }
     }
 }
@@ -306,10 +302,7 @@ pub fn meta(tokenizer: &mut Tokenizer) -> State {
         None | Some(b'\n') => {
             tokenizer.exit(Token::Data);
             tokenizer.exit(Token::CodeFencedFenceMeta);
-            tokenizer.exit(Token::CodeFencedFence);
-            // Do not form containers.
-            tokenizer.concrete = true;
-            at_break(tokenizer)
+            State::Retry(StateName::CodeFencedInfoBefore)
         }
         Some(b'`') if tokenizer.tokenize_state.marker == b'`' => {
             tokenizer.concrete = false;
@@ -325,23 +318,6 @@
     }
 }
-/// At an eol/eof in code, before a closing fence or before content.
-///
-/// ```markdown
-/// > | ~~~js
-///          ^
-/// > | console.log(1)
-///                   ^
-///   | ~~~
-/// ```
-pub fn at_break(tokenizer: &mut Tokenizer) -> State {
-    tokenizer.check(
-        StateName::NonLazyContinuationStart,
-        State::Next(StateName::CodeFencedAtNonLazyBreak),
-        State::Next(StateName::CodeFencedAfter),
-    )
-}
-
 /// At an eol/eof in code, before a non-lazy closing fence or content.
 ///
 /// ```markdown
 /// > | ~~~js
 /// > | console.log(1)
 ///     ^
 ///   | ~~~
 /// ```
@@ -417,7 +393,7 @@ pub fn before_sequence_close(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'`' | b'~') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
             tokenizer.enter(Token::CodeFencedFenceSequence);
-            sequence_close(tokenizer)
+            State::Retry(StateName::CodeFencedSequenceClose)
         }
         _ => State::Nok,
     }
@@ -516,10 +492,14 @@ pub fn content_start(tokenizer: &mut Tokenizer) -> State {
 /// ```
 pub fn before_content_chunk(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        None | Some(b'\n') => at_break(tokenizer),
+        None | Some(b'\n') => tokenizer.check(
+            StateName::NonLazyContinuationStart,
+            State::Next(StateName::CodeFencedAtNonLazyBreak),
+            State::Next(StateName::CodeFencedAfter),
+        ),
         _ => {
             tokenizer.enter(Token::CodeFlowChunk);
-            content_chunk(tokenizer)
+            State::Retry(StateName::CodeFencedContentChunk)
         }
     }
 }
@@ -536,7 +516,7 @@ pub fn content_chunk(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None | Some(b'\n') => {
             tokenizer.exit(Token::CodeFlowChunk);
-            at_break(tokenizer)
+            State::Retry(StateName::CodeFencedBeforeContentChunk)
         }
         _ => {
             tokenizer.consume();
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs
index de7683d..36ae4c6 100644
--- a/src/construct/code_indented.rs
+++ b/src/construct/code_indented.rs
@@ -83,7 +83,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 /// ```
 pub fn at_break(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        None => after(tokenizer),
+        None => State::Retry(StateName::CodeIndentedAfter),
         Some(b'\n') => tokenizer.attempt(
             StateName::CodeIndentedFurtherStart,
             State::Next(StateName::CodeIndentedAtBreak),
@@ -91,7 +91,7 @@
         ),
         _ => {
             tokenizer.enter(Token::CodeFlowChunk);
-            inside(tokenizer)
+            State::Retry(StateName::CodeIndentedInside)
         }
     }
 }
@@ -106,7 +106,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None | Some(b'\n') => {
             tokenizer.exit(Token::CodeFlowChunk);
-            at_break(tokenizer)
+            State::Retry(StateName::CodeIndentedAtBreak)
         }
         _ => {
             tokenizer.consume();
@@ -191,7 +191,7 @@ pub fn further_begin(tokenizer: &mut Tokenizer) -> State {
 /// ```
 pub fn further_after(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        Some(b'\n') => further_start(tokenizer),
+        Some(b'\n') => State::Retry(StateName::CodeIndentedFurtherStart),
         _ => State::Nok,
     }
 }
diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs
index 729abe5..2c8faf3 100644
--- a/src/construct/code_text.rs
+++ b/src/construct/code_text.rs
@@ -105,7 +105,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
         {
             tokenizer.enter(Token::CodeText);
             tokenizer.enter(Token::CodeTextSequence);
-            sequence_open(tokenizer)
+            State::Retry(StateName::CodeTextSequenceOpen)
         }
         _ => State::Nok,
     }
 }
@@ -124,7 +124,7 @@ pub fn sequence_open(tokenizer: &mut Tokenizer) -> State {
         State::Next(StateName::CodeTextSequenceOpen)
     } else {
         tokenizer.exit(Token::CodeTextSequence);
-        between(tokenizer)
+        State::Retry(StateName::CodeTextBetween)
     }
 }
@@ -148,11 +148,11 @@ pub fn between(tokenizer: &mut Tokenizer) -> State {
         }
         Some(b'`') => {
             tokenizer.enter(Token::CodeTextSequence);
-            sequence_close(tokenizer)
+            State::Retry(StateName::CodeTextSequenceClose)
         }
         _ => {
             tokenizer.enter(Token::CodeTextData);
-            data(tokenizer)
+            State::Retry(StateName::CodeTextData)
         }
     }
 }
@@ -167,7 +167,7 @@ pub fn data(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None | Some(b'\n' | b'`') => {
             tokenizer.exit(Token::CodeTextData);
-            between(tokenizer)
+            State::Retry(StateName::CodeTextBetween)
         }
         _ => {
             tokenizer.consume();
@@ -203,7 +203,7 @@ pub fn sequence_close(tokenizer: &mut Tokenizer) -> State {
             tokenizer.events[index - 1].token_type = Token::CodeTextData;
             tokenizer.events[index].token_type = Token::CodeTextData;
             tokenizer.tokenize_state.size_other = 0;
-            between(tokenizer)
+            State::Retry(StateName::CodeTextBetween)
         }
     }
 }
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index 41fad49..d70f7db 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -92,7 +92,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 pub fn before(tokenizer: &mut Tokenizer) -> State {
     if Some(b'#') == tokenizer.current {
         tokenizer.enter(Token::HeadingAtxSequence);
-        sequence_open(tokenizer)
+        State::Retry(StateName::HeadingAtxSequenceOpen)
     } else {
         State::Nok
     }
 }
@@ -109,7 +109,7 @@ pub fn sequence_open(tokenizer: &mut Tokenizer) -> State {
         None | Some(b'\n') if tokenizer.tokenize_state.size > 0 => {
             tokenizer.tokenize_state.size = 0;
             tokenizer.exit(Token::HeadingAtxSequence);
-            at_break(tokenizer)
+            State::Retry(StateName::HeadingAtxAtBreak)
         }
         Some(b'#') if tokenizer.tokenize_state.size < HEADING_ATX_OPENING_FENCE_SIZE_MAX => {
             tokenizer.tokenize_state.size += 1;
@@ -150,11 +150,11 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State {
         }
         Some(b'#') => {
             tokenizer.enter(Token::HeadingAtxSequence);
-            sequence_further(tokenizer)
+            State::Retry(StateName::HeadingAtxSequenceFurther)
         }
         Some(_) => {
             tokenizer.enter_with_content(Token::Data, Some(ContentType::Text));
-            data(tokenizer)
+            State::Retry(StateName::HeadingAtxData)
         }
     }
 }
@@ -173,7 +173,7 @@ pub fn sequence_further(tokenizer: &mut Tokenizer) -> State {
         State::Next(StateName::HeadingAtxSequenceFurther)
     } else {
         tokenizer.exit(Token::HeadingAtxSequence);
-        at_break(tokenizer)
+        State::Retry(StateName::HeadingAtxAtBreak)
     }
 }
@@ -188,7 +188,7 @@ pub fn data(tokenizer: &mut Tokenizer) -> State {
         // Note: `#` for closing sequence must be preceded by whitespace, otherwise it’s just text.
         None | Some(b'\t' | b'\n' | b' ') => {
             tokenizer.exit(Token::Data);
-            at_break(tokenizer)
+            State::Retry(StateName::HeadingAtxAtBreak)
         }
         _ => {
             tokenizer.consume();
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index 4541a87..f653d75 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -115,7 +115,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
         Some(b'-' | b'=') => {
             tokenizer.tokenize_state.marker = tokenizer.current.unwrap();
             tokenizer.enter(Token::HeadingSetextUnderline);
-            inside(tokenizer)
+            State::Retry(StateName::HeadingSetextInside)
         }
         _ => State::Nok,
     }
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index 298bcaf..b49b231 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -203,7 +203,7 @@ pub fn open(tokenizer: &mut Tokenizer) -> State {
         // ASCII alphabetical.
         Some(b'A'..=b'Z' | b'a'..=b'z') => {
             tokenizer.tokenize_state.start = tokenizer.point.index;
-            tag_name(tokenizer)
+            State::Retry(StateName::HtmlFlowTagName)
         }
         _ => State::Nok,
     }
 }
@@ -334,7 +334,7 @@ pub fn tag_name(tokenizer: &mut Tokenizer) -> State {
                 tokenizer.tokenize_state.marker = RAW;
                 // Do not form containers.
                 tokenizer.concrete = true;
-                continuation(tokenizer)
+                State::Retry(StateName::HtmlFlowContinuation)
             } else if HTML_BLOCK_NAMES.contains(&name.as_str()) {
                 tokenizer.tokenize_state.marker = BASIC;
@@ -344,7 +344,7 @@ pub fn tag_name(tokenizer: &mut Tokenizer) -> State {
                 } else {
                     // Do not form containers.
                     tokenizer.concrete = true;
-                    continuation(tokenizer)
+                    State::Retry(StateName::HtmlFlowContinuation)
                 }
             } else {
                 tokenizer.tokenize_state.marker = COMPLETE;
@@ -354,9 +354,9 @@ pub fn tag_name(tokenizer: &mut Tokenizer) -> State {
                 tokenizer.tokenize_state.marker = 0;
                 State::Nok
             } else if closing_tag {
-                complete_closing_tag_after(tokenizer)
+                State::Retry(StateName::HtmlFlowCompleteClosingTagAfter)
             } else {
-                complete_attribute_name_before(tokenizer)
+                State::Retry(StateName::HtmlFlowCompleteAttributeNameBefore)
             }
         }
     }
@@ -402,7 +402,7 @@ pub fn complete_closing_tag_after(tokenizer: &mut Tokenizer) -> State {
             tokenizer.consume();
             State::Next(StateName::HtmlFlowCompleteClosingTagAfter)
         }
-        _ => complete_end(tokenizer),
+        _ => State::Retry(StateName::HtmlFlowCompleteEnd),
     }
 }
@@ -440,7 +440,7 @@ pub fn complete_attribute_name_before(tokenizer: &mut Tokenizer) -> State {
             tokenizer.consume();
             State::Next(StateName::HtmlFlowCompleteAttributeName)
         }
-        _ => complete_end(tokenizer),
+        _ => State::Retry(StateName::HtmlFlowCompleteEnd),
     }
 }
@@ -461,7 +461,7 @@ pub fn complete_attribute_name(tokenizer: &mut Tokenizer) -> State {
             tokenizer.consume();
             State::Next(StateName::HtmlFlowCompleteAttributeName)
         }
-        _ => complete_attribute_name_after(tokenizer),
+        _ => State::Retry(StateName::HtmlFlowCompleteAttributeNameAfter),
     }
 }
@@ -484,7 +484,7 @@ pub fn complete_attribute_name_after(tokenizer: &mut Tokenizer) -> State {
             tokenizer.consume();
             State::Next(StateName::HtmlFlowCompleteAttributeValueBefore)
         }
-        _ => complete_attribute_name_before(tokenizer),
+        _ => State::Retry(StateName::HtmlFlowCompleteAttributeNameBefore),
     }
 }
@@ -512,7 +512,7 @@ pub fn complete_attribute_value_before(tokenizer: &mut Tokenizer) -> State {
             tokenizer.consume();
             State::Next(StateName::HtmlFlowCompleteAttributeValueQuoted)
         }
-        _ => complete_attribute_value_unquoted(tokenizer),
+        _ => State::Retry(StateName::HtmlFlowCompleteAttributeValueUnquoted),
     }
 }
@@ -554,7 +554,7 @@ pub fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {
 pub fn complete_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None | Some(b'\t' | b'\n' | b' ' | b'"' | b'\'' | b'/' | b'<' | b'=' | b'>' | b'`') => {
-            complete_attribute_name_after(tokenizer)
+            State::Retry(StateName::HtmlFlowCompleteAttributeNameAfter)
        }
         Some(_) => {
             tokenizer.consume();
@@ -572,7 +572,7 @@ pub fn complete_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State {
 /// ```
 pub fn complete_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State {
     if let Some(b'\t' | b' ' | b'/' | b'>') = tokenizer.current {
-        complete_attribute_name_before(tokenizer)
+        State::Retry(StateName::HtmlFlowCompleteAttributeNameBefore)
     } else {
         tokenizer.tokenize_state.marker = 0;
         State::Nok
@@ -606,7 +606,7 @@ pub fn complete_after(tokenizer: &mut Tokenizer) -> State {
         None | Some(b'\n') => {
             // Do not form containers.
             tokenizer.concrete = true;
-            continuation(tokenizer)
+            State::Retry(StateName::HtmlFlowContinuation)
         }
         Some(b'\t' | b' ') => {
             tokenizer.consume();
@@ -641,7 +641,7 @@ pub fn continuation(tokenizer: &mut Tokenizer) -> State {
         // Note: important that this is after the basic/complete case.
         None | Some(b'\n') => {
             tokenizer.exit(Token::HtmlFlowData);
-            continuation_start(tokenizer)
+            State::Retry(StateName::HtmlFlowContinuationStart)
         }
         Some(b'-') if tokenizer.tokenize_state.marker == COMMENT => {
             tokenizer.consume();
@@ -713,10 +713,10 @@ pub fn continuation_start_non_lazy(tokenizer: &mut Tokenizer) -> State {
 /// ```
 pub fn continuation_before(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        None | Some(b'\n') => continuation_start(tokenizer),
+        None | Some(b'\n') => State::Retry(StateName::HtmlFlowContinuationStart),
         _ => {
             tokenizer.enter(Token::HtmlFlowData);
-            continuation(tokenizer)
+            State::Retry(StateName::HtmlFlowContinuation)
         }
     }
 }
@@ -733,7 +733,7 @@ pub fn continuation_comment_inside(tokenizer: &mut Tokenizer) -> State {
             tokenizer.consume();
             State::Next(StateName::HtmlFlowContinuationDeclarationInside)
         }
-        _ => continuation(tokenizer),
+        _ => State::Retry(StateName::HtmlFlowContinuation),
     }
 }
@@ -750,7 +750,7 @@ pub fn continuation_raw_tag_open(tokenizer: &mut Tokenizer) -> State {
             tokenizer.tokenize_state.start = tokenizer.point.index;
             State::Next(StateName::HtmlFlowContinuationRawEndTag)
         }
-        _ => continuation(tokenizer),
+        _ => State::Retry(StateName::HtmlFlowContinuation),
     }
 }
@@ -777,7 +777,7 @@ pub fn continuation_raw_end_tag(tokenizer: &mut Tokenizer) -> State {
                 tokenizer.consume();
                 State::Next(StateName::HtmlFlowContinuationClose)
             } else {
-                continuation(tokenizer)
+                State::Retry(StateName::HtmlFlowContinuation)
             }
         }
         Some(b'A'..=b'Z' | b'a'..=b'z')
@@ -788,7 +788,7 @@
         }
         _ => {
             tokenizer.tokenize_state.start = 0;
-            continuation(tokenizer)
+            State::Retry(StateName::HtmlFlowContinuation)
         }
     }
 }
@@ -805,7 +805,7 @@ pub fn continuation_cdata_inside(tokenizer: &mut Tokenizer) -> State {
             tokenizer.consume();
             State::Next(StateName::HtmlFlowContinuationDeclarationInside)
         }
-        _ => continuation(tokenizer),
+        _ => State::Retry(StateName::HtmlFlowContinuation),
     }
 }
@@ -833,7 +833,7 @@ pub fn continuation_declaration_inside(tokenizer: &mut Tokenizer) -> State {
             tokenizer.consume();
             State::Next(StateName::HtmlFlowContinuationDeclarationInside)
         }
-        _ => continuation(tokenizer),
+        _ => State::Retry(StateName::HtmlFlowContinuation),
     }
 }
@@ -847,7 +847,7 @@ pub fn continuation_close(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None | Some(b'\n') => {
             tokenizer.exit(Token::HtmlFlowData);
-            continuation_after(tokenizer)
+            State::Retry(StateName::HtmlFlowContinuationAfter)
         }
         _ => {
             tokenizer.consume();
diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs
index 38d0476..df6bd99 100644
--- a/src/construct/html_text.rs
+++ b/src/construct/html_text.rs
@@ -174,7 +174,7 @@ pub fn comment_start(tokenizer: &mut Tokenizer) -> State {
             tokenizer.consume();
             State::Next(StateName::HtmlTextCommentStartDash)
         }
-        _ => comment(tokenizer),
+        _ => State::Retry(StateName::HtmlTextComment),
     }
 }
@@ -194,7 +194,7 @@ pub fn comment_start(tokenizer: &mut Tokenizer) -> State {
 pub fn comment_start_dash(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'>') => State::Nok,
-        _ => comment(tokenizer),
+        _ => State::Retry(StateName::HtmlTextComment),
     }
 }
@@ -209,7 +209,7 @@ pub fn comment(tokenizer: &mut Tokenizer) -> State {
         None => State::Nok,
         Some(b'\n') => {
             tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextComment);
-            line_ending_before(tokenizer)
+            State::Retry(StateName::HtmlTextLineEndingBefore)
         }
         Some(b'-') => {
             tokenizer.consume();
@@ -234,7 +234,7 @@ pub fn comment_close(tokenizer: &mut Tokenizer) -> State {
             tokenizer.consume();
             State::Next(StateName::HtmlTextEnd)
         }
-        _ => comment(tokenizer),
+        _ => State::Retry(StateName::HtmlTextComment),
     }
 }
@@ -271,7 +271,7 @@ pub fn cdata(tokenizer: &mut Tokenizer) -> State {
         None => State::Nok,
         Some(b'\n') => {
             tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextCdata);
-            line_ending_before(tokenizer)
+            State::Retry(StateName::HtmlTextLineEndingBefore)
         }
         Some(b']') => {
             tokenizer.consume();
@@ -296,7 +296,7 @@ pub fn cdata_close(tokenizer: &mut Tokenizer) -> State {
             tokenizer.consume();
             State::Next(StateName::HtmlTextCdataEnd)
         }
-        _ => cdata(tokenizer),
+        _ => State::Retry(StateName::HtmlTextCdata),
     }
 }
@@ -308,9 +308,9 @@ pub fn cdata_close(tokenizer: &mut Tokenizer) -> State {
 pub fn cdata_end(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        Some(b'>') => end(tokenizer),
-        Some(b']') => cdata_close(tokenizer),
-        _ => cdata(tokenizer),
+        Some(b'>') => State::Retry(StateName::HtmlTextEnd),
+        Some(b']') => State::Retry(StateName::HtmlTextCdataClose),
+        _ => State::Retry(StateName::HtmlTextCdata),
     }
 }
@@ -322,10 +322,10 @@ pub fn cdata_end(tokenizer: &mut Tokenizer) -> State {
 pub fn declaration(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        None | Some(b'>') => end(tokenizer),
+        None | Some(b'>') => State::Retry(StateName::HtmlTextEnd),
         Some(b'\n') => {
             tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextDeclaration);
-            line_ending_before(tokenizer)
+            State::Retry(StateName::HtmlTextLineEndingBefore)
         }
         _ => {
             tokenizer.consume();
@@ -345,7 +345,7 @@ pub fn instruction(tokenizer: &mut Tokenizer) -> State {
         None => State::Nok,
         Some(b'\n') => {
             tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextInstruction);
-            line_ending_before(tokenizer)
+            State::Retry(StateName::HtmlTextLineEndingBefore)
         }
         Some(b'?') => {
             tokenizer.consume();
@@ -366,8 +366,8 @@ pub fn instruction(tokenizer: &mut Tokenizer) -> State {
 pub fn instruction_close(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        Some(b'>') => end(tokenizer),
-        _ => instruction(tokenizer),
+        Some(b'>') => State::Retry(StateName::HtmlTextEnd),
+        _ => State::Retry(StateName::HtmlTextInstruction),
     }
 }
@@ -401,7 +401,7 @@ pub fn tag_close(tokenizer: &mut Tokenizer) -> State {
             tokenizer.consume();
             State::Next(StateName::HtmlTextTagClose)
         }
-        _ => tag_close_between(tokenizer),
+        _ => State::Retry(StateName::HtmlTextTagCloseBetween),
     }
 }
@@ -415,13 +415,13 @@ pub fn tag_close_between(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'\n') => {
             tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextTagCloseBetween);
-            line_ending_before(tokenizer)
+            State::Retry(StateName::HtmlTextLineEndingBefore)
         }
         Some(b'\t' | b' ') => {
             tokenizer.consume();
             State::Next(StateName::HtmlTextTagCloseBetween)
         }
-        _ => end(tokenizer),
+        _ => State::Retry(StateName::HtmlTextEnd),
     }
 }
@@ -438,7 +438,7 @@ pub fn tag_open(tokenizer: &mut Tokenizer) -> State {
             tokenizer.consume();
             State::Next(StateName::HtmlTextTagOpen)
         }
-        Some(b'\t' | b'\n' | b' ' | b'/' | b'>') => tag_open_between(tokenizer),
+        Some(b'\t' | b'\n' | b' ' | b'/' | b'>') => State::Retry(StateName::HtmlTextTagOpenBetween),
         _ => State::Nok,
     }
 }
@@ -453,7 +453,7 @@ pub fn tag_open_between(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'\n') => {
             tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextTagOpenBetween);
-            line_ending_before(tokenizer)
+            State::Retry(StateName::HtmlTextLineEndingBefore)
         }
         Some(b'\t' | b' ') => {
             tokenizer.consume();
@@ -468,7 +468,7 @@ pub fn tag_open_between(tokenizer: &mut Tokenizer) -> State {
             tokenizer.consume();
             State::Next(StateName::HtmlTextTagOpenAttributeName)
         }
-        _ => end(tokenizer),
+        _ => State::Retry(StateName::HtmlTextEnd),
     }
 }
@@ -485,7 +485,7 @@ pub fn tag_open_attribute_name(tokenizer: &mut Tokenizer) -> State {
             tokenizer.consume();
             State::Next(StateName::HtmlTextTagOpenAttributeName)
         }
-        _ => tag_open_attribute_name_after(tokenizer),
+        _ => State::Retry(StateName::HtmlTextTagOpenAttributeNameAfter),
     }
 }
@@ -501,7 +501,7 @@ pub fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer) -> State {
         Some(b'\n') => {
             tokenizer.tokenize_state.return_state =
                 Some(StateName::HtmlTextTagOpenAttributeNameAfter);
-            line_ending_before(tokenizer)
+            State::Retry(StateName::HtmlTextLineEndingBefore)
         }
         Some(b'\t' | b' ') => {
             tokenizer.consume();
@@ -511,7 +511,7 @@ pub fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer) -> State {
             tokenizer.consume();
             State::Next(StateName::HtmlTextTagOpenAttributeValueBefore)
         }
-        _ => tag_open_between(tokenizer),
+        _ => State::Retry(StateName::HtmlTextTagOpenBetween),
     }
 }
@@ -528,7 +528,7 @@ pub fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer) -> State {
         Some(b'\n') => {
             tokenizer.tokenize_state.return_state =
                 Some(StateName::HtmlTextTagOpenAttributeValueBefore);
-            line_ending_before(tokenizer)
+            State::Retry(StateName::HtmlTextLineEndingBefore)
         }
         Some(b'\t' | b' ') => {
             tokenizer.consume();
@@ -561,7 +561,7 @@ pub fn tag_open_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {
         Some(b'\n') => {
             tokenizer.tokenize_state.return_state =
                 Some(StateName::HtmlTextTagOpenAttributeValueQuoted);
-            line_ending_before(tokenizer)
+            State::Retry(StateName::HtmlTextLineEndingBefore)
         }
         Some(b'"' | b'\'') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
             tokenizer.tokenize_state.marker = 0;
@@ -584,7 +584,7 @@ pub fn tag_open_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {
 pub fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None | Some(b'"' | b'\'' | b'<' | b'=' | b'`') => State::Nok,
-        Some(b'\t' | b'\n' | b' ' | b'/' | b'>') => tag_open_between(tokenizer),
+        Some(b'\t' | b'\n' | b' ' | b'/' | b'>') => State::Retry(StateName::HtmlTextTagOpenBetween),
         Some(_) => {
             tokenizer.consume();
             State::Next(StateName::HtmlTextTagOpenAttributeValueUnquoted)
         }
@@ -601,7 +601,7 @@ pub fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State {
 pub fn tag_open_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        Some(b'\t' | b'\n' | b' ' | b'>' | b'/') => tag_open_between(tokenizer),
+        Some(b'\t' | b'\n' | b' ' | b'>' | b'/') => State::Retry(StateName::HtmlTextTagOpenBetween),
         _ => State::Nok,
     }
 }
@@ -677,9 +677,6 @@
 ///          ^
 /// ```
 pub fn line_ending_after_prefix(tokenizer: &mut Tokenizer) -> State {
-    let name = tokenizer.tokenize_state.return_state.take().unwrap();
-    // To do: should use `State::Retry` when it exists.
-    let func = name.to_func();
     tokenizer.enter(Token::HtmlTextData);
-    func(tokenizer)
+    State::Retry(tokenizer.tokenize_state.return_state.take().unwrap())
 }
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index 0404b6e..3337cec 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -193,7 +193,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 
         // Mark as balanced if the info is inactive.
         if label_start.inactive {
-            return nok(tokenizer);
+            return State::Retry(StateName::LabelEndNok);
         }
 
         tokenizer.enter(Token::LabelEnd);
@@ -257,11 +257,11 @@ pub fn after(tokenizer: &mut Tokenizer) -> State {
             }),
         ),
         // Shortcut (`[asd]`) reference?
-        _ => {
-            // To do: use state names?
-            let func = if defined { ok } else { nok };
-            func(tokenizer)
-        }
+        _ => State::Retry(if defined {
+            StateName::LabelEndOk
+        } else {
+            StateName::LabelEndNok
+        }),
     }
 }
@@ -391,7 +391,7 @@ pub fn resource_before(tokenizer: &mut Tokenizer) -> State {
 /// ```
 pub fn resource_open(tokenizer: &mut Tokenizer) -> State {
     if let Some(b')') = tokenizer.current {
-        resource_end(tokenizer)
+        State::Retry(StateName::LabelEndResourceEnd)
     } else {
         tokenizer.tokenize_state.token_1 = Token::ResourceDestination;
         tokenizer.tokenize_state.token_2 = Token::ResourceDestinationLiteral;
@@ -458,7 +458,7 @@ pub fn resource_between(tokenizer: &mut Tokenizer) -> State {
                 State::Nok,
             )
         }
-        _ => resource_end(tokenizer),
+        _ => State::Retry(StateName::LabelEndResourceEnd),
     }
 }
diff --git a/src/construct/list.rs b/src/construct/list.rs
index 1aec032..36c1dac 100644
--- a/src/construct/list.rs
+++ b/src/construct/list.rs
@@ -91,10 +91,10 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
             State::Next(StateName::ListNok),
             State::Next(StateName::ListBeforeUnordered),
         ),
-        Some(b'+') => before_unordered(tokenizer),
+        Some(b'+') => State::Retry(StateName::ListBeforeUnordered),
         // Ordered.
-        Some(b'0'..=b'9') if !tokenizer.interrupt => before_ordered(tokenizer),
-        Some(b'1') => before_ordered(tokenizer),
+        Some(b'0'..=b'9') if !tokenizer.interrupt => State::Retry(StateName::ListBeforeOrdered),
+        Some(b'1') => State::Retry(StateName::ListBeforeOrdered),
         _ => State::Nok,
     }
 }
@@ -109,7 +109,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
 /// ```
 pub fn before_unordered(tokenizer: &mut Tokenizer) -> State {
     tokenizer.enter(Token::ListItemPrefix);
-    marker(tokenizer)
+    State::Retry(StateName::ListMarker)
 }
 
 /// Start of an ordered list item.
@@ -121,7 +121,7 @@ pub fn before_unordered(tokenizer: &mut Tokenizer) -> State {
 pub fn before_ordered(tokenizer: &mut Tokenizer) -> State {
     tokenizer.enter(Token::ListItemPrefix);
     tokenizer.enter(Token::ListItemValue);
-    value(tokenizer)
+    State::Retry(StateName::ListValue)
 }
 
 /// In an ordered list item value.
@@ -134,7 +134,7 @@ pub fn value(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'.' | b')') if !tokenizer.interrupt || tokenizer.tokenize_state.size < 2 => {
             tokenizer.exit(Token::ListItemValue);
-            marker(tokenizer)
+            State::Retry(StateName::ListMarker)
         }
         Some(b'0'..=b'9') if tokenizer.tokenize_state.size + 1 < LIST_ITEM_VALUE_SIZE_MAX => {
             tokenizer.tokenize_state.size += 1;
diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs
index cb3d85e..bac4369 100644
--- a/src/construct/paragraph.rs
+++ b/src/construct/paragraph.rs
@@ -48,7 +48,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
         _ => {
             tokenizer.enter(Token::Paragraph);
             tokenizer.enter_with_content(Token::Data, Some(ContentType::Text));
-            inside(tokenizer)
+            State::Retry(StateName::ParagraphInside)
         }
     }
 }
diff --git a/src/construct/partial_bom.rs b/src/construct/partial_bom.rs
index b2f6e1f..d20c2c7 100644
--- a/src/construct/partial_bom.rs
+++ b/src/construct/partial_bom.rs
@@ -24,7 +24,7 @@ const BOM: [u8; 3] = [0xEF, 0xBB, 0xBF];
 pub fn start(tokenizer: &mut Tokenizer) -> State {
     if tokenizer.current == Some(BOM[0]) {
         tokenizer.enter(Token::ByteOrderMark);
-        inside(tokenizer)
+        State::Retry(StateName::BomInside)
     } else {
         State::Nok
     }
diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs
index 5450ff2..a68f359 100644
--- a/src/construct/partial_data.rs
+++ b/src/construct/partial_data.rs
@@ -23,7 +23,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
             tokenizer.consume();
             State::Next(StateName::DataInside)
         }
-        _ => at_break(tokenizer),
+        _ => State::Retry(StateName::DataAtBreak),
     }
 }
@@ -48,7 +48,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State {
         }
         _ => {
             tokenizer.enter(Token::Data);
-            inside(tokenizer)
+            State::Retry(StateName::DataInside)
         }
     }
 }
@@ -68,7 +68,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {
     if done {
         tokenizer.exit(Token::Data);
-        at_break(tokenizer)
+        State::Retry(StateName::DataAtBreak)
     } else {
         tokenizer.consume();
         State::Next(StateName::DataInside)
diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs
index e4cfdc3..26fadc4 100644
--- a/src/construct/partial_destination.rs
+++ b/src/construct/partial_destination.rs
@@ -99,7 +99,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
             tokenizer.enter(tokenizer.tokenize_state.token_4.clone());
             tokenizer.enter(tokenizer.tokenize_state.token_5.clone());
             tokenizer.enter_with_content(Token::Data, Some(ContentType::String));
-            raw(tokenizer)
+            State::Retry(StateName::DestinationRaw)
         }
     }
 }
@@ -121,7 +121,7 @@ pub fn enclosed_before(tokenizer: &mut Tokenizer) -> State {
     } else {
         tokenizer.enter(tokenizer.tokenize_state.token_5.clone());
         tokenizer.enter_with_content(Token::Data, Some(ContentType::String));
-        enclosed(tokenizer)
+        State::Retry(StateName::DestinationEnclosed)
     }
 }
@@ -137,7 +137,7 @@ pub fn enclosed(tokenizer: &mut Tokenizer) -> State {
         Some(b'>') => {
             tokenizer.exit(Token::Data);
             tokenizer.exit(tokenizer.tokenize_state.token_5.clone());
-            enclosed_before(tokenizer)
+            State::Retry(StateName::DestinationEnclosedBefore)
         }
         Some(b'\\') => {
             tokenizer.consume();
@@ -162,7 +162,7 @@ pub fn enclosed_escape(tokenizer: &mut Tokenizer) -> State {
             tokenizer.consume();
             State::Next(StateName::DestinationEnclosed)
         }
-        _ => enclosed(tokenizer),
+        _ => State::Retry(StateName::DestinationEnclosed),
     }
 }
@@ -220,6 +220,6 @@ pub fn raw_escape(tokenizer: &mut Tokenizer) -> State {
             tokenizer.consume();
             State::Next(StateName::DestinationRaw)
         }
-        _ => raw(tokenizer),
+        _ => State::Retry(StateName::DestinationRaw),
     }
 }
diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs
index 718af06..a151841 100644
--- a/src/construct/partial_label.rs
+++ b/src/construct/partial_label.rs
@@ -136,7 +136,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State {
                 tokenizer.tokenize_state.connect = true;
             }
 
-            inside(tokenizer)
+            State::Retry(StateName::LabelInside)
         }
     }
 }
@@ -145,7 +145,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State {
 /// To do.
 pub fn eol_after(tokenizer: &mut Tokenizer) -> State {
     tokenizer.tokenize_state.connect = true;
-    at_break(tokenizer)
+    State::Retry(StateName::LabelAtBreak)
 }
 
 /// To do.
@@ -165,12 +165,12 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None | Some(b'\n' | b'[' | b']') => {
             tokenizer.exit(Token::Data);
-            at_break(tokenizer)
+            State::Retry(StateName::LabelAtBreak)
         }
         Some(byte) => {
             if tokenizer.tokenize_state.size > LINK_REFERENCE_SIZE_MAX {
                 tokenizer.exit(Token::Data);
-                at_break(tokenizer)
+                State::Retry(StateName::LabelAtBreak)
             } else {
                 tokenizer.consume();
                 tokenizer.tokenize_state.size += 1;
@@ -200,6 +200,6 @@ pub fn escape(tokenizer: &mut Tokenizer) -> State {
             tokenizer.tokenize_state.size += 1;
             State::Next(StateName::LabelInside)
         }
-        _ => inside(tokenizer),
+        _ => State::Retry(StateName::LabelInside),
     }
 }
diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs
index 1ca2520..5e8c212 100644
--- a/src/construct/partial_space_or_tab.rs
+++ b/src/construct/partial_space_or_tab.rs
@@ -113,9 +113,9 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
                 tokenizer.tokenize_state.space_or_tab_connect = true;
             }
 
-            inside(tokenizer)
+            State::Retry(StateName::SpaceOrTabInside)
         }
-        _ => after(tokenizer),
+        _ => State::Retry(StateName::SpaceOrTabAfter),
     }
 }
@@ -137,7 +137,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {
         }
         _ => {
             tokenizer.exit(tokenizer.tokenize_state.space_or_tab_token.clone());
-            after(tokenizer)
+            State::Retry(StateName::SpaceOrTabAfter)
         }
     }
 }
@@ -198,7 +198,7 @@ pub fn eol_after_first(tokenizer: &mut Tokenizer) -> State {
         tokenizer.tokenize_state.space_or_tab_eol_connect = true;
     }
 
-    eol_at_eol(tokenizer)
+    State::Retry(StateName::SpaceOrTabEolAtEol)
 }
 
 /// `space_or_tab_eol`: after optionally first `space_or_tab`.
diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs
index 9521d32..0b81418 100644
--- a/src/construct/partial_title.rs
+++ b/src/construct/partial_title.rs
@@ -79,7 +79,7 @@ pub fn begin(tokenizer: &mut Tokenizer) -> State {
         }
         _ => {
             tokenizer.enter(tokenizer.tokenize_state.token_3.clone());
-            at_break(tokenizer)
+            State::Retry(StateName::TitleAtBreak)
         }
     }
 }
@@ -116,7 +116,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State {
             if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker =>
         {
             tokenizer.exit(tokenizer.tokenize_state.token_3.clone());
-            begin(tokenizer)
+            State::Retry(StateName::TitleBegin)
         }
         Some(_) => {
             tokenizer.enter_with_content(Token::Data, Some(ContentType::String));
@@ -128,7 +128,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State {
                 tokenizer.tokenize_state.connect = true;
             }
 
-            inside(tokenizer)
+            State::Retry(StateName::TitleInside)
         }
     }
 }
@@ -136,7 +136,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State {
 /// To do.
 pub fn after_eol(tokenizer: &mut Tokenizer) -> State {
     tokenizer.tokenize_state.connect = true;
-    at_break(tokenizer)
+    State::Retry(StateName::TitleAtBreak)
 }
 
 /// To do.
@@ -156,13 +156,13 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None | Some(b'\n') => {
             tokenizer.exit(Token::Data);
-            at_break(tokenizer)
+            State::Retry(StateName::TitleAtBreak)
         }
         Some(b'"' | b'\'' | b')')
             if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker =>
         {
             tokenizer.exit(Token::Data);
-            at_break(tokenizer)
+            State::Retry(StateName::TitleAtBreak)
         }
         Some(byte) => {
             tokenizer.consume();
@@ -187,6 +187,6 @@ pub fn escape(tokenizer: &mut Tokenizer) -> State {
             tokenizer.consume();
             State::Next(StateName::TitleInside)
         }
-        _ => inside(tokenizer),
+        _ => State::Retry(StateName::TitleInside),
     }
 }
diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs
index beefe5b..288d818 100644
--- a/src/construct/thematic_break.rs
+++ b/src/construct/thematic_break.rs
@@ -92,7 +92,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'*' | b'-' | b'_') => {
             tokenizer.tokenize_state.marker = tokenizer.current.unwrap();
-            at_break(tokenizer)
+            State::Retry(StateName::ThematicBreakAtBreak)
         }
         _ => State::Nok,
     }
@@ -118,7 +118,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State {
             if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker =>
         {
             tokenizer.enter(Token::ThematicBreakSequence);
-            sequence(tokenizer)
+            State::Retry(StateName::ThematicBreakSequence)
         }
         _ => {
             tokenizer.tokenize_state.marker = 0;
diff --git a/src/content/document.rs b/src/content/document.rs
index 5ce4dc4..d47a31a 100644
--- a/src/content/document.rs
+++ b/src/content/document.rs
@@ -125,7 +125,7 @@ pub fn line_start(tokenizer: &mut Tokenizer) -> State {
     tokenizer.tokenize_state.document_continued = 0;
     // Containers would only be interrupting if we’ve continued.
     tokenizer.interrupt = false;
-    container_existing_before(tokenizer)
+    State::Retry(StateName::DocumentContainerExistingBefore)
 }
 
 /// Before existing containers.
@@ -158,7 +158,7 @@ pub fn container_existing_before(tokenizer: &mut Tokenizer) -> State {
     }
     // Otherwise, check new containers.
     else {
-        container_new_before(tokenizer)
+        State::Retry(StateName::DocumentContainerNewBefore)
     }
 }
@@ -175,7 +175,7 @@ pub fn container_existing_missing(tokenizer: &mut Tokenizer) -> State {
         .tokenize_state
         .document_container_stack
         .insert(tokenizer.tokenize_state.document_continued, container);
-    container_new_before(tokenizer)
+    State::Retry(StateName::DocumentContainerNewBefore)
 }
 
 /// After an existing container.
@@ -192,7 +192,7 @@ pub fn container_existing_after(tokenizer: &mut Tokenizer) -> State {
         .document_container_stack
         .insert(tokenizer.tokenize_state.document_continued, container);
     tokenizer.tokenize_state.document_continued += 1;
-    container_existing_before(tokenizer)
+    State::Retry(StateName::DocumentContainerExistingBefore)
 }
 
 /// Before a new container.
@@ -225,7 +225,7 @@ pub fn container_new_before(tokenizer: &mut Tokenizer) -> State {
             .unwrap()
             .concrete
         {
-            return containers_after(tokenizer);
+            return State::Retry(StateName::DocumentContainersAfter);
         }
     }
@@ -287,7 +287,7 @@ pub fn container_new_after(tokenizer: &mut Tokenizer) -> State {
     tokenizer.tokenize_state.document_continued += 1;
     tokenizer.tokenize_state.document_interrupt_before = false;
     tokenizer.interrupt = false;
-    container_new_before(tokenizer)
+    State::Retry(StateName::DocumentContainerNewBefore)
 }
 
 /// After containers, before flow.
@@ -308,7 +308,7 @@ pub fn containers_after(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         // Note: EOL is part of data.
-        None => flow_end(tokenizer),
+        None => State::Retry(StateName::DocumentFlowEnd),
         Some(_) => {
             let current = tokenizer.events.len();
             let previous = tokenizer.tokenize_state.document_data_index.take();
@@ -324,7 +324,7 @@ pub fn containers_after(tokenizer: &mut Tokenizer) -> State {
                     content_type: ContentType::Flow,
                 }),
             );
-            flow_inside(tokenizer)
+            State::Retry(StateName::DocumentFlowInside)
         }
     }
 }
@@ -334,7 +334,7 @@ pub fn flow_inside(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None => {
             tokenizer.exit(Token::Data);
-            flow_end(tokenizer)
+            State::Retry(StateName::DocumentFlowEnd)
         }
         // Note: EOL is part of data.
         Some(b'\n') => {
@@ -420,7 +420,7 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State {
         Some(_) => {
            tokenizer.tokenize_state.document_paragraph_before = paragraph;
             tokenizer.tokenize_state.document_interrupt_before = interrupt;
-            line_start(tokenizer)
+            State::Retry(StateName::DocumentLineStart)
         }
     }
 }
diff --git a/src/content/flow.rs b/src/content/flow.rs
index cd913c6..886b5f0 100644
--- a/src/content/flow.rs
+++ b/src/content/flow.rs
@@ -67,7 +67,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
         ),
         // Actual parsing: blank line? Indented code? Indented anything?
         // Also includes `-` which can be a setext heading underline or a thematic break.
-        None | Some(b'\t' | b'\n' | b' ' | b'-') => before_blank_line(tokenizer),
+        None | Some(b'\t' | b'\n' | b' ' | b'-') => State::Retry(StateName::FlowBlankLineBefore),
         Some(_) => tokenizer.attempt(
             StateName::ParagraphStart,
             State::Next(StateName::FlowAfter),
@@ -76,7 +76,7 @@
     }
 }
 
-pub fn before_blank_line(tokenizer: &mut Tokenizer) -> State {
+pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State {
     tokenizer.attempt(
         StateName::BlankLineStart,
         State::Next(StateName::FlowBlankLineAfter),
diff --git a/src/content/string.rs b/src/content/string.rs
index fda9b51..75cd56a 100644
--- a/src/content/string.rs
+++ b/src/content/string.rs
@@ -21,7 +21,7 @@ const MARKERS: [u8; 2] = [b'&', b'\\'];
 pub fn start(tokenizer: &mut Tokenizer) -> State {
     tokenizer.register_resolver("whitespace".to_string(), Box::new(resolve));
     tokenizer.tokenize_state.stop = &MARKERS;
-    before(tokenizer)
+    State::Retry(StateName::StringBefore)
 }
 
 /// Before string.
@@ -38,7 +38,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
             State::Next(StateName::StringBefore),
             State::Next(StateName::StringBeforeData),
         ),
-        _ => before_data(tokenizer),
+        _ => State::Retry(StateName::StringBeforeData),
     }
 }
diff --git a/src/content/text.rs b/src/content/text.rs
index eb5a231..ee70f33 100644
--- a/src/content/text.rs
+++ b/src/content/text.rs
@@ -39,7 +39,7 @@ const MARKERS: [u8; 9] = [
 pub fn start(tokenizer: &mut Tokenizer) -> State {
     tokenizer.register_resolver("whitespace".to_string(), Box::new(resolve));
     tokenizer.tokenize_state.stop = &MARKERS;
-    before(tokenizer)
+    State::Retry(StateName::TextBefore)
 }
 
 /// Before text.
@@ -87,7 +87,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
             State::Next(StateName::TextBefore),
             State::Next(StateName::TextBeforeData),
         ),
-        _ => before_data(tokenizer),
+        _ => State::Retry(StateName::TextBeforeData),
     }
 }
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 0efbfd6..56746ed 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -17,6 +17,7 @@ use crate::content;
 use crate::parser::ParseState;
 use crate::token::{Token, VOID_TOKENS};
 use crate::util::edit_map::EditMap;
+use std::str;
 
 /// Embedded content type.
 #[derive(Debug, Clone, PartialEq)]
@@ -203,10 +204,10 @@ pub enum StateName {
     DocumentStart,
     DocumentLineStart,
-    // DocumentContainerExistingBefore,
+    DocumentContainerExistingBefore,
     DocumentContainerExistingAfter,
     DocumentContainerExistingMissing,
-    // DocumentContainerNewBefore,
+    DocumentContainerNewBefore,
     DocumentContainerNewBeforeNotBlockQuote,
     DocumentContainerNewAfter,
     DocumentContainersAfter,
@@ -222,6 +223,7 @@
     FlowBeforeThematicBreak,
     FlowBeforeDefinition,
     FlowAfter,
+    FlowBlankLineBefore,
     FlowBlankLineAfter,
     FlowBeforeParagraph,
@@ -299,6 +301,7 @@
     HtmlTextEnd,
     HtmlTextInstruction,
     HtmlTextInstructionClose,
+    HtmlTextLineEndingBefore,
     HtmlTextLineEndingAfter,
     HtmlTextLineEndingAfterPrefix,
@@ -335,8 +338,10 @@
     ListStart,
     ListBefore,
     ListNok,
+    ListBeforeOrdered,
     ListBeforeUnordered,
     ListValue,
+    ListMarker,
     ListMarkerAfter,
     ListAfter,
     ListMarkerAfterFilled,
@@ -356,6 +361,7 @@
     SpaceOrTabStart,
     SpaceOrTabInside,
+    SpaceOrTabAfter,
     SpaceOrTabEolStart,
     SpaceOrTabEolAfterFirst,
@@ -381,6 +387,7 @@
     TitleStart,
     TitleBegin,
     TitleAfterEol,
+    TitleAtBreak,
     TitleAtBlankLine,
     TitleEscape,
     TitleInside,
@@ -491,14 +498,16 @@
             StateName::DocumentStart => content::document::start,
             StateName::DocumentLineStart => content::document::line_start,
-            // StateName::DocumentContainerExistingBefore => content::document::container_existing_before,
+            StateName::DocumentContainerExistingBefore => {
+                content::document::container_existing_before
+            }
             StateName::DocumentContainerExistingAfter => {
                 content::document::container_existing_after
             }
             StateName::DocumentContainerExistingMissing => {
                 content::document::container_existing_missing
             }
-            // StateName::DocumentContainerNewBefore => content::document::container_new_before,
+            StateName::DocumentContainerNewBefore => content::document::container_new_before,
             StateName::DocumentContainerNewBeforeNotBlockQuote => {
                 content::document::container_new_before_not_block_quote
             }
@@ -516,6 +525,7 @@
             StateName::FlowBeforeThematicBreak => content::flow::before_thematic_break,
             StateName::FlowBeforeDefinition => content::flow::before_definition,
             StateName::FlowAfter => content::flow::after,
+            StateName::FlowBlankLineBefore => content::flow::blank_line_before,
             StateName::FlowBlankLineAfter => content::flow::blank_line_after,
             StateName::FlowBeforeParagraph => content::flow::before_paragraph,
@@ -633,6 +643,7 @@
             StateName::HtmlTextEnd => construct::html_text::end,
             StateName::HtmlTextInstruction => construct::html_text::instruction,
             StateName::HtmlTextInstructionClose => construct::html_text::instruction_close,
+            StateName::HtmlTextLineEndingBefore => construct::html_text::line_ending_before,
             StateName::HtmlTextLineEndingAfter => construct::html_text::line_ending_after,
             StateName::HtmlTextLineEndingAfterPrefix => {
                 construct::html_text::line_ending_after_prefix
             }
@@ -676,8 +687,10 @@
             StateName::ListStart => construct::list::start,
             StateName::ListBefore => construct::list::before,
             StateName::ListNok => construct::list::nok,
+            StateName::ListBeforeOrdered => construct::list::before_ordered,
             StateName::ListBeforeUnordered => construct::list::before_unordered,
             StateName::ListValue => construct::list::value,
+            StateName::ListMarker => construct::list::marker,
             StateName::ListMarkerAfter => construct::list::marker_after,
             StateName::ListAfter => construct::list::after,
             StateName::ListMarkerAfterFilled => construct::list::marker_after_filled,
@@ -697,6 +710,7 @@
             StateName::SpaceOrTabStart => construct::partial_space_or_tab::start,
             StateName::SpaceOrTabInside => construct::partial_space_or_tab::inside,
+            StateName::SpaceOrTabAfter => construct::partial_space_or_tab::after,
             StateName::SpaceOrTabEolStart => construct::partial_space_or_tab::eol_start,
             StateName::SpaceOrTabEolAfterFirst => construct::partial_space_or_tab::eol_after_first,
@@ -722,6 +736,7 @@
             StateName::TitleStart => construct::partial_title::start,
             StateName::TitleBegin => construct::partial_title::begin,
             StateName::TitleAfterEol => construct::partial_title::after_eol,
+            StateName::TitleAtBreak => construct::partial_title::at_break,
             StateName::TitleAtBlankLine => construct::partial_title::at_blank_line,
             StateName::TitleEscape => construct::partial_title::escape,
             StateName::TitleInside => construct::partial_title::inside,
@@ -734,8 +749,10 @@
 /// The result of a state.
 #[derive(Debug, PartialEq, Copy, Clone)]
 pub enum State {
-    /// There is a future state: a [`StateName`][] to pass the next code to.
+    /// Move to [`StateName`][] next.
     Next(StateName),
+    /// Retry in [`StateName`][].
+    Retry(StateName),
     /// The state is successful.
     Ok,
     /// The state is not successful.
@@ -1049,7 +1066,6 @@ impl<'a> Tokenizer<'a> {
     /// Each state function is expected to call this to signal that this code is
     /// used, or call a next function.
     pub fn consume(&mut self) {
-        log::debug!("consume: `{:?}` ({:?})", self.current, self.point);
         debug_assert!(!self.consumed, "expected code to not have been consumed: this might be because `x(code)` instead of `x` was returned");
 
         self.move_one();
@@ -1125,7 +1141,7 @@ impl<'a> Tokenizer<'a> {
         let mut point = self.point.clone();
         move_point_back(self, &mut point);
 
-        log::debug!("enter: `{:?}` ({:?})", token_type, point);
+        log::debug!("enter: `{:?}`", token_type);
         self.events.push(Event {
             event_type: EventType::Enter,
             token_type: token_type.clone(),
@@ -1171,7 +1187,7 @@ impl<'a> Tokenizer<'a> {
             move_point_back(self, &mut point);
         }
 
-        log::debug!("exit: `{:?}` ({:?})", token_type, point);
+        log::debug!("exit: `{:?}`", token_type);
         self.events.push(Event {
             event_type: EventType::Exit,
             token_type,
@@ -1263,6 +1279,10 @@ impl<'a> Tokenizer<'a> {
                     let action = byte_action(self.parse_state.bytes, &self.point);
                     state = feed_action_impl(self, &Some(action), name);
                 }
+                State::Retry(name) => {
+                    log::debug!("    retry {:?}", name);
+                    state = call_impl(self, name);
+                }
             }
         }
@@ -1296,6 +1316,10 @@ impl<'a> Tokenizer<'a> {
                         name,
                     );
                 }
+                State::Retry(name) => {
+                    log::debug!("    retry {:?}", name);
+                    state = call_impl(self, name);
+                }
             }
         }
@@ -1417,9 +1441,11 @@ fn feed_action_impl(
     };
 
     log::debug!(
-        "main: flushing: `{:?}` ({:?}) to {:?}",
-        byte,
-        tokenizer.point,
+        "feed: `{:?}` to {:?}",
+        byte.map_or_else(
+            || "eof".to_string(),
+            |d| str::from_utf8(&[d]).unwrap().to_string()
+        ),
         name
     );
     tokenizer.expect(byte);