From 0d9c4611922535533746d1a86f10ef4e701c950e Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Thu, 11 Aug 2022 17:26:17 +0200 Subject: Refactor attempts to remove unneeded state name --- src/content/document.rs | 26 ++++++----- src/content/flow.rs | 120 ++++++++++++++++++++++++++---------------------- src/content/string.rs | 31 +++++++------ src/content/text.rs | 111 ++++++++++++++++++++++++-------------------- 4 files changed, 157 insertions(+), 131 deletions(-) (limited to 'src/content') diff --git a/src/content/document.rs b/src/content/document.rs index f2890f3..04f9dc6 100644 --- a/src/content/document.rs +++ b/src/content/document.rs @@ -81,10 +81,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { ))); tokenizer.attempt( - StateName::BomStart, State::Next(StateName::DocumentContainerExistingBefore), State::Next(StateName::DocumentContainerExistingBefore), - ) + ); + + State::Retry(StateName::BomStart) } /// Before existing containers. @@ -102,14 +103,17 @@ pub fn container_existing_before(tokenizer: &mut Tokenizer) -> State { let container = &tokenizer.tokenize_state.document_container_stack [tokenizer.tokenize_state.document_continued]; + let name = match container.kind { + Container::BlockQuote => StateName::BlockQuoteContStart, + Container::ListItem => StateName::ListContStart, + }; + tokenizer.attempt( - match container.kind { - Container::BlockQuote => StateName::BlockQuoteContStart, - Container::ListItem => StateName::ListContStart, - }, State::Next(StateName::DocumentContainerExistingAfter), State::Next(StateName::DocumentContainerNewBefore), - ) + ); + + State::Retry(name) } // Otherwise, check new containers. else { @@ -173,10 +177,10 @@ pub fn container_new_before(tokenizer: &mut Tokenizer) -> State { .swap(tokenizer.tokenize_state.document_continued, tail); tokenizer.attempt( - StateName::BlockQuoteStart, State::Next(StateName::DocumentContainerNewAfter), State::Next(StateName::DocumentContainerNewBeforeNotBlockQuote), - ) + ); + State::Retry(StateName::BlockQuoteStart) } /// Maybe before a new container, but not a block quote. @@ -196,10 +200,10 @@ pub fn container_new_before_not_block_quote(tokenizer: &mut Tokenizer) -> State }; tokenizer.attempt( - StateName::ListStart, State::Next(StateName::DocumentContainerNewAfter), State::Next(StateName::DocumentContainerNewBeforeNotList), - ) + ); + State::Retry(StateName::ListStart) } /// Maybe before a new container, but not a list. diff --git a/src/content/flow.rs b/src/content/flow.rs index 7eb7b64..c6bd398 100644 --- a/src/content/flow.rs +++ b/src/content/flow.rs @@ -35,54 +35,65 @@ use crate::tokenizer::Tokenizer; /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - Some(b'`' | b'~') => tokenizer.attempt( - StateName::CodeFencedStart, - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), - ), - Some(b'<') => tokenizer.attempt( - StateName::HtmlFlowStart, - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), - ), - Some(b'#') => tokenizer.attempt( - StateName::HeadingAtxStart, - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), - ), + Some(b'`' | b'~') => { + tokenizer.attempt( + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), + ); + State::Retry(StateName::CodeFencedStart) + } + Some(b'<') => { + tokenizer.attempt( + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), + ); + State::Retry(StateName::HtmlFlowStart) + } + Some(b'#') => { + tokenizer.attempt( + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), + ); + State::Retry(StateName::HeadingAtxStart) + } // Note: `-` is also used in thematic breaks, so it’s not included here. - Some(b'=') => tokenizer.attempt( - StateName::HeadingSetextStart, - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), - ), - Some(b'*' | b'_') => tokenizer.attempt( - StateName::ThematicBreakStart, - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), - ), - Some(b'[') => tokenizer.attempt( - StateName::DefinitionStart, - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), - ), + Some(b'=') => { + tokenizer.attempt( + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), + ); + State::Retry(StateName::HeadingSetextStart) + } + Some(b'*' | b'_') => { + tokenizer.attempt( + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), + ); + State::Retry(StateName::ThematicBreakStart) + } + Some(b'[') => { + tokenizer.attempt( + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), + ); + State::Retry(StateName::DefinitionStart) + } // Actual parsing: blank line? Indented code? Indented anything? // Also includes `-` which can be a setext heading underline or a thematic break. None | Some(b'\t' | b'\n' | b' ' | b'-') => State::Retry(StateName::FlowBlankLineBefore), - Some(_) => tokenizer.attempt( - StateName::ParagraphStart, - State::Next(StateName::FlowAfter), - State::Nok, - ), + Some(_) => { + tokenizer.attempt(State::Next(StateName::FlowAfter), State::Nok); + State::Retry(StateName::ParagraphStart) + } } } pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - StateName::BlankLineStart, State::Next(StateName::FlowBlankLineAfter), State::Next(StateName::FlowBeforeCodeIndented), - ) + ); + State::Retry(StateName::BlankLineStart) } /// Before flow (initial). @@ -99,58 +110,58 @@ pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn before_code_indented(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - StateName::CodeIndentedStart, State::Next(StateName::FlowAfter), State::Next(StateName::FlowBeforeCodeFenced), - ) + ); + State::Retry(StateName::CodeIndentedStart) } pub fn before_code_fenced(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - StateName::CodeFencedStart, State::Next(StateName::FlowAfter), State::Next(StateName::FlowBeforeHtml), - ) + ); + State::Retry(StateName::CodeFencedStart) } pub fn before_html(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - StateName::HtmlFlowStart, State::Next(StateName::FlowAfter), State::Next(StateName::FlowBeforeHeadingAtx), - ) + ); + State::Retry(StateName::HtmlFlowStart) } pub fn before_heading_atx(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - StateName::HeadingAtxStart, State::Next(StateName::FlowAfter), State::Next(StateName::FlowBeforeHeadingSetext), - ) + ); + State::Retry(StateName::HeadingAtxStart) } pub fn before_heading_setext(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - StateName::HeadingSetextStart, State::Next(StateName::FlowAfter), State::Next(StateName::FlowBeforeThematicBreak), - ) + ); + State::Retry(StateName::HeadingSetextStart) } pub fn before_thematic_break(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - StateName::ThematicBreakStart, State::Next(StateName::FlowAfter), State::Next(StateName::FlowBeforeDefinition), - ) + ); + State::Retry(StateName::ThematicBreakStart) } pub fn before_definition(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - StateName::DefinitionStart, State::Next(StateName::FlowAfter), State::Next(StateName::FlowBeforeParagraph), - ) + ); + State::Retry(StateName::DefinitionStart) } /// After a blank line. @@ -203,9 +214,6 @@ pub fn after(tokenizer: &mut Tokenizer) -> State { /// |asd /// ``` pub fn before_paragraph(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt( - StateName::ParagraphStart, - State::Next(StateName::FlowAfter), - State::Nok, - ) + tokenizer.attempt(State::Next(StateName::FlowAfter), State::Nok); + State::Retry(StateName::ParagraphStart) } diff --git a/src/content/string.rs b/src/content/string.rs index ce850e7..1eefd30 100644 --- a/src/content/string.rs +++ b/src/content/string.rs @@ -30,27 +30,28 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => State::Ok, - Some(b'&') => tokenizer.attempt( - StateName::CharacterReferenceStart, - State::Next(StateName::StringBefore), - State::Next(StateName::StringBeforeData), - ), - Some(b'\\') => tokenizer.attempt( - StateName::CharacterEscapeStart, - State::Next(StateName::StringBefore), - State::Next(StateName::StringBeforeData), - ), + Some(b'&') => { + tokenizer.attempt( + State::Next(StateName::StringBefore), + State::Next(StateName::StringBeforeData), + ); + State::Retry(StateName::CharacterReferenceStart) + } + Some(b'\\') => { + tokenizer.attempt( + State::Next(StateName::StringBefore), + State::Next(StateName::StringBeforeData), + ); + State::Retry(StateName::CharacterEscapeStart) + } _ => State::Retry(StateName::StringBeforeData), } } /// At data. pub fn before_data(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt( - StateName::DataStart, - State::Next(StateName::StringBefore), - State::Nok, - ) + tokenizer.attempt(State::Next(StateName::StringBefore), State::Nok); + State::Retry(StateName::DataStart) } /// Resolve whitespace. diff --git a/src/content/text.rs b/src/content/text.rs index 570759d..6509d30 100644 --- a/src/content/text.rs +++ b/src/content/text.rs @@ -48,47 +48,63 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => State::Ok, - Some(b'!') => tokenizer.attempt( - StateName::LabelStartImageStart, - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeData), - ), - Some(b'&') => tokenizer.attempt( - StateName::CharacterReferenceStart, - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeData), - ), - Some(b'*' | b'_') => tokenizer.attempt( - StateName::AttentionStart, - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeData), - ), + Some(b'!') => { + tokenizer.attempt( + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeData), + ); + State::Retry(StateName::LabelStartImageStart) + } + Some(b'&') => { + tokenizer.attempt( + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeData), + ); + State::Retry(StateName::CharacterReferenceStart) + } + Some(b'*' | b'_') => { + tokenizer.attempt( + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeData), + ); + State::Retry(StateName::AttentionStart) + } // `autolink`, `html_text` (order does not matter) - Some(b'<') => tokenizer.attempt( - StateName::AutolinkStart, - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeHtml), - ), - Some(b'[') => tokenizer.attempt( - StateName::LabelStartLinkStart, - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeData), - ), - Some(b'\\') => tokenizer.attempt( - StateName::CharacterEscapeStart, - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeHardBreakEscape), - ), - Some(b']') => tokenizer.attempt( - StateName::LabelEndStart, - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeData), - ), - Some(b'`') => tokenizer.attempt( - StateName::CodeTextStart, - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeData), - ), + Some(b'<') => { + tokenizer.attempt( + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeHtml), + ); + State::Retry(StateName::AutolinkStart) + } + Some(b'[') => { + tokenizer.attempt( + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeData), + ); + State::Retry(StateName::LabelStartLinkStart) + } + Some(b'\\') => { + tokenizer.attempt( + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeHardBreakEscape), + ); + State::Retry(StateName::CharacterEscapeStart) + } + Some(b']') => { + tokenizer.attempt( + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeData), + ); + State::Retry(StateName::LabelEndStart) + } + Some(b'`') => { + tokenizer.attempt( + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeData), + ); + State::Retry(StateName::CodeTextStart) + } _ => State::Retry(StateName::TextBeforeData), } } @@ -96,28 +112,25 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { /// At `<`, which wasn’t an autolink: before HTML? pub fn before_html(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - StateName::HtmlTextStart, State::Next(StateName::TextBefore), State::Next(StateName::TextBeforeData), - ) + ); + State::Retry(StateName::HtmlTextStart) } /// At `\`, which wasn’t a character escape: before a hard break? pub fn before_hard_break_escape(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - StateName::HardBreakEscapeStart, State::Next(StateName::TextBefore), State::Next(StateName::TextBeforeData), - ) + ); + State::Retry(StateName::HardBreakEscapeStart) } /// At data. pub fn before_data(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt( - StateName::DataStart, - State::Next(StateName::TextBefore), - State::Nok, - ) + tokenizer.attempt(State::Next(StateName::TextBefore), State::Nok); + State::Retry(StateName::DataStart) } /// Resolve whitespace. -- cgit