From 6eb2f644057f371841fe25330a57ee185f91c7af Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Thu, 11 Aug 2022 13:45:24 +0200 Subject: Refactor to move some code to `state.rs` --- src/content/document.rs | 60 ++++++++++++++-------------- src/content/flow.rs | 101 ++++++++++++++++++++++++------------------------ src/content/string.rs | 25 ++++++------ src/content/text.rs | 73 +++++++++++++++++----------------- 4 files changed, 127 insertions(+), 132 deletions(-) (limited to 'src/content') diff --git a/src/content/document.rs b/src/content/document.rs index 49ca919..b5ff532 100644 --- a/src/content/document.rs +++ b/src/content/document.rs @@ -9,11 +9,11 @@ //! * [List][crate::construct::list] use crate::parser::ParseState; +use crate::state::{Name, State}; use crate::subtokenize::{divide_events, subtokenize}; use crate::token::Token; use crate::tokenizer::{ - Container, ContainerState, ContentType, Event, EventType, Link, Point, State, StateName, - Tokenizer, + Container, ContainerState, ContentType, Event, EventType, Link, Point, Tokenizer, }; use crate::util::{ normalize_identifier::normalize_identifier, @@ -59,7 +59,7 @@ pub fn document(parse_state: &mut ParseState, point: Point) -> Vec<Event> { let state = tokenizer.push( (0, 0), (parse_state.bytes.len(), 0), - State::Next(StateName::DocumentStart), + State::Next(Name::DocumentStart), ); tokenizer.flush(state, true); @@ -111,9 +111,9 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { ))); tokenizer.attempt( - StateName::BomStart, - State::Next(StateName::DocumentContainerExistingBefore), - State::Next(StateName::DocumentContainerExistingBefore), + Name::BomStart, + State::Next(Name::DocumentContainerExistingBefore), + State::Next(Name::DocumentContainerExistingBefore), ) } @@ -134,16 +134,16 @@ pub fn container_existing_before(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( match container.kind { - Container::BlockQuote => StateName::BlockQuoteContStart, - Container::ListItem => StateName::ListContStart, 
+ Container::BlockQuote => Name::BlockQuoteContStart, + Container::ListItem => Name::ListContStart, }, - State::Next(StateName::DocumentContainerExistingAfter), - State::Next(StateName::DocumentContainerNewBefore), + State::Next(Name::DocumentContainerExistingAfter), + State::Next(Name::DocumentContainerNewBefore), ) } // Otherwise, check new containers. else { - State::Retry(StateName::DocumentContainerNewBefore) + State::Retry(Name::DocumentContainerNewBefore) } } @@ -156,7 +156,7 @@ pub fn container_existing_before(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn container_existing_after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.document_continued += 1; - State::Retry(StateName::DocumentContainerExistingBefore) + State::Retry(Name::DocumentContainerExistingBefore) } /// Before a new container. @@ -180,7 +180,7 @@ pub fn container_new_before(tokenizer: &mut Tokenizer) -> State { // …and if we’re in a concrete construct, new containers can’t “pierce” // into them. 
if child.concrete { - return State::Retry(StateName::DocumentContainersAfter); + return State::Retry(Name::DocumentContainersAfter); } } @@ -203,9 +203,9 @@ pub fn container_new_before(tokenizer: &mut Tokenizer) -> State { .swap(tokenizer.tokenize_state.document_continued, tail); tokenizer.attempt( - StateName::BlockQuoteStart, - State::Next(StateName::DocumentContainerNewAfter), - State::Next(StateName::DocumentContainerNewBeforeNotBlockQuote), + Name::BlockQuoteStart, + State::Next(Name::DocumentContainerNewAfter), + State::Next(Name::DocumentContainerNewBeforeNotBlockQuote), ) } @@ -226,9 +226,9 @@ pub fn container_new_before_not_block_quote(tokenizer: &mut Tokenizer) -> State }; tokenizer.attempt( - StateName::ListStart, - State::Next(StateName::DocumentContainerNewAfter), - State::Next(StateName::DocumentContainerNewBeforeNotList), + Name::ListStart, + State::Next(Name::DocumentContainerNewAfter), + State::Next(Name::DocumentContainerNewBeforeNotList), ) } @@ -247,7 +247,7 @@ pub fn container_new_before_not_list(tokenizer: &mut Tokenizer) -> State { .document_container_stack .swap_remove(tokenizer.tokenize_state.document_continued); - State::Retry(StateName::DocumentContainersAfter) + State::Retry(Name::DocumentContainersAfter) } /// After a new container. @@ -281,7 +281,7 @@ pub fn container_new_after(tokenizer: &mut Tokenizer) -> State { .push(container); tokenizer.tokenize_state.document_continued += 1; tokenizer.interrupt = false; - State::Retry(StateName::DocumentContainerNewBefore) + State::Retry(Name::DocumentContainerNewBefore) } /// After containers, before flow. @@ -301,7 +301,7 @@ pub fn containers_after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // Note: EOL is part of data. 
- None => State::Retry(StateName::DocumentFlowEnd), + None => State::Retry(Name::DocumentFlowEnd), Some(_) => { let current = tokenizer.events.len(); let previous = tokenizer.tokenize_state.document_data_index; @@ -317,7 +317,7 @@ pub fn containers_after(tokenizer: &mut Tokenizer) -> State { content_type: ContentType::Flow, }), ); - State::Retry(StateName::DocumentFlowInside) + State::Retry(Name::DocumentFlowInside) } } } @@ -332,17 +332,17 @@ pub fn flow_inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => { tokenizer.exit(Token::Data); - State::Retry(StateName::DocumentFlowEnd) + State::Retry(Name::DocumentFlowEnd) } // Note: EOL is part of data. Some(b'\n') => { tokenizer.consume(); tokenizer.exit(Token::Data); - State::Next(StateName::DocumentFlowEnd) + State::Next(Name::DocumentFlowEnd) } Some(_) => { tokenizer.consume(); - State::Next(StateName::DocumentFlowInside) + State::Next(Name::DocumentFlowInside) } } } @@ -359,7 +359,7 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State { let state = tokenizer .tokenize_state .document_child_state - .unwrap_or(State::Next(StateName::FlowStart)); + .unwrap_or(State::Next(Name::FlowStart)); tokenizer.tokenize_state.document_exits.push(None); @@ -369,7 +369,7 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State { state, ); - let paragraph = matches!(state, State::Next(StateName::ParagraphInside)) + let paragraph = matches!(state, State::Next(Name::ParagraphInside)) || (!child.events.is_empty() && child.events [skip::opt_back(&child.events, child.events.len() - 1, &[Token::LineEnding])] @@ -401,7 +401,7 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.document_paragraph_before = paragraph; // Containers would only be interrupting if we’ve continued. 
tokenizer.interrupt = false; - State::Retry(StateName::DocumentContainerExistingBefore) + State::Retry(Name::DocumentContainerExistingBefore) } } } @@ -421,7 +421,7 @@ fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) { .tokenize_state .document_child_state .take() - .unwrap_or(State::Next(StateName::FlowStart)); + .unwrap_or(State::Next(Name::FlowStart)); child.flush(state, false); } diff --git a/src/content/flow.rs b/src/content/flow.rs index 886b5f0..16a1cba 100644 --- a/src/content/flow.rs +++ b/src/content/flow.rs @@ -19,8 +19,9 @@ //! * [HTML (flow)][crate::construct::html_flow] //! * [Thematic break][crate::construct::thematic_break] +use crate::state::{Name, State}; use crate::token::Token; -use crate::tokenizer::{State, StateName, Tokenizer}; +use crate::tokenizer::Tokenizer; /// Before flow. /// @@ -35,42 +36,42 @@ use crate::tokenizer::{State, StateName, Tokenizer}; pub fn start(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'`' | b'~') => tokenizer.attempt( - StateName::CodeFencedStart, - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), + Name::CodeFencedStart, + State::Next(Name::FlowAfter), + State::Next(Name::FlowBeforeParagraph), ), Some(b'<') => tokenizer.attempt( - StateName::HtmlFlowStart, - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), + Name::HtmlFlowStart, + State::Next(Name::FlowAfter), + State::Next(Name::FlowBeforeParagraph), ), Some(b'#') => tokenizer.attempt( - StateName::HeadingAtxStart, - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), + Name::HeadingAtxStart, + State::Next(Name::FlowAfter), + State::Next(Name::FlowBeforeParagraph), ), // Note: `-` is also used in thematic breaks, so it’s not included here. 
Some(b'=') => tokenizer.attempt( - StateName::HeadingSetextStart, - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), + Name::HeadingSetextStart, + State::Next(Name::FlowAfter), + State::Next(Name::FlowBeforeParagraph), ), Some(b'*' | b'_') => tokenizer.attempt( - StateName::ThematicBreakStart, - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), + Name::ThematicBreakStart, + State::Next(Name::FlowAfter), + State::Next(Name::FlowBeforeParagraph), ), Some(b'[') => tokenizer.attempt( - StateName::DefinitionStart, - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), + Name::DefinitionStart, + State::Next(Name::FlowAfter), + State::Next(Name::FlowBeforeParagraph), ), // Actual parsing: blank line? Indented code? Indented anything? // Also includes `-` which can be a setext heading underline or a thematic break. - None | Some(b'\t' | b'\n' | b' ' | b'-') => State::Retry(StateName::FlowBlankLineBefore), + None | Some(b'\t' | b'\n' | b' ' | b'-') => State::Retry(Name::FlowBlankLineBefore), Some(_) => tokenizer.attempt( - StateName::ParagraphStart, - State::Next(StateName::FlowAfter), + Name::ParagraphStart, + State::Next(Name::FlowAfter), State::Nok, ), } @@ -78,9 +79,9 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - StateName::BlankLineStart, - State::Next(StateName::FlowBlankLineAfter), - State::Next(StateName::FlowBeforeCodeIndented), + Name::BlankLineStart, + State::Next(Name::FlowBlankLineAfter), + State::Next(Name::FlowBeforeCodeIndented), ) } @@ -98,57 +99,57 @@ pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn before_code_indented(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - StateName::CodeIndentedStart, - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeCodeFenced), + Name::CodeIndentedStart, + 
State::Next(Name::FlowAfter), + State::Next(Name::FlowBeforeCodeFenced), ) } pub fn before_code_fenced(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - StateName::CodeFencedStart, - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeHtml), + Name::CodeFencedStart, + State::Next(Name::FlowAfter), + State::Next(Name::FlowBeforeHtml), ) } pub fn before_html(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - StateName::HtmlFlowStart, - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeHeadingAtx), + Name::HtmlFlowStart, + State::Next(Name::FlowAfter), + State::Next(Name::FlowBeforeHeadingAtx), ) } pub fn before_heading_atx(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - StateName::HeadingAtxStart, - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeHeadingSetext), + Name::HeadingAtxStart, + State::Next(Name::FlowAfter), + State::Next(Name::FlowBeforeHeadingSetext), ) } pub fn before_heading_setext(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - StateName::HeadingSetextStart, - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeThematicBreak), + Name::HeadingSetextStart, + State::Next(Name::FlowAfter), + State::Next(Name::FlowBeforeThematicBreak), ) } pub fn before_thematic_break(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - StateName::ThematicBreakStart, - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeDefinition), + Name::ThematicBreakStart, + State::Next(Name::FlowAfter), + State::Next(Name::FlowBeforeDefinition), ) } pub fn before_definition(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - StateName::DefinitionStart, - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), + Name::DefinitionStart, + State::Next(Name::FlowAfter), + State::Next(Name::FlowBeforeParagraph), ) } @@ -168,7 +169,7 @@ pub fn blank_line_after(tokenizer: &mut Tokenizer) -> State { 
tokenizer.exit(Token::BlankLineEnding); // Feel free to interrupt. tokenizer.interrupt = false; - State::Next(StateName::FlowStart) + State::Next(Name::FlowStart) } _ => unreachable!("expected eol/eof"), } @@ -190,7 +191,7 @@ pub fn after(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(Token::LineEnding); tokenizer.consume(); tokenizer.exit(Token::LineEnding); - State::Next(StateName::FlowStart) + State::Next(Name::FlowStart) } _ => unreachable!("expected eol/eof"), } @@ -203,8 +204,8 @@ pub fn after(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn before_paragraph(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - StateName::ParagraphStart, - State::Next(StateName::FlowAfter), + Name::ParagraphStart, + State::Next(Name::FlowAfter), State::Nok, ) } diff --git a/src/content/string.rs b/src/content/string.rs index 5dfceb0..927f582 100644 --- a/src/content/string.rs +++ b/src/content/string.rs @@ -13,7 +13,8 @@ //! [text]: crate::content::text use crate::construct::partial_whitespace::resolve_whitespace; -use crate::tokenizer::{State, StateName, Tokenizer}; +use crate::state::{Name, State}; +use crate::tokenizer::Tokenizer; const MARKERS: [u8; 2] = [b'&', b'\\']; @@ -21,7 +22,7 @@ const MARKERS: [u8; 2] = [b'&', b'\\']; pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.register_resolver("whitespace".to_string(), Box::new(resolve)); tokenizer.tokenize_state.markers = &MARKERS; - State::Retry(StateName::StringBefore) + State::Retry(Name::StringBefore) } /// Before string. 
@@ -29,26 +30,22 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => State::Ok, Some(b'&') => tokenizer.attempt( - StateName::CharacterReferenceStart, - State::Next(StateName::StringBefore), - State::Next(StateName::StringBeforeData), + Name::CharacterReferenceStart, + State::Next(Name::StringBefore), + State::Next(Name::StringBeforeData), ), Some(b'\\') => tokenizer.attempt( - StateName::CharacterEscapeStart, - State::Next(StateName::StringBefore), - State::Next(StateName::StringBeforeData), + Name::CharacterEscapeStart, + State::Next(Name::StringBefore), + State::Next(Name::StringBeforeData), ), - _ => State::Retry(StateName::StringBeforeData), + _ => State::Retry(Name::StringBeforeData), } } /// At data. pub fn before_data(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt( - StateName::DataStart, - State::Next(StateName::StringBefore), - State::Nok, - ) + tokenizer.attempt(Name::DataStart, State::Next(Name::StringBefore), State::Nok) } /// Resolve whitespace. diff --git a/src/content/text.rs b/src/content/text.rs index 4e93779..1b3890e 100644 --- a/src/content/text.rs +++ b/src/content/text.rs @@ -21,7 +21,8 @@ //! > [whitespace][crate::construct::partial_whitespace]. use crate::construct::partial_whitespace::resolve_whitespace; -use crate::tokenizer::{State, StateName, Tokenizer}; +use crate::state::{Name, State}; +use crate::tokenizer::Tokenizer; const MARKERS: [u8; 9] = [ b'!', // `label_start_image` @@ -39,7 +40,7 @@ const MARKERS: [u8; 9] = [ pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.register_resolver("whitespace".to_string(), Box::new(resolve)); tokenizer.tokenize_state.markers = &MARKERS; - State::Retry(StateName::TextBefore) + State::Retry(Name::TextBefore) } /// Before text. 
@@ -47,75 +48,71 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => State::Ok, Some(b'!') => tokenizer.attempt( - StateName::LabelStartImageStart, - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeData), + Name::LabelStartImageStart, + State::Next(Name::TextBefore), + State::Next(Name::TextBeforeData), ), Some(b'&') => tokenizer.attempt( - StateName::CharacterReferenceStart, - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeData), + Name::CharacterReferenceStart, + State::Next(Name::TextBefore), + State::Next(Name::TextBeforeData), ), Some(b'*' | b'_') => tokenizer.attempt( - StateName::AttentionStart, - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeData), + Name::AttentionStart, + State::Next(Name::TextBefore), + State::Next(Name::TextBeforeData), ), // `autolink`, `html_text` (order does not matter) Some(b'<') => tokenizer.attempt( - StateName::AutolinkStart, - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeHtml), + Name::AutolinkStart, + State::Next(Name::TextBefore), + State::Next(Name::TextBeforeHtml), ), Some(b'[') => tokenizer.attempt( - StateName::LabelStartLinkStart, - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeData), + Name::LabelStartLinkStart, + State::Next(Name::TextBefore), + State::Next(Name::TextBeforeData), ), Some(b'\\') => tokenizer.attempt( - StateName::CharacterEscapeStart, - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeHardBreakEscape), + Name::CharacterEscapeStart, + State::Next(Name::TextBefore), + State::Next(Name::TextBeforeHardBreakEscape), ), Some(b']') => tokenizer.attempt( - StateName::LabelEndStart, - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeData), + Name::LabelEndStart, + State::Next(Name::TextBefore), + State::Next(Name::TextBeforeData), ), Some(b'`') => tokenizer.attempt( - StateName::CodeTextStart, - 
State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeData), + Name::CodeTextStart, + State::Next(Name::TextBefore), + State::Next(Name::TextBeforeData), ), - _ => State::Retry(StateName::TextBeforeData), + _ => State::Retry(Name::TextBeforeData), } } /// At `<`, which wasn’t an autolink: before HTML? pub fn before_html(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - StateName::HtmlTextStart, - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeData), + Name::HtmlTextStart, + State::Next(Name::TextBefore), + State::Next(Name::TextBeforeData), ) } /// At `\`, which wasn’t a character escape: before a hard break? pub fn before_hard_break_escape(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - StateName::HardBreakEscapeStart, - State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeData), + Name::HardBreakEscapeStart, + State::Next(Name::TextBefore), + State::Next(Name::TextBeforeData), ) } /// At data. pub fn before_data(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt( - StateName::DataStart, - State::Next(StateName::TextBefore), - State::Nok, - ) + tokenizer.attempt(Name::DataStart, State::Next(Name::TextBefore), State::Nok) } /// Resolve whitespace. -- cgit