From 6eb2f644057f371841fe25330a57ee185f91c7af Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Thu, 11 Aug 2022 13:45:24 +0200 Subject: Refactor to move some code to `state.rs` --- src/construct/attention.rs | 7 +- src/construct/autolink.rs | 37 +- src/construct/blank_line.rs | 7 +- src/construct/block_quote.rs | 17 +- src/construct/character_escape.rs | 7 +- src/construct/character_reference.rs | 19 +- src/construct/code_fenced.rs | 71 +-- src/construct/code_indented.rs | 35 +- src/construct/code_text.rs | 23 +- src/construct/definition.rs | 43 +- src/construct/hard_break_escape.rs | 5 +- src/construct/heading_atx.rs | 29 +- src/construct/heading_setext.rs | 17 +- src/construct/html_flow.rs | 151 +++--- src/construct/html_text.rs | 155 +++--- src/construct/label_end.rs | 67 +-- src/construct/label_start_image.rs | 5 +- src/construct/label_start_link.rs | 3 +- src/construct/list.rs | 57 ++- src/construct/paragraph.rs | 7 +- src/construct/partial_bom.rs | 7 +- src/construct/partial_data.rs | 15 +- src/construct/partial_destination.rs | 31 +- src/construct/partial_label.rs | 25 +- src/construct/partial_non_lazy_continuation.rs | 5 +- src/construct/partial_space_or_tab.rs | 37 +- src/construct/partial_title.rs | 29 +- src/construct/thematic_break.rs | 19 +- src/content/document.rs | 60 +-- src/content/flow.rs | 101 ++-- src/content/string.rs | 25 +- src/content/text.rs | 73 ++- src/lib.rs | 1 + src/state.rs | 622 ++++++++++++++++++++++++ src/subtokenize.rs | 7 +- src/tokenizer.rs | 647 +------------------------ 36 files changed, 1229 insertions(+), 1237 deletions(-) create mode 100644 src/state.rs diff --git a/src/construct/attention.rs b/src/construct/attention.rs index d61813d..7e873ca 100644 --- a/src/construct/attention.rs +++ b/src/construct/attention.rs @@ -51,8 +51,9 @@ //! [html-em]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-em-element //! [html-strong]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-strong-element +use crate::state::{Name, State}; use crate::token::Token; -use crate::tokenizer::{Event, EventType, Point, State, StateName, Tokenizer}; +use crate::tokenizer::{Event, EventType, Point, Tokenizer}; use crate::unicode::PUNCTUATION; use crate::util::slice::Slice; @@ -120,7 +121,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { Some(b'*' | b'_') if tokenizer.parse_state.constructs.attention => { tokenizer.tokenize_state.marker = tokenizer.current.unwrap(); tokenizer.enter(Token::AttentionSequence); - State::Retry(StateName::AttentionInside) + State::Retry(Name::AttentionInside) } _ => State::Nok, } @@ -136,7 +137,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'*' | b'_') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => { tokenizer.consume(); - State::Next(StateName::AttentionInside) + State::Next(Name::AttentionInside) } _ => { tokenizer.exit(Token::AttentionSequence); diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs index eef3840..b635d96 100644 --- a/src/construct/autolink.rs +++ b/src/construct/autolink.rs @@ -102,8 +102,9 @@ //! [html-a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element use crate::constant::{AUTOLINK_DOMAIN_SIZE_MAX, AUTOLINK_SCHEME_SIZE_MAX}; +use crate::state::{Name, State}; use crate::token::Token; -use crate::tokenizer::{State, StateName, Tokenizer}; +use crate::tokenizer::Tokenizer; /// Start of an autolink. /// @@ -121,7 +122,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.consume(); tokenizer.exit(Token::AutolinkMarker); tokenizer.enter(Token::AutolinkProtocol); - State::Next(StateName::AutolinkOpen) + State::Next(Name::AutolinkOpen) } _ => State::Nok, } @@ -140,9 +141,9 @@ pub fn open(tokenizer: &mut Tokenizer) -> State { // ASCII alphabetic. Some(b'A'..=b'Z' | b'a'..=b'z') => { tokenizer.consume(); - State::Next(StateName::AutolinkSchemeOrEmailAtext) + State::Next(Name::AutolinkSchemeOrEmailAtext) } - _ => State::Retry(StateName::AutolinkEmailAtext), + _ => State::Retry(Name::AutolinkEmailAtext), } } @@ -160,9 +161,9 @@ pub fn scheme_or_email_atext(tokenizer: &mut Tokenizer) -> State { Some(b'+' | b'-' | b'.' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => { // Count the previous alphabetical from `open` too. tokenizer.tokenize_state.size = 1; - State::Retry(StateName::AutolinkSchemeInsideOrEmailAtext) + State::Retry(Name::AutolinkSchemeInsideOrEmailAtext) } - _ => State::Retry(StateName::AutolinkEmailAtext), + _ => State::Retry(Name::AutolinkEmailAtext), } } @@ -179,7 +180,7 @@ pub fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer) -> State { Some(b':') => { tokenizer.consume(); tokenizer.tokenize_state.size = 0; - State::Next(StateName::AutolinkUrlInside) + State::Next(Name::AutolinkUrlInside) } // ASCII alphanumeric and `+`, `-`, and `.`. Some(b'+' | b'-' | b'.' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') @@ -187,11 +188,11 @@ pub fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer) -> State { { tokenizer.tokenize_state.size += 1; tokenizer.consume(); - State::Next(StateName::AutolinkSchemeInsideOrEmailAtext) + State::Next(Name::AutolinkSchemeInsideOrEmailAtext) } _ => { tokenizer.tokenize_state.size = 0; - State::Retry(StateName::AutolinkEmailAtext) + State::Retry(Name::AutolinkEmailAtext) } } } @@ -216,7 +217,7 @@ pub fn url_inside(tokenizer: &mut Tokenizer) -> State { None | Some(b'\0'..=0x1F | b' ' | b'<' | 0x7F) => State::Nok, Some(_) => { tokenizer.consume(); - State::Next(StateName::AutolinkUrlInside) + State::Next(Name::AutolinkUrlInside) } } } @@ -231,7 +232,7 @@ pub fn email_atext(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'@') => { tokenizer.consume(); - State::Next(StateName::AutolinkEmailAtSignOrDot) + State::Next(Name::AutolinkEmailAtSignOrDot) } // ASCII atext. // @@ -254,7 +255,7 @@ pub fn email_atext(tokenizer: &mut Tokenizer) -> State { b'#'..=b'\'' | b'*' | b'+' | b'-'..=b'9' | b'=' | b'?' | b'A'..=b'Z' | b'^'..=b'~', ) => { tokenizer.consume(); - State::Next(StateName::AutolinkEmailAtext) + State::Next(Name::AutolinkEmailAtext) } _ => State::Nok, } @@ -269,9 +270,7 @@ pub fn email_atext(tokenizer: &mut Tokenizer) -> State { pub fn email_at_sign_or_dot(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // ASCII alphanumeric. - Some(b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => { - State::Retry(StateName::AutolinkEmailValue) - } + Some(b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => State::Retry(Name::AutolinkEmailValue), _ => State::Nok, } } @@ -287,7 +286,7 @@ pub fn email_label(tokenizer: &mut Tokenizer) -> State { Some(b'.') => { tokenizer.tokenize_state.size = 0; tokenizer.consume(); - State::Next(StateName::AutolinkEmailAtSignOrDot) + State::Next(Name::AutolinkEmailAtSignOrDot) } Some(b'>') => { tokenizer.tokenize_state.size = 0; @@ -302,7 +301,7 @@ pub fn email_label(tokenizer: &mut Tokenizer) -> State { tokenizer.exit(Token::Autolink); State::Ok } - _ => State::Retry(StateName::AutolinkEmailValue), + _ => State::Retry(Name::AutolinkEmailValue), } } @@ -321,9 +320,9 @@ pub fn email_value(tokenizer: &mut Tokenizer) -> State { if tokenizer.tokenize_state.size < AUTOLINK_DOMAIN_SIZE_MAX => { let name = if matches!(tokenizer.current, Some(b'-')) { - StateName::AutolinkEmailValue + Name::AutolinkEmailValue } else { - StateName::AutolinkEmailLabel + Name::AutolinkEmailLabel }; tokenizer.tokenize_state.size += 1; tokenizer.consume(); diff --git a/src/construct/blank_line.rs b/src/construct/blank_line.rs index d7d4817..e8a06e9 100644 --- a/src/construct/blank_line.rs +++ b/src/construct/blank_line.rs @@ -33,7 +33,8 @@ //! [flow]: crate::content::flow use crate::construct::partial_space_or_tab::space_or_tab; -use crate::tokenizer::{State, StateName, Tokenizer}; +use crate::state::{Name, State}; +use crate::tokenizer::Tokenizer; /// Start of a blank line. /// @@ -49,8 +50,8 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { let name = space_or_tab(tokenizer); tokenizer.attempt( name, - State::Next(StateName::BlankLineAfter), - State::Next(StateName::BlankLineAfter), + State::Next(Name::BlankLineAfter), + State::Next(Name::BlankLineAfter), ) } diff --git a/src/construct/block_quote.rs b/src/construct/block_quote.rs index bbfad5b..7b8ce82 100644 --- a/src/construct/block_quote.rs +++ b/src/construct/block_quote.rs @@ -35,8 +35,9 @@ use crate::constant::TAB_SIZE; use crate::construct::partial_space_or_tab::space_or_tab_min_max; +use crate::state::{Name, State}; use crate::token::Token; -use crate::tokenizer::{State, StateName, Tokenizer}; +use crate::tokenizer::Tokenizer; /// Start of block quote. /// @@ -55,7 +56,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { usize::MAX }, ); - tokenizer.attempt(name, State::Next(StateName::BlockQuoteBefore), State::Nok) + tokenizer.attempt(name, State::Next(Name::BlockQuoteBefore), State::Nok) } else { State::Nok } @@ -71,9 +72,9 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'>') => { tokenizer.enter(Token::BlockQuote); - State::Retry(StateName::BlockQuoteContBefore) + State::Retry(Name::BlockQuoteContBefore) } - _ => State::Retry(StateName::BlockQuoteContBefore), + _ => State::Retry(Name::BlockQuoteContBefore), } } @@ -94,11 +95,7 @@ pub fn cont_start(tokenizer: &mut Tokenizer) -> State { usize::MAX }, ); - tokenizer.attempt( - name, - State::Next(StateName::BlockQuoteContBefore), - State::Nok, - ) + tokenizer.attempt(name, State::Next(Name::BlockQuoteContBefore), State::Nok) } /// After whitespace, before `>`. @@ -115,7 +112,7 @@ pub fn cont_before(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(Token::BlockQuoteMarker); tokenizer.consume(); tokenizer.exit(Token::BlockQuoteMarker); - State::Next(StateName::BlockQuoteContAfter) + State::Next(Name::BlockQuoteContAfter) } _ => State::Nok, } diff --git a/src/construct/character_escape.rs b/src/construct/character_escape.rs index 52b2873..c3d5458 100644 --- a/src/construct/character_escape.rs +++ b/src/construct/character_escape.rs @@ -33,8 +33,9 @@ //! [character_reference]: crate::construct::character_reference //! [hard_break_escape]: crate::construct::hard_break_escape +use crate::state::{Name, State}; use crate::token::Token; -use crate::tokenizer::{State, StateName, Tokenizer}; +use crate::tokenizer::Tokenizer; /// Start of a character escape. /// @@ -49,7 +50,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(Token::CharacterEscapeMarker); tokenizer.consume(); tokenizer.exit(Token::CharacterEscapeMarker); - State::Next(StateName::CharacterEscapeInside) + State::Next(Name::CharacterEscapeInside) } _ => State::Nok, } @@ -61,7 +62,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// > | a\*b /// ^ /// ``` -// StateName::CharacterEscapeInside +// Name::CharacterEscapeInside pub fn inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // ASCII punctuation. diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs index e1c7e79..435c115 100644 --- a/src/construct/character_reference.rs +++ b/src/construct/character_reference.rs @@ -65,8 +65,9 @@ use crate::constant::{ CHARACTER_REFERENCES, CHARACTER_REFERENCE_DECIMAL_SIZE_MAX, CHARACTER_REFERENCE_HEXADECIMAL_SIZE_MAX, CHARACTER_REFERENCE_NAMED_SIZE_MAX, }; +use crate::state::{Name, State}; use crate::token::Token; -use crate::tokenizer::{State, StateName, Tokenizer}; +use crate::tokenizer::Tokenizer; use crate::util::slice::Slice; /// Start of a character reference. @@ -86,7 +87,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(Token::CharacterReferenceMarker); tokenizer.consume(); tokenizer.exit(Token::CharacterReferenceMarker); - State::Next(StateName::CharacterReferenceOpen) + State::Next(Name::CharacterReferenceOpen) } _ => State::Nok, } @@ -103,17 +104,17 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// > | a b /// ^ /// ``` -// StateName::CharacterReferenceOpen +// Name::CharacterReferenceOpen pub fn open(tokenizer: &mut Tokenizer) -> State { if let Some(b'#') = tokenizer.current { tokenizer.enter(Token::CharacterReferenceMarkerNumeric); tokenizer.consume(); tokenizer.exit(Token::CharacterReferenceMarkerNumeric); - State::Next(StateName::CharacterReferenceNumeric) + State::Next(Name::CharacterReferenceNumeric) } else { tokenizer.tokenize_state.marker = b'&'; tokenizer.enter(Token::CharacterReferenceValue); - State::Retry(StateName::CharacterReferenceValue) + State::Retry(Name::CharacterReferenceValue) } } @@ -126,7 +127,7 @@ pub fn open(tokenizer: &mut Tokenizer) -> State { /// > | a b /// ^ /// ``` -// StateName::CharacterReferenceNumeric +// Name::CharacterReferenceNumeric pub fn numeric(tokenizer: &mut Tokenizer) -> State { if let Some(b'x' | b'X') = tokenizer.current { tokenizer.enter(Token::CharacterReferenceMarkerHexadecimal); @@ -134,11 +135,11 @@ pub fn numeric(tokenizer: &mut Tokenizer) -> State { tokenizer.exit(Token::CharacterReferenceMarkerHexadecimal); tokenizer.enter(Token::CharacterReferenceValue); tokenizer.tokenize_state.marker = b'x'; - State::Next(StateName::CharacterReferenceValue) + State::Next(Name::CharacterReferenceValue) } else { tokenizer.enter(Token::CharacterReferenceValue); tokenizer.tokenize_state.marker = b'#'; - State::Retry(StateName::CharacterReferenceValue) + State::Retry(Name::CharacterReferenceValue) } } @@ -202,7 +203,7 @@ pub fn value(tokenizer: &mut Tokenizer) -> State { if tokenizer.tokenize_state.size < max && test(&byte) { tokenizer.tokenize_state.size += 1; tokenizer.consume(); - return State::Next(StateName::CharacterReferenceValue); + return State::Next(Name::CharacterReferenceValue); } } diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs index 26e1148..0ce8d02 100644 --- a/src/construct/code_fenced.rs +++ b/src/construct/code_fenced.rs @@ -103,8 +103,9 @@ use crate::constant::{CODE_FENCED_SEQUENCE_SIZE_MIN, TAB_SIZE}; use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}; +use crate::state::{Name, State}; use crate::token::Token; -use crate::tokenizer::{ContentType, State, StateName, Tokenizer}; +use crate::tokenizer::{ContentType, Tokenizer}; use crate::util::slice::{Position, Slice}; /// Start of fenced code. @@ -130,7 +131,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { ); tokenizer.attempt( name, - State::Next(StateName::CodeFencedBeforeSequenceOpen), + State::Next(Name::CodeFencedBeforeSequenceOpen), State::Nok, ) } else { @@ -164,7 +165,7 @@ pub fn before_sequence_open(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.marker = tokenizer.current.unwrap(); tokenizer.tokenize_state.size_c = prefix; tokenizer.enter(Token::CodeFencedFenceSequence); - State::Retry(StateName::CodeFencedSequenceOpen) + State::Retry(Name::CodeFencedSequenceOpen) } else { State::Nok } @@ -183,15 +184,15 @@ pub fn sequence_open(tokenizer: &mut Tokenizer) -> State { Some(b'`' | b'~') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => { tokenizer.tokenize_state.size += 1; tokenizer.consume(); - State::Next(StateName::CodeFencedSequenceOpen) + State::Next(Name::CodeFencedSequenceOpen) } _ if tokenizer.tokenize_state.size >= CODE_FENCED_SEQUENCE_SIZE_MIN => { tokenizer.exit(Token::CodeFencedFenceSequence); let name = space_or_tab(tokenizer); tokenizer.attempt( name, - State::Next(StateName::CodeFencedInfoBefore), - State::Next(StateName::CodeFencedInfoBefore), + State::Next(Name::CodeFencedInfoBefore), + State::Next(Name::CodeFencedInfoBefore), ) } _ => { @@ -218,15 +219,15 @@ pub fn info_before(tokenizer: &mut Tokenizer) -> State { // Do not form containers. tokenizer.concrete = true; tokenizer.check( - StateName::NonLazyContinuationStart, - State::Next(StateName::CodeFencedAtNonLazyBreak), - State::Next(StateName::CodeFencedAfter), + Name::NonLazyContinuationStart, + State::Next(Name::CodeFencedAtNonLazyBreak), + State::Next(Name::CodeFencedAfter), ) } _ => { tokenizer.enter(Token::CodeFencedFenceInfo); tokenizer.enter_with_content(Token::Data, Some(ContentType::String)); - State::Retry(StateName::CodeFencedInfo) + State::Retry(Name::CodeFencedInfo) } } } @@ -244,7 +245,7 @@ pub fn info(tokenizer: &mut Tokenizer) -> State { None | Some(b'\n') => { tokenizer.exit(Token::Data); tokenizer.exit(Token::CodeFencedFenceInfo); - State::Retry(StateName::CodeFencedInfoBefore) + State::Retry(Name::CodeFencedInfoBefore) } Some(b'\t' | b' ') => { tokenizer.exit(Token::Data); @@ -252,8 +253,8 @@ pub fn info(tokenizer: &mut Tokenizer) -> State { let name = space_or_tab(tokenizer); tokenizer.attempt( name, - State::Next(StateName::CodeFencedMetaBefore), - State::Next(StateName::CodeFencedMetaBefore), + State::Next(Name::CodeFencedMetaBefore), + State::Next(Name::CodeFencedMetaBefore), ) } Some(b'`') if tokenizer.tokenize_state.marker == b'`' => { @@ -265,7 +266,7 @@ pub fn info(tokenizer: &mut Tokenizer) -> State { } Some(_) => { tokenizer.consume(); - State::Next(StateName::CodeFencedInfo) + State::Next(Name::CodeFencedInfo) } } } @@ -280,11 +281,11 @@ pub fn info(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn meta_before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - None | Some(b'\n') => State::Retry(StateName::CodeFencedInfoBefore), + None | Some(b'\n') => State::Retry(Name::CodeFencedInfoBefore), _ => { tokenizer.enter(Token::CodeFencedFenceMeta); tokenizer.enter_with_content(Token::Data, Some(ContentType::String)); - State::Retry(StateName::CodeFencedMeta) + State::Retry(Name::CodeFencedMeta) } } } @@ -302,7 +303,7 @@ pub fn meta(tokenizer: &mut Tokenizer) -> State { None | Some(b'\n') => { tokenizer.exit(Token::Data); tokenizer.exit(Token::CodeFencedFenceMeta); - State::Retry(StateName::CodeFencedInfoBefore) + State::Retry(Name::CodeFencedInfoBefore) } Some(b'`') if tokenizer.tokenize_state.marker == b'`' => { tokenizer.concrete = false; @@ -313,7 +314,7 @@ pub fn meta(tokenizer: &mut Tokenizer) -> State { } _ => { tokenizer.consume(); - State::Next(StateName::CodeFencedMeta) + State::Next(Name::CodeFencedMeta) } } } @@ -329,9 +330,9 @@ pub fn meta(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn at_non_lazy_break(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - StateName::CodeFencedCloseBefore, - State::Next(StateName::CodeFencedAfter), - State::Next(StateName::CodeFencedContentBefore), + Name::CodeFencedCloseBefore, + State::Next(Name::CodeFencedAfter), + State::Next(Name::CodeFencedContentBefore), ) } @@ -349,7 +350,7 @@ pub fn close_before(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(Token::LineEnding); tokenizer.consume(); tokenizer.exit(Token::LineEnding); - State::Next(StateName::CodeFencedCloseStart) + State::Next(Name::CodeFencedCloseStart) } _ => unreachable!("expected eol"), } @@ -376,7 +377,7 @@ pub fn close_start(tokenizer: &mut Tokenizer) -> State { ); tokenizer.attempt( name, - State::Next(StateName::CodeFencedBeforeSequenceClose), + State::Next(Name::CodeFencedBeforeSequenceClose), State::Nok, ) } @@ -393,7 +394,7 @@ pub fn before_sequence_close(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'`' | b'~') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => { tokenizer.enter(Token::CodeFencedFenceSequence); - State::Retry(StateName::CodeFencedSequenceClose) + State::Retry(Name::CodeFencedSequenceClose) } _ => State::Nok, } @@ -412,7 +413,7 @@ pub fn sequence_close(tokenizer: &mut Tokenizer) -> State { Some(b'`' | b'~') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => { tokenizer.tokenize_state.size_b += 1; tokenizer.consume(); - State::Next(StateName::CodeFencedSequenceClose) + State::Next(Name::CodeFencedSequenceClose) } _ if tokenizer.tokenize_state.size_b >= CODE_FENCED_SEQUENCE_SIZE_MIN && tokenizer.tokenize_state.size_b >= tokenizer.tokenize_state.size => @@ -422,8 +423,8 @@ pub fn sequence_close(tokenizer: &mut Tokenizer) -> State { let name = space_or_tab(tokenizer); tokenizer.attempt( name, - State::Next(StateName::CodeFencedAfterSequenceClose), - State::Next(StateName::CodeFencedAfterSequenceClose), + State::Next(Name::CodeFencedAfterSequenceClose), + State::Next(Name::CodeFencedAfterSequenceClose), ) } _ => { @@ -463,7 +464,7 @@ pub fn content_before(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(Token::LineEnding); tokenizer.consume(); tokenizer.exit(Token::LineEnding); - State::Next(StateName::CodeFencedContentStart) + State::Next(Name::CodeFencedContentStart) } /// Before code content, definitely not before a closing fence. /// @@ -477,7 +478,7 @@ pub fn content_start(tokenizer: &mut Tokenizer) -> State { let name = space_or_tab_min_max(tokenizer, 0, tokenizer.tokenize_state.size_c); tokenizer.attempt( name, - State::Next(StateName::CodeFencedBeforeContentChunk), + State::Next(Name::CodeFencedBeforeContentChunk), State::Nok, ) } @@ -493,13 +494,13 @@ pub fn content_start(tokenizer: &mut Tokenizer) -> State { pub fn before_content_chunk(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => tokenizer.check( - StateName::NonLazyContinuationStart, - State::Next(StateName::CodeFencedAtNonLazyBreak), - State::Next(StateName::CodeFencedAfter), + Name::NonLazyContinuationStart, + State::Next(Name::CodeFencedAtNonLazyBreak), + State::Next(Name::CodeFencedAfter), ), _ => { tokenizer.enter(Token::CodeFlowChunk); - State::Retry(StateName::CodeFencedContentChunk) + State::Retry(Name::CodeFencedContentChunk) } } } @@ -516,11 +517,11 @@ pub fn content_chunk(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { tokenizer.exit(Token::CodeFlowChunk); - State::Retry(StateName::CodeFencedBeforeContentChunk) + State::Retry(Name::CodeFencedBeforeContentChunk) } _ => { tokenizer.consume(); - State::Next(StateName::CodeFencedContentChunk) + State::Next(Name::CodeFencedContentChunk) } } } diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs index 36ae4c6..f442f27 100644 --- a/src/construct/code_indented.rs +++ b/src/construct/code_indented.rs @@ -47,8 +47,9 @@ use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}; use crate::constant::TAB_SIZE; +use crate::state::{Name, State}; use crate::token::Token; -use crate::tokenizer::{State, StateName, Tokenizer}; +use crate::tokenizer::Tokenizer; /// Start of code (indented). /// @@ -65,11 +66,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { if !tokenizer.interrupt && tokenizer.parse_state.constructs.code_indented { tokenizer.enter(Token::CodeIndented); let name = space_or_tab_min_max(tokenizer, TAB_SIZE, TAB_SIZE); - tokenizer.attempt( - name, - State::Next(StateName::CodeIndentedAtBreak), - State::Nok, - ) + tokenizer.attempt(name, State::Next(Name::CodeIndentedAtBreak), State::Nok) } else { State::Nok } @@ -83,15 +80,15 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn at_break(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - None => State::Retry(StateName::CodeIndentedAfter), + None => State::Retry(Name::CodeIndentedAfter), Some(b'\n') => tokenizer.attempt( - StateName::CodeIndentedFurtherStart, - State::Next(StateName::CodeIndentedAtBreak), - State::Next(StateName::CodeIndentedAfter), + Name::CodeIndentedFurtherStart, + State::Next(Name::CodeIndentedAtBreak), + State::Next(Name::CodeIndentedAfter), ), _ => { tokenizer.enter(Token::CodeFlowChunk); - State::Retry(StateName::CodeIndentedInside) + State::Retry(Name::CodeIndentedInside) } } } @@ -106,11 +103,11 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { tokenizer.exit(Token::CodeFlowChunk); - State::Retry(StateName::CodeIndentedAtBreak) + State::Retry(Name::CodeIndentedAtBreak) } _ => { tokenizer.consume(); - State::Next(StateName::CodeIndentedInside) + State::Next(Name::CodeIndentedInside) } } } @@ -141,14 +138,14 @@ pub fn further_start(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(Token::LineEnding); tokenizer.consume(); tokenizer.exit(Token::LineEnding); - State::Next(StateName::CodeIndentedFurtherStart) + State::Next(Name::CodeIndentedFurtherStart) } _ if !tokenizer.lazy => { let name = space_or_tab_min_max(tokenizer, TAB_SIZE, TAB_SIZE); tokenizer.attempt( name, - State::Next(StateName::CodeIndentedFurtherEnd), - State::Next(StateName::CodeIndentedFurtherBegin), + State::Next(Name::CodeIndentedFurtherEnd), + State::Next(Name::CodeIndentedFurtherBegin), ) } _ => State::Nok, @@ -177,8 +174,8 @@ pub fn further_begin(tokenizer: &mut Tokenizer) -> State { let name = space_or_tab(tokenizer); tokenizer.attempt( name, - State::Next(StateName::CodeIndentedFurtherAfter), - State::Next(StateName::CodeIndentedFurtherAfter), + State::Next(Name::CodeIndentedFurtherAfter), + State::Next(Name::CodeIndentedFurtherAfter), ) } @@ -191,7 +188,7 @@ pub fn further_begin(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn further_after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - Some(b'\n') => State::Retry(StateName::CodeIndentedFurtherStart), + Some(b'\n') => State::Retry(Name::CodeIndentedFurtherStart), _ => State::Nok, } } diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs index d7ada3d..f626cfb 100644 --- a/src/construct/code_text.rs +++ b/src/construct/code_text.rs @@ -83,8 +83,9 @@ //! [code_fenced]: crate::construct::code_fenced //! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element +use crate::state::{Name, State}; use crate::token::Token; -use crate::tokenizer::{State, StateName, Tokenizer}; +use crate::tokenizer::Tokenizer; /// Start of code (text). /// @@ -105,7 +106,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { { tokenizer.enter(Token::CodeText); tokenizer.enter(Token::CodeTextSequence); - State::Retry(StateName::CodeTextSequenceOpen) + State::Retry(Name::CodeTextSequenceOpen) } _ => State::Nok, } @@ -121,10 +122,10 @@ pub fn sequence_open(tokenizer: &mut Tokenizer) -> State { if let Some(b'`') = tokenizer.current { tokenizer.tokenize_state.size += 1; tokenizer.consume(); - State::Next(StateName::CodeTextSequenceOpen) + State::Next(Name::CodeTextSequenceOpen) } else { tokenizer.exit(Token::CodeTextSequence); - State::Retry(StateName::CodeTextBetween) + State::Retry(Name::CodeTextBetween) } } @@ -144,15 +145,15 @@ pub fn between(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(Token::LineEnding); tokenizer.consume(); tokenizer.exit(Token::LineEnding); - State::Next(StateName::CodeTextBetween) + State::Next(Name::CodeTextBetween) } Some(b'`') => { tokenizer.enter(Token::CodeTextSequence); - State::Retry(StateName::CodeTextSequenceClose) + State::Retry(Name::CodeTextSequenceClose) } _ => { tokenizer.enter(Token::CodeTextData); - State::Retry(StateName::CodeTextData) + State::Retry(Name::CodeTextData) } } } @@ -167,11 +168,11 @@ pub fn data(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n' | b'`') => { tokenizer.exit(Token::CodeTextData); - State::Retry(StateName::CodeTextBetween) + State::Retry(Name::CodeTextBetween) } _ => { tokenizer.consume(); - State::Next(StateName::CodeTextData) + State::Next(Name::CodeTextData) } } } @@ -187,7 +188,7 @@ pub fn sequence_close(tokenizer: &mut Tokenizer) -> State { Some(b'`') => { tokenizer.tokenize_state.size_b += 1; tokenizer.consume(); - State::Next(StateName::CodeTextSequenceClose) + State::Next(Name::CodeTextSequenceClose) } _ => { if tokenizer.tokenize_state.size == tokenizer.tokenize_state.size_b { @@ -203,7 +204,7 @@ pub fn sequence_close(tokenizer: &mut Tokenizer) -> State { tokenizer.events[index - 1].token_type = Token::CodeTextData; tokenizer.events[index].token_type = Token::CodeTextData; tokenizer.tokenize_state.size_b = 0; - State::Retry(StateName::CodeTextBetween) + State::Retry(Name::CodeTextBetween) } } } diff --git a/src/construct/definition.rs b/src/construct/definition.rs index 5db611b..394375f 100644 --- a/src/construct/definition.rs +++ b/src/construct/definition.rs @@ -94,8 +94,9 @@ //! [html-img]: https://html.spec.whatwg.org/multipage/embedded-content.html#the-img-element use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_eol}; +use crate::state::{Name, State}; use crate::token::Token; -use crate::tokenizer::{State, StateName, Tokenizer}; +use crate::tokenizer::Tokenizer; use crate::util::skip::opt_back as skip_opt_back; /// At the start of a definition. @@ -122,8 +123,8 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { let name = space_or_tab(tokenizer); tokenizer.attempt( name, - State::Next(StateName::DefinitionBefore), - State::Next(StateName::DefinitionBefore), + State::Next(Name::DefinitionBefore), + State::Next(Name::DefinitionBefore), ) } else { State::Nok @@ -143,8 +144,8 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_2 = Token::DefinitionLabelMarker; tokenizer.tokenize_state.token_3 = Token::DefinitionLabelString; tokenizer.attempt( - StateName::LabelStart, - State::Next(StateName::DefinitionLabelAfter), + Name::LabelStart, + State::Next(Name::DefinitionLabelAfter), State::Nok, ) } @@ -168,7 +169,7 @@ pub fn label_after(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(Token::DefinitionMarker); tokenizer.consume(); tokenizer.exit(Token::DefinitionMarker); - State::Next(StateName::DefinitionMarkerAfter) + State::Next(Name::DefinitionMarkerAfter) } _ => State::Nok, } @@ -184,8 +185,8 @@ pub fn marker_after(tokenizer: &mut Tokenizer) -> State { let name = space_or_tab_eol(tokenizer); tokenizer.attempt( name, - State::Next(StateName::DefinitionDestinationBefore), - State::Next(StateName::DefinitionDestinationBefore), + State::Next(Name::DefinitionDestinationBefore), + State::Next(Name::DefinitionDestinationBefore), ) } @@ -203,9 +204,9 @@ pub fn destination_before(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_5 = Token::DefinitionDestinationString; tokenizer.tokenize_state.size_b = usize::MAX; tokenizer.attempt( - StateName::DestinationStart, - State::Next(StateName::DefinitionDestinationAfter), - State::Next(StateName::DefinitionDestinationMissing), + Name::DestinationStart, + State::Next(Name::DefinitionDestinationAfter), + State::Next(Name::DefinitionDestinationMissing), ) } @@ -223,9 +224,9 @@ pub fn destination_after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_5 = Token::Data; tokenizer.tokenize_state.size_b = 0; tokenizer.attempt( - StateName::DefinitionTitleBefore, - State::Next(StateName::DefinitionAfter), - State::Next(StateName::DefinitionAfter), + Name::DefinitionTitleBefore, + State::Next(Name::DefinitionAfter), + State::Next(Name::DefinitionAfter), ) } @@ -252,8 +253,8 @@ pub fn after(tokenizer: &mut Tokenizer) -> State { let name = space_or_tab(tokenizer); tokenizer.attempt( name, - State::Next(StateName::DefinitionAfterWhitespace), - State::Next(StateName::DefinitionAfterWhitespace), + State::Next(Name::DefinitionAfterWhitespace), + State::Next(Name::DefinitionAfterWhitespace), ) } @@ -289,7 +290,7 @@ pub fn title_before(tokenizer: &mut Tokenizer) -> State { let name = space_or_tab_eol(tokenizer); tokenizer.attempt( name, - State::Next(StateName::DefinitionTitleBeforeMarker), + State::Next(Name::DefinitionTitleBeforeMarker), State::Nok, ) } @@ -306,8 +307,8 @@ pub fn title_before_marker(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_2 = Token::DefinitionTitleMarker; tokenizer.tokenize_state.token_3 = Token::DefinitionTitleString; tokenizer.attempt( - StateName::TitleStart, - State::Next(StateName::DefinitionTitleAfter), + Name::TitleStart, + State::Next(Name::DefinitionTitleAfter), State::Nok, ) } @@ -325,8 +326,8 @@ pub fn title_after(tokenizer: &mut Tokenizer) -> State { let name = space_or_tab(tokenizer); tokenizer.attempt( name, - State::Next(StateName::DefinitionTitleAfterOptionalWhitespace), - State::Next(StateName::DefinitionTitleAfterOptionalWhitespace), + State::Next(Name::DefinitionTitleAfterOptionalWhitespace), + State::Next(Name::DefinitionTitleAfterOptionalWhitespace), ) } diff --git a/src/construct/hard_break_escape.rs b/src/construct/hard_break_escape.rs index fc2cbdf..482648f 100644 --- a/src/construct/hard_break_escape.rs +++ b/src/construct/hard_break_escape.rs @@ -39,8 +39,9 @@ //! [hard_break_trailing]: crate::construct::partial_whitespace //! [html]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-br-element +use crate::state::{Name, State}; use crate::token::Token; -use crate::tokenizer::{State, StateName, Tokenizer}; +use crate::tokenizer::Tokenizer; /// Start of a hard break (escape). /// @@ -54,7 +55,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { Some(b'\\') if tokenizer.parse_state.constructs.hard_break_escape => { tokenizer.enter(Token::HardBreakEscape); tokenizer.consume(); - State::Next(StateName::HardBreakEscapeAfter) + State::Next(Name::HardBreakEscapeAfter) } _ => State::Nok, } diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs index d70f7db..12bc5b1 100644 --- a/src/construct/heading_atx.rs +++ b/src/construct/heading_atx.rs @@ -54,10 +54,11 @@ //! [wiki-setext]: https://en.wikipedia.org/wiki/Setext //! [atx]: http://www.aaronsw.com/2002/atx/ -use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}; use crate::constant::{HEADING_ATX_OPENING_FENCE_SIZE_MAX, TAB_SIZE}; +use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}; +use crate::state::{Name, State}; use crate::token::Token; -use crate::tokenizer::{ContentType, Event, EventType, State, StateName, Tokenizer}; +use crate::tokenizer::{ContentType, Event, EventType, Tokenizer}; /// Start of a heading (atx). /// @@ -77,7 +78,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { usize::MAX }, ); - tokenizer.attempt(name, State::Next(StateName::HeadingAtxBefore), State::Nok) + tokenizer.attempt(name, State::Next(Name::HeadingAtxBefore), State::Nok) } else { State::Nok } @@ -92,7 +93,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { pub fn before(tokenizer: &mut Tokenizer) -> State { if Some(b'#') == tokenizer.current { tokenizer.enter(Token::HeadingAtxSequence); - State::Retry(StateName::HeadingAtxSequenceOpen) + State::Retry(Name::HeadingAtxSequenceOpen) } else { State::Nok } @@ -109,18 +110,18 @@ pub fn sequence_open(tokenizer: &mut Tokenizer) -> State { None | Some(b'\n') if tokenizer.tokenize_state.size > 0 => { tokenizer.tokenize_state.size = 0; tokenizer.exit(Token::HeadingAtxSequence); - State::Retry(StateName::HeadingAtxAtBreak) + State::Retry(Name::HeadingAtxAtBreak) } Some(b'#') if tokenizer.tokenize_state.size < HEADING_ATX_OPENING_FENCE_SIZE_MAX => { tokenizer.tokenize_state.size += 1; tokenizer.consume(); - State::Next(StateName::HeadingAtxSequenceOpen) + State::Next(Name::HeadingAtxSequenceOpen) } _ if tokenizer.tokenize_state.size > 0 => { tokenizer.tokenize_state.size = 0; tokenizer.exit(Token::HeadingAtxSequence); let name = space_or_tab(tokenizer); - tokenizer.attempt(name, State::Next(StateName::HeadingAtxAtBreak), State::Nok) + tokenizer.attempt(name, State::Next(Name::HeadingAtxAtBreak), State::Nok) } _ => { tokenizer.tokenize_state.size = 0; @@ -146,15 +147,15 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { } Some(b'\t' | b' ') => { let name = space_or_tab(tokenizer); - tokenizer.attempt(name, State::Next(StateName::HeadingAtxAtBreak), State::Nok) + tokenizer.attempt(name, State::Next(Name::HeadingAtxAtBreak), State::Nok) } Some(b'#') => { tokenizer.enter(Token::HeadingAtxSequence); - State::Retry(StateName::HeadingAtxSequenceFurther) + State::Retry(Name::HeadingAtxSequenceFurther) } Some(_) => { tokenizer.enter_with_content(Token::Data, Some(ContentType::Text)); - State::Retry(StateName::HeadingAtxData) + State::Retry(Name::HeadingAtxData) } } } @@ -170,10 +171,10 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { pub fn sequence_further(tokenizer: &mut Tokenizer) -> State { if let Some(b'#') = tokenizer.current { tokenizer.consume(); - State::Next(StateName::HeadingAtxSequenceFurther) + State::Next(Name::HeadingAtxSequenceFurther) } else { tokenizer.exit(Token::HeadingAtxSequence); - State::Retry(StateName::HeadingAtxAtBreak) + State::Retry(Name::HeadingAtxAtBreak) } } @@ -188,11 +189,11 @@ pub fn data(tokenizer: &mut Tokenizer) -> State { // Note: `#` for closing sequence must be preceded by whitespace, otherwise it’s just text. None | Some(b'\t' | b'\n' | b' ') => { tokenizer.exit(Token::Data); - State::Retry(StateName::HeadingAtxAtBreak) + State::Retry(Name::HeadingAtxAtBreak) } _ => { tokenizer.consume(); - State::Next(StateName::HeadingAtxData) + State::Next(Name::HeadingAtxData) } } } diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs index f653d75..8b45fff 100644 --- a/src/construct/heading_setext.rs +++ b/src/construct/heading_setext.rs @@ -59,8 +59,9 @@ use crate::constant::TAB_SIZE; use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}; +use crate::state::{Name, State}; use crate::token::Token; -use crate::tokenizer::{EventType, State, StateName, Tokenizer}; +use crate::tokenizer::{EventType, Tokenizer}; use crate::util::skip::opt_back as skip_opt_back; /// At a line ending, presumably an underline. @@ -93,11 +94,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { }, ); - tokenizer.attempt( - name, - State::Next(StateName::HeadingSetextBefore), - State::Nok, - ) + tokenizer.attempt(name, State::Next(Name::HeadingSetextBefore), State::Nok) } else { State::Nok } @@ -115,7 +112,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { Some(b'-' | b'=') => { tokenizer.tokenize_state.marker = tokenizer.current.unwrap(); tokenizer.enter(Token::HeadingSetextUnderline); - State::Retry(StateName::HeadingSetextInside) + State::Retry(Name::HeadingSetextInside) } _ => State::Nok, } @@ -132,7 +129,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'-' | b'=') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => { tokenizer.consume(); - State::Next(StateName::HeadingSetextInside) + State::Next(Name::HeadingSetextInside) } _ => { tokenizer.tokenize_state.marker = 0; @@ -140,8 +137,8 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State { let name = space_or_tab(tokenizer); tokenizer.attempt( name, - State::Next(StateName::HeadingSetextAfter), - State::Next(StateName::HeadingSetextAfter), + State::Next(Name::HeadingSetextAfter), + State::Next(Name::HeadingSetextAfter), ) } } diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs index 7a346e9..c9f8610 100644 --- a/src/construct/html_flow.rs +++ b/src/construct/html_flow.rs @@ -104,8 +104,9 @@ use crate::constant::{ use crate::construct::partial_space_or_tab::{ space_or_tab_with_options, Options as SpaceOrTabOptions, }; +use crate::state::{Name, State}; use crate::token::Token; -use crate::tokenizer::{State, StateName, Tokenizer}; +use crate::tokenizer::Tokenizer; use crate::util::slice::Slice; /// Symbol for `