diff options
Diffstat (limited to 'src')
42 files changed, 1313 insertions, 1300 deletions
diff --git a/src/compiler.rs b/src/compiler.rs index 4a9ec36..a2a43b0 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -1,11 +1,10 @@ //! Turn events into a string of HTML. use crate::constant::{SAFE_PROTOCOL_HREF, SAFE_PROTOCOL_SRC}; -use crate::token::Token; -use crate::tokenizer::{Event, EventType}; -use crate::util::normalize_identifier::normalize_identifier; +use crate::event::{Event, Kind, Name}; use crate::util::{ decode_character_reference::{decode_named, decode_numeric}, encode::encode, + normalize_identifier::normalize_identifier, sanitize_uri::sanitize_uri, skip, slice::{Position, Slice}, @@ -179,8 +178,8 @@ pub fn compile(events: &[Event], bytes: &[u8], options: &Options) -> String { while index < events.len() { let event = &events[index]; - if event.event_type == EventType::Exit - && (event.token_type == Token::BlankLineEnding || event.token_type == Token::LineEnding) + if event.kind == Kind::Exit + && (event.name == Name::BlankLineEnding || event.name == Name::LineEnding) { line_ending_inferred = Some(LineEnding::from_str( Slice::from_position(bytes, &Position::from_exit_event(events, index)).as_str(), @@ -216,8 +215,8 @@ pub fn compile(events: &[Event], bytes: &[u8], options: &Options) -> String { handle(&mut context, index); } - if event.token_type == Token::Definition { - if event.event_type == EventType::Enter { + if event.name == Name::Definition { + if event.kind == Kind::Enter { handle(&mut context, index); // Also handle start. definition_inside = true; definition_indices.push((index, index)); @@ -264,7 +263,7 @@ pub fn compile(events: &[Event], bytes: &[u8], options: &Options) -> String { fn handle(context: &mut CompileContext, index: usize) { context.index = index; - if context.events[index].event_type == EventType::Enter { + if context.events[index].kind == Kind::Enter { enter(context); } else { exit(context); @@ -273,87 +272,87 @@ fn handle(context: &mut CompileContext, index: usize) { /// Handle [`Enter`][EventType::Enter]. fn enter(context: &mut CompileContext) { - match context.events[context.index].token_type { - Token::CodeFencedFenceInfo - | Token::CodeFencedFenceMeta - | Token::DefinitionLabelString - | Token::DefinitionTitleString - | Token::HeadingAtxText - | Token::HeadingSetextText - | Token::Label - | Token::ReferenceString - | Token::ResourceTitleString => on_enter_buffer(context), - - Token::BlockQuote => on_enter_block_quote(context), - Token::CodeIndented => on_enter_code_indented(context), - Token::CodeFenced => on_enter_code_fenced(context), - Token::CodeText => on_enter_code_text(context), - Token::Definition => on_enter_definition(context), - Token::DefinitionDestinationString => on_enter_definition_destination_string(context), - Token::Emphasis => on_enter_emphasis(context), - Token::HtmlFlow => on_enter_html_flow(context), - Token::HtmlText => on_enter_html_text(context), - Token::Image => on_enter_image(context), - Token::Link => on_enter_link(context), - Token::ListItemMarker => on_enter_list_item_marker(context), - Token::ListOrdered | Token::ListUnordered => on_enter_list(context), - Token::Paragraph => on_enter_paragraph(context), - Token::Resource => on_enter_resource(context), - Token::ResourceDestinationString => on_enter_resource_destination_string(context), - Token::Strong => on_enter_strong(context), + match context.events[context.index].name { + Name::CodeFencedFenceInfo + | Name::CodeFencedFenceMeta + | Name::DefinitionLabelString + | Name::DefinitionTitleString + | Name::HeadingAtxText + | Name::HeadingSetextText + | Name::Label + | Name::ReferenceString + | Name::ResourceTitleString => on_enter_buffer(context), + + Name::BlockQuote => on_enter_block_quote(context), + Name::CodeIndented => on_enter_code_indented(context), + Name::CodeFenced => on_enter_code_fenced(context), + Name::CodeText => on_enter_code_text(context), + Name::Definition => on_enter_definition(context), + Name::DefinitionDestinationString => on_enter_definition_destination_string(context), + Name::Emphasis => on_enter_emphasis(context), + Name::HtmlFlow => on_enter_html_flow(context), + Name::HtmlText => on_enter_html_text(context), + Name::Image => on_enter_image(context), + Name::Link => on_enter_link(context), + Name::ListItemMarker => on_enter_list_item_marker(context), + Name::ListOrdered | Name::ListUnordered => on_enter_list(context), + Name::Paragraph => on_enter_paragraph(context), + Name::Resource => on_enter_resource(context), + Name::ResourceDestinationString => on_enter_resource_destination_string(context), + Name::Strong => on_enter_strong(context), _ => {} } } /// Handle [`Exit`][EventType::Exit]. fn exit(context: &mut CompileContext) { - match context.events[context.index].token_type { - Token::CodeFencedFenceMeta | Token::Resource => on_exit_drop(context), - Token::CharacterEscapeValue | Token::CodeTextData | Token::Data => on_exit_data(context), - - Token::AutolinkEmail => on_exit_autolink_email(context), - Token::AutolinkProtocol => on_exit_autolink_protocol(context), - Token::BlankLineEnding => on_exit_blank_line_ending(context), - Token::BlockQuote => on_exit_block_quote(context), - Token::CharacterReferenceMarker => on_exit_character_reference_marker(context), - Token::CharacterReferenceMarkerNumeric => { + match context.events[context.index].name { + Name::CodeFencedFenceMeta | Name::Resource => on_exit_drop(context), + Name::CharacterEscapeValue | Name::CodeTextData | Name::Data => on_exit_data(context), + + Name::AutolinkEmail => on_exit_autolink_email(context), + Name::AutolinkProtocol => on_exit_autolink_protocol(context), + Name::BlankLineEnding => on_exit_blank_line_ending(context), + Name::BlockQuote => on_exit_block_quote(context), + Name::CharacterReferenceMarker => on_exit_character_reference_marker(context), + Name::CharacterReferenceMarkerNumeric => { on_exit_character_reference_marker_numeric(context); } - Token::CharacterReferenceMarkerHexadecimal => { + Name::CharacterReferenceMarkerHexadecimal => { on_exit_character_reference_marker_hexadecimal(context); } - Token::CharacterReferenceValue => on_exit_character_reference_value(context), - Token::CodeFenced | Token::CodeIndented => on_exit_code_flow(context), - Token::CodeFencedFence => on_exit_code_fenced_fence(context), - Token::CodeFencedFenceInfo => on_exit_code_fenced_fence_info(context), - Token::CodeFlowChunk => on_exit_code_flow_chunk(context), - Token::CodeText => on_exit_code_text(context), - Token::Definition => on_exit_definition(context), - Token::DefinitionDestinationString => on_exit_definition_destination_string(context), - Token::DefinitionLabelString => on_exit_definition_label_string(context), - Token::DefinitionTitleString => on_exit_definition_title_string(context), - Token::Emphasis => on_exit_emphasis(context), - Token::HardBreakEscape | Token::HardBreakTrailing => on_exit_break(context), - Token::HeadingAtx => on_exit_heading_atx(context), - Token::HeadingAtxSequence => on_exit_heading_atx_sequence(context), - Token::HeadingAtxText => on_exit_heading_atx_text(context), - Token::HeadingSetextText => on_exit_heading_setext_text(context), - Token::HeadingSetextUnderline => on_exit_heading_setext_underline(context), - Token::HtmlFlow | Token::HtmlText => on_exit_html(context), - Token::HtmlFlowData | Token::HtmlTextData => on_exit_html_data(context), - Token::Image | Token::Link => on_exit_media(context), - Token::Label => on_exit_label(context), - Token::LabelText => on_exit_label_text(context), - Token::LineEnding => on_exit_line_ending(context), - Token::ListOrdered | Token::ListUnordered => on_exit_list(context), - Token::ListItem => on_exit_list_item(context), - Token::ListItemValue => on_exit_list_item_value(context), - Token::Paragraph => on_exit_paragraph(context), - Token::ReferenceString => on_exit_reference_string(context), - Token::ResourceDestinationString => on_exit_resource_destination_string(context), - Token::ResourceTitleString => on_exit_resource_title_string(context), - Token::Strong => on_exit_strong(context), - Token::ThematicBreak => on_exit_thematic_break(context), + Name::CharacterReferenceValue => on_exit_character_reference_value(context), + Name::CodeFenced | Name::CodeIndented => on_exit_code_flow(context), + Name::CodeFencedFence => on_exit_code_fenced_fence(context), + Name::CodeFencedFenceInfo => on_exit_code_fenced_fence_info(context), + Name::CodeFlowChunk => on_exit_code_flow_chunk(context), + Name::CodeText => on_exit_code_text(context), + Name::Definition => on_exit_definition(context), + Name::DefinitionDestinationString => on_exit_definition_destination_string(context), + Name::DefinitionLabelString => on_exit_definition_label_string(context), + Name::DefinitionTitleString => on_exit_definition_title_string(context), + Name::Emphasis => on_exit_emphasis(context), + Name::HardBreakEscape | Name::HardBreakTrailing => on_exit_break(context), + Name::HeadingAtx => on_exit_heading_atx(context), + Name::HeadingAtxSequence => on_exit_heading_atx_sequence(context), + Name::HeadingAtxText => on_exit_heading_atx_text(context), + Name::HeadingSetextText => on_exit_heading_setext_text(context), + Name::HeadingSetextUnderline => on_exit_heading_setext_underline(context), + Name::HtmlFlow | Name::HtmlText => on_exit_html(context), + Name::HtmlFlowData | Name::HtmlTextData => on_exit_html_data(context), + Name::Image | Name::Link => on_exit_media(context), + Name::Label => on_exit_label(context), + Name::LabelText => on_exit_label_text(context), + Name::LineEnding => on_exit_line_ending(context), + Name::ListOrdered | Name::ListUnordered => on_exit_list(context), + Name::ListItem => on_exit_list_item(context), + Name::ListItemValue => on_exit_list_item_value(context), + Name::Paragraph => on_exit_paragraph(context), + Name::ReferenceString => on_exit_reference_string(context), + Name::ResourceDestinationString => on_exit_resource_destination_string(context), + Name::ResourceTitleString => on_exit_resource_title_string(context), + Name::Strong => on_exit_strong(context), + Name::ThematicBreak => on_exit_thematic_break(context), _ => {} } } @@ -469,17 +468,17 @@ fn on_enter_list(context: &mut CompileContext) { let mut index = context.index; let mut balance = 0; let mut loose = false; - let token_type = &events[index].token_type; + let name = &events[index].name; while index < events.len() { let event = &events[index]; - if event.event_type == EventType::Enter { + if event.kind == Kind::Enter { balance += 1; } else { balance -= 1; - if balance < 3 && event.token_type == Token::BlankLineEnding { + if balance < 3 && event.name == Name::BlankLineEnding { // Blank line directly after a prefix: // // ```markdown @@ -508,29 +507,29 @@ fn on_enter_list(context: &mut CompileContext) { if balance == 1 { let mut before = index - 2; - if events[before].token_type == Token::ListItem { + if events[before].name == Name::ListItem { before -= 1; - if events[before].token_type == Token::SpaceOrTab { + if events[before].name == Name::SpaceOrTab { before -= 2; } - if events[before].token_type == Token::BlockQuote - && events[before - 1].token_type == Token::BlockQuotePrefix + if events[before].name == Name::BlockQuote + && events[before - 1].name == Name::BlockQuotePrefix { at_empty_block_quote = true; - } else if events[before].token_type == Token::ListItemPrefix { + } else if events[before].name == Name::ListItemPrefix { at_empty_list_item = true; } } } else { let mut before = index - 2; - if events[before].token_type == Token::SpaceOrTab { + if events[before].name == Name::SpaceOrTab { before -= 2; } - if events[before].token_type == Token::ListItemPrefix { + if events[before].name == Name::ListItemPrefix { at_prefix = true; } } @@ -542,7 +541,7 @@ fn on_enter_list(context: &mut CompileContext) { } // Done. - if balance == 0 && event.token_type == *token_type { + if balance == 0 && event.name == *name { break; } } @@ -553,7 +552,7 @@ fn on_enter_list(context: &mut CompileContext) { context.tight_stack.push(!loose); context.line_ending_if_needed(); // Note: no `>`. - context.push(if *token_type == Token::ListOrdered { + context.push(if *name == Name::ListOrdered { "<ol" } else { "<ul" @@ -766,7 +765,7 @@ fn on_exit_code_flow(context: &mut CompileContext) { // In a container. && !context.tight_stack.is_empty() // Empty (as the closing is right at the opening fence) - && context.events[context.index - 1].token_type != Token::CodeFencedFence + && context.events[context.index - 1].name != Name::CodeFencedFence { context.line_ending(); } @@ -1001,13 +1000,11 @@ fn on_exit_line_ending(context: &mut CompileContext) { fn on_exit_list(context: &mut CompileContext) { context.tight_stack.pop(); context.line_ending(); - context.push( - if context.events[context.index].token_type == Token::ListOrdered { - "</ol>" - } else { - "</ul>" - }, - ); + context.push(if context.events[context.index].name == Name::ListOrdered { + "</ol>" + } else { + "</ul>" + }); } /// Handle [`Exit`][EventType::Exit]:[`ListItem`][Token::ListItem]. @@ -1017,15 +1014,15 @@ fn on_exit_list_item(context: &mut CompileContext) { context.events, context.index - 1, &[ - Token::BlankLineEnding, - Token::LineEnding, - Token::SpaceOrTab, - Token::BlockQuotePrefix, + Name::BlankLineEnding, + Name::LineEnding, + Name::SpaceOrTab, + Name::BlockQuotePrefix, ], ); let previous = &context.events[before_item]; - let tight_paragraph = *tight && previous.token_type == Token::Paragraph; - let empty_item = previous.token_type == Token::ListItemPrefix; + let tight_paragraph = *tight && previous.name == Name::Paragraph; + let empty_item = previous.name == Name::ListItemPrefix; context.slurp_one_line_ending = false; diff --git a/src/construct/attention.rs b/src/construct/attention.rs index 7e873ca..ac2ef25 100644 --- a/src/construct/attention.rs +++ b/src/construct/attention.rs @@ -51,9 +51,9 @@ //! [html-em]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-em-element //! [html-strong]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-strong-element -use crate::state::{Name, State}; -use crate::token::Token; -use crate::tokenizer::{Event, EventType, Point, Tokenizer}; +use crate::event::{Event, Kind, Name, Point}; +use crate::state::{Name as StateName, State}; +use crate::tokenizer::Tokenizer; use crate::unicode::PUNCTUATION; use crate::util::slice::Slice; @@ -120,8 +120,8 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'*' | b'_') if tokenizer.parse_state.constructs.attention => { tokenizer.tokenize_state.marker = tokenizer.current.unwrap(); - tokenizer.enter(Token::AttentionSequence); - State::Retry(Name::AttentionInside) + tokenizer.enter(Name::AttentionSequence); + State::Retry(StateName::AttentionInside) } _ => State::Nok, } @@ -137,10 +137,10 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'*' | b'_') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => { tokenizer.consume(); - State::Next(Name::AttentionInside) + State::Next(StateName::AttentionInside) } _ => { - tokenizer.exit(Token::AttentionSequence); + tokenizer.exit(Name::AttentionSequence); tokenizer.register_resolver("attention".to_string(), Box::new(resolve_attention)); tokenizer.tokenize_state.marker = b'\0'; State::Ok @@ -159,10 +159,10 @@ fn resolve_attention(tokenizer: &mut Tokenizer) { while start < tokenizer.events.len() { let enter = &tokenizer.events[start]; - if enter.event_type == EventType::Enter { + if enter.kind == Kind::Enter { balance += 1; - if enter.token_type == Token::AttentionSequence { + if enter.name == Name::AttentionSequence { let end = start + 1; let exit = &tokenizer.events[end]; @@ -337,41 +337,41 @@ fn resolve_attention(tokenizer: &mut Tokenizer) { 0, vec![ Event { - event_type: EventType::Enter, - token_type: if take == 1 { - Token::Emphasis + kind: Kind::Enter, + name: if take == 1 { + Name::Emphasis } else { - Token::Strong + Name::Strong }, point: seq_open_enter.clone(), link: None, }, Event { - event_type: EventType::Enter, - token_type: if take == 1 { - Token::EmphasisSequence + kind: Kind::Enter, + name: if take == 1 { + Name::EmphasisSequence } else { - Token::StrongSequence + Name::StrongSequence }, point: seq_open_enter.clone(), link: None, }, Event { - event_type: EventType::Exit, - token_type: if take == 1 { - Token::EmphasisSequence + kind: Kind::Exit, + name: if take == 1 { + Name::EmphasisSequence } else { - Token::StrongSequence + Name::StrongSequence }, point: seq_open_exit.clone(), link: None, }, Event { - event_type: EventType::Enter, - token_type: if take == 1 { - Token::EmphasisText + kind: Kind::Enter, + name: if take == 1 { + Name::EmphasisText } else { - Token::StrongText + Name::StrongText }, point: seq_open_exit.clone(), link: None, @@ -384,41 +384,41 @@ fn resolve_attention(tokenizer: &mut Tokenizer) { 0, vec![ Event { - event_type: EventType::Exit, - token_type: if take == 1 { - Token::EmphasisText + kind: Kind::Exit, + name: if take == 1 { + Name::EmphasisText } else { - Token::StrongText + Name::StrongText }, point: seq_close_enter.clone(), link: None, }, Event { - event_type: EventType::Enter, - token_type: if take == 1 { - Token::EmphasisSequence + kind: Kind::Enter, + name: if take == 1 { + Name::EmphasisSequence } else { - Token::StrongSequence + Name::StrongSequence }, point: seq_close_enter.clone(), link: None, }, Event { - event_type: EventType::Exit, - token_type: if take == 1 { - Token::EmphasisSequence + kind: Kind::Exit, + name: if take == 1 { + Name::EmphasisSequence } else { - Token::StrongSequence + Name::StrongSequence }, point: seq_close_exit.clone(), link: None, }, Event { - event_type: EventType::Exit, - token_type: if take == 1 { - Token::Emphasis + kind: Kind::Exit, + name: if take == 1 { + Name::Emphasis } else { - Token::Strong + Name::Strong }, point: seq_close_exit.clone(), link: None, @@ -438,8 +438,8 @@ fn resolve_attention(tokenizer: &mut Tokenizer) { let mut index = 0; while index < sequences.len() { let sequence = &sequences[index]; - tokenizer.events[sequence.event_index].token_type = Token::Data; - tokenizer.events[sequence.event_index + 1].token_type = Token::Data; + tokenizer.events[sequence.event_index].name = Name::Data; + tokenizer.events[sequence.event_index + 1].name = Name::Data; index += 1; } diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs index b635d96..c16b7c0 100644 --- a/src/construct/autolink.rs +++ b/src/construct/autolink.rs @@ -102,8 +102,8 @@ //! [html-a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element use crate::constant::{AUTOLINK_DOMAIN_SIZE_MAX, AUTOLINK_SCHEME_SIZE_MAX}; -use crate::state::{Name, State}; -use crate::token::Token; +use crate::event::Name; +use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; /// Start of an autolink. @@ -117,12 +117,12 @@ use crate::tokenizer::Tokenizer; pub fn start(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'<') if tokenizer.parse_state.constructs.autolink => { - tokenizer.enter(Token::Autolink); - tokenizer.enter(Token::AutolinkMarker); + tokenizer.enter(Name::Autolink); + tokenizer.enter(Name::AutolinkMarker); tokenizer.consume(); - tokenizer.exit(Token::AutolinkMarker); - tokenizer.enter(Token::AutolinkProtocol); - State::Next(Name::AutolinkOpen) + tokenizer.exit(Name::AutolinkMarker); + tokenizer.enter(Name::AutolinkProtocol); + State::Next(StateName::AutolinkOpen) } _ => State::Nok, } @@ -141,9 +141,9 @@ pub fn open(tokenizer: &mut Tokenizer) -> State { // ASCII alphabetic. Some(b'A'..=b'Z' | b'a'..=b'z') => { tokenizer.consume(); - State::Next(Name::AutolinkSchemeOrEmailAtext) + State::Next(StateName::AutolinkSchemeOrEmailAtext) } - _ => State::Retry(Name::AutolinkEmailAtext), + _ => State::Retry(StateName::AutolinkEmailAtext), } } @@ -161,9 +161,9 @@ pub fn scheme_or_email_atext(tokenizer: &mut Tokenizer) -> State { Some(b'+' | b'-' | b'.' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => { // Count the previous alphabetical from `open` too. tokenizer.tokenize_state.size = 1; - State::Retry(Name::AutolinkSchemeInsideOrEmailAtext) + State::Retry(StateName::AutolinkSchemeInsideOrEmailAtext) } - _ => State::Retry(Name::AutolinkEmailAtext), + _ => State::Retry(StateName::AutolinkEmailAtext), } } @@ -180,7 +180,7 @@ pub fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer) -> State { Some(b':') => { tokenizer.consume(); tokenizer.tokenize_state.size = 0; - State::Next(Name::AutolinkUrlInside) + State::Next(StateName::AutolinkUrlInside) } // ASCII alphanumeric and `+`, `-`, and `.`. Some(b'+' | b'-' | b'.' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') @@ -188,11 +188,11 @@ pub fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer) -> State { { tokenizer.tokenize_state.size += 1; tokenizer.consume(); - State::Next(Name::AutolinkSchemeInsideOrEmailAtext) + State::Next(StateName::AutolinkSchemeInsideOrEmailAtext) } _ => { tokenizer.tokenize_state.size = 0; - State::Retry(Name::AutolinkEmailAtext) + State::Retry(StateName::AutolinkEmailAtext) } } } @@ -206,18 +206,18 @@ pub fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer) -> State { pub fn url_inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'>') => { - tokenizer.exit(Token::AutolinkProtocol); - tokenizer.enter(Token::AutolinkMarker); + tokenizer.exit(Name::AutolinkProtocol); + tokenizer.enter(Name::AutolinkMarker); tokenizer.consume(); - tokenizer.exit(Token::AutolinkMarker); - tokenizer.exit(Token::Autolink); + tokenizer.exit(Name::AutolinkMarker); + tokenizer.exit(Name::Autolink); State::Ok } // ASCII control, space, or `<`. None | Some(b'\0'..=0x1F | b' ' | b'<' | 0x7F) => State::Nok, Some(_) => { tokenizer.consume(); - State::Next(Name::AutolinkUrlInside) + State::Next(StateName::AutolinkUrlInside) } } } @@ -232,7 +232,7 @@ pub fn email_atext(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'@') => { tokenizer.consume(); - State::Next(Name::AutolinkEmailAtSignOrDot) + State::Next(StateName::AutolinkEmailAtSignOrDot) } // ASCII atext. // @@ -255,7 +255,7 @@ pub fn email_atext(tokenizer: &mut Tokenizer) -> State { b'#'..=b'\'' | b'*' | b'+' | b'-'..=b'9' | b'=' | b'?' | b'A'..=b'Z' | b'^'..=b'~', ) => { tokenizer.consume(); - State::Next(Name::AutolinkEmailAtext) + State::Next(StateName::AutolinkEmailAtext) } _ => State::Nok, } @@ -270,7 +270,9 @@ pub fn email_atext(tokenizer: &mut Tokenizer) -> State { pub fn email_at_sign_or_dot(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // ASCII alphanumeric. - Some(b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => State::Retry(Name::AutolinkEmailValue), + Some(b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => { + State::Retry(StateName::AutolinkEmailValue) + } _ => State::Nok, } } @@ -286,22 +288,22 @@ pub fn email_label(tokenizer: &mut Tokenizer) -> State { Some(b'.') => { tokenizer.tokenize_state.size = 0; tokenizer.consume(); - State::Next(Name::AutolinkEmailAtSignOrDot) + State::Next(StateName::AutolinkEmailAtSignOrDot) } Some(b'>') => { tokenizer.tokenize_state.size = 0; let index = tokenizer.events.len(); - tokenizer.exit(Token::AutolinkProtocol); + tokenizer.exit(Name::AutolinkProtocol); // Change the token type. - tokenizer.events[index - 1].token_type = Token::AutolinkEmail; - tokenizer.events[index].token_type = Token::AutolinkEmail; - tokenizer.enter(Token::AutolinkMarker); + tokenizer.events[index - 1].name = Name::AutolinkEmail; + tokenizer.events[index].name = Name::AutolinkEmail; + tokenizer.enter(Name::AutolinkMarker); tokenizer.consume(); - tokenizer.exit(Token::AutolinkMarker); - tokenizer.exit(Token::Autolink); + tokenizer.exit(Name::AutolinkMarker); + tokenizer.exit(Name::Autolink); State::Ok } - _ => State::Retry(Name::AutolinkEmailValue), + _ => State::Retry(StateName::AutolinkEmailValue), } } @@ -320,9 +322,9 @@ pub fn email_value(tokenizer: &mut Tokenizer) -> State { if tokenizer.tokenize_state.size < AUTOLINK_DOMAIN_SIZE_MAX => { let name = if matches!(tokenizer.current, Some(b'-')) { - Name::AutolinkEmailValue + StateName::AutolinkEmailValue } else { - Name::AutolinkEmailLabel + StateName::AutolinkEmailLabel }; tokenizer.tokenize_state.size += 1; tokenizer.consume(); diff --git a/src/construct/blank_line.rs b/src/construct/blank_line.rs index e8a06e9..2ea986d 100644 --- a/src/construct/blank_line.rs +++ b/src/construct/blank_line.rs @@ -33,7 +33,7 @@ //! [flow]: crate::content::flow use crate::construct::partial_space_or_tab::space_or_tab; -use crate::state::{Name, State}; +use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; /// Start of a blank line. @@ -50,8 +50,8 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { let name = space_or_tab(tokenizer); tokenizer.attempt( name, - State::Next(Name::BlankLineAfter), - State::Next(Name::BlankLineAfter), + State::Next(StateName::BlankLineAfter), + State::Next(StateName::BlankLineAfter), ) } diff --git a/src/construct/block_quote.rs b/src/construct/block_quote.rs index 7b8ce82..a32375e 100644 --- a/src/construct/block_quote.rs +++ b/src/construct/block_quote.rs @@ -35,8 +35,8 @@ use crate::constant::TAB_SIZE; use crate::construct::partial_space_or_tab::space_or_tab_min_max; -use crate::state::{Name, State}; -use crate::token::Token; +use crate::event::Name; +use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; /// Start of block quote. @@ -56,7 +56,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { usize::MAX }, ); - tokenizer.attempt(name, State::Next(Name::BlockQuoteBefore), State::Nok) + tokenizer.attempt(name, State::Next(StateName::BlockQuoteBefore), State::Nok) } else { State::Nok } @@ -71,10 +71,10 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'>') => { - tokenizer.enter(Token::BlockQuote); - State::Retry(Name::BlockQuoteContBefore) + tokenizer.enter(Name::BlockQuote); + State::Retry(StateName::BlockQuoteContBefore) } - _ => State::Retry(Name::BlockQuoteContBefore), + _ => State::Retry(StateName::BlockQuoteContBefore), } } @@ -95,7 +95,11 @@ pub fn cont_start(tokenizer: &mut Tokenizer) -> State { usize::MAX }, ); - tokenizer.attempt(name, State::Next(Name::BlockQuoteContBefore), State::Nok) + tokenizer.attempt( + name, + State::Next(StateName::BlockQuoteContBefore), + State::Nok, + ) } /// After whitespace, before `>`. @@ -108,11 +112,11 @@ pub fn cont_start(tokenizer: &mut Tokenizer) -> State { pub fn cont_before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'>') => { - tokenizer.enter(Token::BlockQuotePrefix); - tokenizer.enter(Token::BlockQuoteMarker); + tokenizer.enter(Name::BlockQuotePrefix); + tokenizer.enter(Name::BlockQuoteMarker); tokenizer.consume(); - tokenizer.exit(Token::BlockQuoteMarker); - State::Next(Name::BlockQuoteContAfter) + tokenizer.exit(Name::BlockQuoteMarker); + State::Next(StateName::BlockQuoteContAfter) } _ => State::Nok, } @@ -128,11 +132,11 @@ pub fn cont_before(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn cont_after(tokenizer: &mut Tokenizer) -> State { if let Some(b'\t' | b' ') = tokenizer.current { - tokenizer.enter(Token::SpaceOrTab); + tokenizer.enter(Name::SpaceOrTab); tokenizer.consume(); - tokenizer.exit(Token::SpaceOrTab); + tokenizer.exit(Name::SpaceOrTab); } - tokenizer.exit(Token::BlockQuotePrefix); + tokenizer.exit(Name::BlockQuotePrefix); State::Ok } diff --git a/src/construct/character_escape.rs b/src/construct/character_escape.rs index c3d5458..0cd7126 100644 --- a/src/construct/character_escape.rs +++ b/src/construct/character_escape.rs @@ -33,8 +33,8 @@ //! [character_reference]: crate::construct::character_reference //! [hard_break_escape]: crate::construct::hard_break_escape -use crate::state::{Name, State}; -use crate::token::Token; +use crate::event::Name; +use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; /// Start of a character escape. @@ -46,11 +46,11 @@ use crate::tokenizer::Tokenizer; pub fn start(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'\\') if tokenizer.parse_state.constructs.character_escape => { - tokenizer.enter(Token::CharacterEscape); - tokenizer.enter(Token::CharacterEscapeMarker); + tokenizer.enter(Name::CharacterEscape); + tokenizer.enter(Name::CharacterEscapeMarker); tokenizer.consume(); - tokenizer.exit(Token::CharacterEscapeMarker); - State::Next(Name::CharacterEscapeInside) + tokenizer.exit(Name::CharacterEscapeMarker); + State::Next(StateName::CharacterEscapeInside) } _ => State::Nok, } @@ -62,15 +62,14 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// > | a\*b /// ^ /// ``` -// Name::CharacterEscapeInside pub fn inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // ASCII punctuation. Some(b'!'..=b'/' | b':'..=b'@' | b'['..=b'`' | b'{'..=b'~') => { - tokenizer.enter(Token::CharacterEscapeValue); + tokenizer.enter(Name::CharacterEscapeValue); tokenizer.consume(); - tokenizer.exit(Token::CharacterEscapeValue); - tokenizer.exit(Token::CharacterEscape); + tokenizer.exit(Name::CharacterEscapeValue); + tokenizer.exit(Name::CharacterEscape); State::Ok } _ => State::Nok, diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs index 435c115..0158acf 100644 --- a/src/construct/character_reference.rs +++ b/src/construct/character_reference.rs @@ -65,8 +65,8 @@ use crate::constant::{ CHARACTER_REFERENCES, CHARACTER_REFERENCE_DECIMAL_SIZE_MAX, CHARACTER_REFERENCE_HEXADECIMAL_SIZE_MAX, CHARACTER_REFERENCE_NAMED_SIZE_MAX, }; -use crate::state::{Name, State}; -use crate::token::Token; +use crate::event::Name; +use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; use crate::util::slice::Slice; @@ -83,11 +83,11 @@ use crate::util::slice::Slice; pub fn start(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'&') if tokenizer.parse_state.constructs.character_reference => { - tokenizer.enter(Token::CharacterReference); - tokenizer.enter(Token::CharacterReferenceMarker); + tokenizer.enter(Name::CharacterReference); + tokenizer.enter(Name::CharacterReferenceMarker); tokenizer.consume(); - tokenizer.exit(Token::CharacterReferenceMarker); - State::Next(Name::CharacterReferenceOpen) + tokenizer.exit(Name::CharacterReferenceMarker); + State::Next(StateName::CharacterReferenceOpen) } _ => State::Nok, } @@ -104,17 +104,16 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// > | a	b /// ^ /// ``` -// Name::CharacterReferenceOpen pub fn open(tokenizer: &mut Tokenizer) -> State { if let Some(b'#') = tokenizer.current { - tokenizer.enter(Token::CharacterReferenceMarkerNumeric); + tokenizer.enter(Name::CharacterReferenceMarkerNumeric); tokenizer.consume(); - tokenizer.exit(Token::CharacterReferenceMarkerNumeric); - State::Next(Name::CharacterReferenceNumeric) + tokenizer.exit(Name::CharacterReferenceMarkerNumeric); + State::Next(StateName::CharacterReferenceNumeric) } else { tokenizer.tokenize_state.marker = b'&'; - tokenizer.enter(Token::CharacterReferenceValue); - State::Retry(Name::CharacterReferenceValue) + tokenizer.enter(Name::CharacterReferenceValue); + State::Retry(StateName::CharacterReferenceValue) } } @@ -127,19 +126,18 @@ pub fn open(tokenizer: &mut Tokenizer) -> State { /// > | a	b /// ^ /// ``` -// Name::CharacterReferenceNumeric pub fn numeric(tokenizer: &mut Tokenizer) -> State { if let Some(b'x' | b'X') = tokenizer.current { - tokenizer.enter(Token::CharacterReferenceMarkerHexadecimal); + tokenizer.enter(Name::CharacterReferenceMarkerHexadecimal); tokenizer.consume(); - tokenizer.exit(Token::CharacterReferenceMarkerHexadecimal); - tokenizer.enter(Token::CharacterReferenceValue); + tokenizer.exit(Name::CharacterReferenceMarkerHexadecimal); + tokenizer.enter(Name::CharacterReferenceValue); tokenizer.tokenize_state.marker = b'x'; - State::Next(Name::CharacterReferenceValue) + State::Next(StateName::CharacterReferenceValue) } else { - tokenizer.enter(Token::CharacterReferenceValue); + tokenizer.enter(Name::CharacterReferenceValue); tokenizer.tokenize_state.marker = b'#'; - State::Retry(Name::CharacterReferenceValue) + State::Retry(StateName::CharacterReferenceValue) } } @@ -176,11 +174,11 @@ pub fn value(tokenizer: &mut Tokenizer) -> State { } } - tokenizer.exit(Token::CharacterReferenceValue); - tokenizer.enter(Token::CharacterReferenceMarkerSemi); + tokenizer.exit(Name::CharacterReferenceValue); + tokenizer.enter(Name::CharacterReferenceMarkerSemi); tokenizer.consume(); - tokenizer.exit(Token::CharacterReferenceMarkerSemi); - tokenizer.exit(Token::CharacterReference); + tokenizer.exit(Name::CharacterReferenceMarkerSemi); + tokenizer.exit(Name::CharacterReference); tokenizer.tokenize_state.marker = 0; tokenizer.tokenize_state.size = 0; return State::Ok; @@ -203,7 +201,7 @@ pub fn value(tokenizer: &mut Tokenizer) -> State { if tokenizer.tokenize_state.size < max && test(&byte) { tokenizer.tokenize_state.size += 1; tokenizer.consume(); - return State::Next(Name::CharacterReferenceValue); + return State::Next(StateName::CharacterReferenceValue); } } diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs index 0ce8d02..9488f62 100644 --- a/src/construct/code_fenced.rs +++ b/src/construct/code_fenced.rs @@ -103,9 +103,9 @@ use crate::constant::{CODE_FENCED_SEQUENCE_SIZE_MIN, TAB_SIZE}; use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}; -use crate::state::{Name, State}; -use crate::token::Token; -use crate::tokenizer::{ContentType, Tokenizer}; +use crate::event::{Content, Name}; +use crate::state::{Name as StateName, State}; +use crate::tokenizer::Tokenizer; use crate::util::slice::{Position, Slice}; /// Start of fenced code. @@ -118,8 +118,8 @@ use crate::util::slice::{Position, Slice}; /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { if tokenizer.parse_state.constructs.code_fenced { - tokenizer.enter(Token::CodeFenced); - tokenizer.enter(Token::CodeFencedFence); + tokenizer.enter(Name::CodeFenced); + tokenizer.enter(Name::CodeFencedFence); let name = space_or_tab_min_max( tokenizer, 0, @@ -131,7 +131,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { ); tokenizer.attempt( name, - State::Next(Name::CodeFencedBeforeSequenceOpen), + State::Next(StateName::CodeFencedBeforeSequenceOpen), State::Nok, ) } else { @@ -152,7 +152,7 @@ pub fn before_sequence_open(tokenizer: &mut Tokenizer) -> State { let mut prefix = 0; if let Some(event) = tail { - if event.token_type == Token::SpaceOrTab { + if event.name == Name::SpaceOrTab { prefix = Slice::from_position( tokenizer.parse_state.bytes, &Position::from_exit_event(&tokenizer.events, tokenizer.events.len() - 1), @@ -164,8 +164,8 @@ pub fn before_sequence_open(tokenizer: &mut Tokenizer) -> State { if let Some(b'`' | b'~') = tokenizer.current { tokenizer.tokenize_state.marker = tokenizer.current.unwrap(); tokenizer.tokenize_state.size_c = prefix; - tokenizer.enter(Token::CodeFencedFenceSequence); - State::Retry(Name::CodeFencedSequenceOpen) + tokenizer.enter(Name::CodeFencedFenceSequence); + State::Retry(StateName::CodeFencedSequenceOpen) } else { State::Nok } @@ -184,15 +184,15 @@ pub fn sequence_open(tokenizer: &mut Tokenizer) -> State { Some(b'`' | b'~') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => { tokenizer.tokenize_state.size += 1; tokenizer.consume(); - State::Next(Name::CodeFencedSequenceOpen) + State::Next(StateName::CodeFencedSequenceOpen) } _ if tokenizer.tokenize_state.size >= CODE_FENCED_SEQUENCE_SIZE_MIN => { - tokenizer.exit(Token::CodeFencedFenceSequence); + tokenizer.exit(Name::CodeFencedFenceSequence); let name = space_or_tab(tokenizer); tokenizer.attempt( name, - State::Next(Name::CodeFencedInfoBefore), - State::Next(Name::CodeFencedInfoBefore), + State::Next(StateName::CodeFencedInfoBefore), + State::Next(StateName::CodeFencedInfoBefore), ) } _ => { @@ -215,19 +215,19 @@ pub fn sequence_open(tokenizer: &mut Tokenizer) -> State { pub fn info_before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { - tokenizer.exit(Token::CodeFencedFence); + tokenizer.exit(Name::CodeFencedFence); // Do not form containers. tokenizer.concrete = true; tokenizer.check( - Name::NonLazyContinuationStart, - State::Next(Name::CodeFencedAtNonLazyBreak), - State::Next(Name::CodeFencedAfter), + StateName::NonLazyContinuationStart, + State::Next(StateName::CodeFencedAtNonLazyBreak), + State::Next(StateName::CodeFencedAfter), ) } _ => { - tokenizer.enter(Token::CodeFencedFenceInfo); - tokenizer.enter_with_content(Token::Data, Some(ContentType::String)); - State::Retry(Name::CodeFencedInfo) + tokenizer.enter(Name::CodeFencedFenceInfo); + tokenizer.enter_with_content(Name::Data, Some(Content::String)); + State::Retry(StateName::CodeFencedInfo) } } } @@ -243,18 +243,18 @@ pub fn info_before(tokenizer: &mut Tokenizer) -> State { pub fn info(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { - tokenizer.exit(Token::Data); - tokenizer.exit(Token::CodeFencedFenceInfo); - State::Retry(Name::CodeFencedInfoBefore) + tokenizer.exit(Name::Data); + tokenizer.exit(Name::CodeFencedFenceInfo); + State::Retry(StateName::CodeFencedInfoBefore) } Some(b'\t' | b' ') => { - tokenizer.exit(Token::Data); - tokenizer.exit(Token::CodeFencedFenceInfo); + tokenizer.exit(Name::Data); + tokenizer.exit(Name::CodeFencedFenceInfo); let name = space_or_tab(tokenizer); tokenizer.attempt( name, - State::Next(Name::CodeFencedMetaBefore), - State::Next(Name::CodeFencedMetaBefore), + State::Next(StateName::CodeFencedMetaBefore), + State::Next(StateName::CodeFencedMetaBefore), ) } Some(b'`') if tokenizer.tokenize_state.marker == b'`' => { @@ -266,7 +266,7 @@ pub fn info(tokenizer: &mut Tokenizer) -> State { } Some(_) => { tokenizer.consume(); - State::Next(Name::CodeFencedInfo) + State::Next(StateName::CodeFencedInfo) } } } @@ -281,11 +281,11 @@ pub fn info(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn meta_before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - None | Some(b'\n') => State::Retry(Name::CodeFencedInfoBefore), + None | Some(b'\n') => State::Retry(StateName::CodeFencedInfoBefore), _ => { - tokenizer.enter(Token::CodeFencedFenceMeta); - tokenizer.enter_with_content(Token::Data, Some(ContentType::String)); - State::Retry(Name::CodeFencedMeta) + tokenizer.enter(Name::CodeFencedFenceMeta); + tokenizer.enter_with_content(Name::Data, Some(Content::String)); + State::Retry(StateName::CodeFencedMeta) } } } @@ -301,9 +301,9 @@ pub fn meta_before(tokenizer: &mut Tokenizer) -> State { pub fn meta(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { - tokenizer.exit(Token::Data); - tokenizer.exit(Token::CodeFencedFenceMeta); - State::Retry(Name::CodeFencedInfoBefore) + tokenizer.exit(Name::Data); + tokenizer.exit(Name::CodeFencedFenceMeta); + State::Retry(StateName::CodeFencedInfoBefore) } Some(b'`') if tokenizer.tokenize_state.marker == b'`' => { tokenizer.concrete = false; @@ -314,7 +314,7 @@ pub fn meta(tokenizer: &mut Tokenizer) -> State { } _ => { tokenizer.consume(); - State::Next(Name::CodeFencedMeta) + State::Next(StateName::CodeFencedMeta) } } } @@ -330,9 +330,9 @@ pub fn meta(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn at_non_lazy_break(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - Name::CodeFencedCloseBefore, - State::Next(Name::CodeFencedAfter), - State::Next(Name::CodeFencedContentBefore), + StateName::CodeFencedCloseBefore, + State::Next(StateName::CodeFencedAfter), + State::Next(StateName::CodeFencedContentBefore), ) } @@ -347,10 +347,10 @@ pub fn at_non_lazy_break(tokenizer: &mut Tokenizer) -> State { pub fn close_before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'\n') => { - tokenizer.enter(Token::LineEnding); + tokenizer.enter(Name::LineEnding); tokenizer.consume(); - tokenizer.exit(Token::LineEnding); - State::Next(Name::CodeFencedCloseStart) + tokenizer.exit(Name::LineEnding); + State::Next(StateName::CodeFencedCloseStart) } _ => unreachable!("expected eol"), } @@ -365,7 +365,7 @@ pub fn close_before(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn close_start(tokenizer: &mut Tokenizer) -> State { - tokenizer.enter(Token::CodeFencedFence); + tokenizer.enter(Name::CodeFencedFence); let name = space_or_tab_min_max( tokenizer, 0, @@ -377,7 +377,7 @@ pub fn close_start(tokenizer: &mut Tokenizer) -> State { ); tokenizer.attempt( name, - State::Next(Name::CodeFencedBeforeSequenceClose), + State::Next(StateName::CodeFencedBeforeSequenceClose), State::Nok, ) } @@ -393,8 +393,8 @@ pub fn close_start(tokenizer: &mut Tokenizer) -> State { pub fn before_sequence_close(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'`' | b'~') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => { - tokenizer.enter(Token::CodeFencedFenceSequence); - State::Retry(Name::CodeFencedSequenceClose) + tokenizer.enter(Name::CodeFencedFenceSequence); + State::Retry(StateName::CodeFencedSequenceClose) } _ => State::Nok, } @@ -413,18 +413,18 @@ pub fn sequence_close(tokenizer: &mut Tokenizer) -> State { Some(b'`' | b'~') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => { tokenizer.tokenize_state.size_b += 1; tokenizer.consume(); - State::Next(Name::CodeFencedSequenceClose) + State::Next(StateName::CodeFencedSequenceClose) } _ if tokenizer.tokenize_state.size_b >= CODE_FENCED_SEQUENCE_SIZE_MIN && tokenizer.tokenize_state.size_b >= tokenizer.tokenize_state.size => { tokenizer.tokenize_state.size_b = 0; - tokenizer.exit(Token::CodeFencedFenceSequence); + tokenizer.exit(Name::CodeFencedFenceSequence); let name = space_or_tab(tokenizer); tokenizer.attempt( name, - State::Next(Name::CodeFencedAfterSequenceClose), - State::Next(Name::CodeFencedAfterSequenceClose), + State::Next(StateName::CodeFencedAfterSequenceClose), + State::Next(StateName::CodeFencedAfterSequenceClose), ) } _ => { @@ -445,7 +445,7 @@ pub fn sequence_close(tokenizer: &mut Tokenizer) -> State { pub fn sequence_close_after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { - tokenizer.exit(Token::CodeFencedFence); + tokenizer.exit(Name::CodeFencedFence); State::Ok } _ => State::Nok, @@ -461,10 +461,10 @@ pub fn sequence_close_after(tokenizer: &mut Tokenizer) -> State { /// | ~~~ /// ``` pub fn content_before(tokenizer: &mut Tokenizer) -> State { - tokenizer.enter(Token::LineEnding); + tokenizer.enter(Name::LineEnding); tokenizer.consume(); - tokenizer.exit(Token::LineEnding); - State::Next(Name::CodeFencedContentStart) + tokenizer.exit(Name::LineEnding); + State::Next(StateName::CodeFencedContentStart) } /// Before code content, definitely not before a closing fence. /// @@ -478,7 +478,7 @@ pub fn content_start(tokenizer: &mut Tokenizer) -> State { let name = space_or_tab_min_max(tokenizer, 0, tokenizer.tokenize_state.size_c); tokenizer.attempt( name, - State::Next(Name::CodeFencedBeforeContentChunk), + State::Next(StateName::CodeFencedBeforeContentChunk), State::Nok, ) } @@ -494,13 +494,13 @@ pub fn content_start(tokenizer: &mut Tokenizer) -> State { pub fn before_content_chunk(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => tokenizer.check( - Name::NonLazyContinuationStart, - State::Next(Name::CodeFencedAtNonLazyBreak), - State::Next(Name::CodeFencedAfter), + StateName::NonLazyContinuationStart, + State::Next(StateName::CodeFencedAtNonLazyBreak), + State::Next(StateName::CodeFencedAfter), ), _ => { - tokenizer.enter(Token::CodeFlowChunk); - State::Retry(Name::CodeFencedContentChunk) + tokenizer.enter(Name::CodeFlowChunk); + State::Retry(StateName::CodeFencedContentChunk) } } } @@ -516,12 +516,12 @@ pub fn before_content_chunk(tokenizer: &mut Tokenizer) -> State { pub fn content_chunk(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { - tokenizer.exit(Token::CodeFlowChunk); - State::Retry(Name::CodeFencedBeforeContentChunk) + tokenizer.exit(Name::CodeFlowChunk); + State::Retry(StateName::CodeFencedBeforeContentChunk) } _ => { tokenizer.consume(); - State::Next(Name::CodeFencedContentChunk) + State::Next(StateName::CodeFencedContentChunk) } } } @@ -535,7 +535,7 @@ pub fn content_chunk(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn after(tokenizer: &mut Tokenizer) -> State { - tokenizer.exit(Token::CodeFenced); + tokenizer.exit(Name::CodeFenced); tokenizer.tokenize_state.marker = 0; tokenizer.tokenize_state.size_c = 0; tokenizer.tokenize_state.size = 0; diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs index f442f27..7297759 100644 --- a/src/construct/code_indented.rs +++ b/src/construct/code_indented.rs @@ -47,8 +47,8 @@ use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}; use crate::constant::TAB_SIZE; -use crate::state::{Name, State}; -use crate::token::Token; +use crate::event::Name; +use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; /// Start of code (indented). @@ -64,9 +64,13 @@ use crate::tokenizer::Tokenizer; pub fn start(tokenizer: &mut Tokenizer) -> State { // Do not interrupt paragraphs. if !tokenizer.interrupt && tokenizer.parse_state.constructs.code_indented { - tokenizer.enter(Token::CodeIndented); + tokenizer.enter(Name::CodeIndented); let name = space_or_tab_min_max(tokenizer, TAB_SIZE, TAB_SIZE); - tokenizer.attempt(name, State::Next(Name::CodeIndentedAtBreak), State::Nok) + tokenizer.attempt( + name, + State::Next(StateName::CodeIndentedAtBreak), + State::Nok, + ) } else { State::Nok } @@ -80,15 +84,15 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn at_break(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - None => State::Retry(Name::CodeIndentedAfter), + None => State::Retry(StateName::CodeIndentedAfter), Some(b'\n') => tokenizer.attempt( - Name::CodeIndentedFurtherStart, - State::Next(Name::CodeIndentedAtBreak), - State::Next(Name::CodeIndentedAfter), + StateName::CodeIndentedFurtherStart, + State::Next(StateName::CodeIndentedAtBreak), + State::Next(StateName::CodeIndentedAfter), ), _ => { - tokenizer.enter(Token::CodeFlowChunk); - State::Retry(Name::CodeIndentedInside) + tokenizer.enter(Name::CodeFlowChunk); + State::Retry(StateName::CodeIndentedInside) } } } @@ -102,12 +106,12 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { pub fn inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { - tokenizer.exit(Token::CodeFlowChunk); - State::Retry(Name::CodeIndentedAtBreak) + tokenizer.exit(Name::CodeFlowChunk); + State::Retry(StateName::CodeIndentedAtBreak) } _ => { tokenizer.consume(); - State::Next(Name::CodeIndentedInside) + State::Next(StateName::CodeIndentedInside) } } } @@ -119,7 +123,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn after(tokenizer: &mut Tokenizer) -> State { - tokenizer.exit(Token::CodeIndented); + tokenizer.exit(Name::CodeIndented); // Feel free to interrupt. tokenizer.interrupt = false; State::Ok @@ -135,17 +139,17 @@ pub fn after(tokenizer: &mut Tokenizer) -> State { pub fn further_start(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'\n') if !tokenizer.lazy => { - tokenizer.enter(Token::LineEnding); + tokenizer.enter(Name::LineEnding); tokenizer.consume(); - tokenizer.exit(Token::LineEnding); - State::Next(Name::CodeIndentedFurtherStart) + tokenizer.exit(Name::LineEnding); + State::Next(StateName::CodeIndentedFurtherStart) } _ if !tokenizer.lazy => { let name = space_or_tab_min_max(tokenizer, TAB_SIZE, TAB_SIZE); tokenizer.attempt( name, - State::Next(Name::CodeIndentedFurtherEnd), - State::Next(Name::CodeIndentedFurtherBegin), + State::Next(StateName::CodeIndentedFurtherEnd), + State::Next(StateName::CodeIndentedFurtherBegin), ) } _ => State::Nok, @@ -174,8 +178,8 @@ pub fn further_begin(tokenizer: &mut Tokenizer) -> State { let name = space_or_tab(tokenizer); tokenizer.attempt( name, - State::Next(Name::CodeIndentedFurtherAfter), - State::Next(Name::CodeIndentedFurtherAfter), + State::Next(StateName::CodeIndentedFurtherAfter), + State::Next(StateName::CodeIndentedFurtherAfter), ) } @@ -188,7 +192,7 @@ pub fn further_begin(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn further_after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - Some(b'\n') => State::Retry(Name::CodeIndentedFurtherStart), + Some(b'\n') => State::Retry(StateName::CodeIndentedFurtherStart), _ => State::Nok, } } diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs index f626cfb..f48f63c 100644 --- a/src/construct/code_text.rs +++ b/src/construct/code_text.rs @@ -83,8 +83,8 @@ //! [code_fenced]: crate::construct::code_fenced //! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element -use crate::state::{Name, State}; -use crate::token::Token; +use crate::event::Name; +use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; /// Start of code (text). @@ -101,12 +101,12 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { if tokenizer.parse_state.constructs.code_text && (tokenizer.previous != Some(b'`') || (!tokenizer.events.is_empty() - && tokenizer.events[tokenizer.events.len() - 1].token_type - == Token::CharacterEscape)) => + && tokenizer.events[tokenizer.events.len() - 1].name + == Name::CharacterEscape)) => { - tokenizer.enter(Token::CodeText); - tokenizer.enter(Token::CodeTextSequence); - State::Retry(Name::CodeTextSequenceOpen) + tokenizer.enter(Name::CodeText); + tokenizer.enter(Name::CodeTextSequence); + State::Retry(StateName::CodeTextSequenceOpen) } _ => State::Nok, } @@ -122,10 +122,10 @@ pub fn sequence_open(tokenizer: &mut Tokenizer) -> State { if let Some(b'`') = tokenizer.current { tokenizer.tokenize_state.size += 1; tokenizer.consume(); - State::Next(Name::CodeTextSequenceOpen) + State::Next(StateName::CodeTextSequenceOpen) } else { - tokenizer.exit(Token::CodeTextSequence); - State::Retry(Name::CodeTextBetween) + tokenizer.exit(Name::CodeTextSequence); + State::Retry(StateName::CodeTextBetween) } } @@ -142,18 +142,18 @@ pub fn between(tokenizer: &mut Tokenizer) -> State { State::Nok } Some(b'\n') => { - tokenizer.enter(Token::LineEnding); + tokenizer.enter(Name::LineEnding); tokenizer.consume(); - tokenizer.exit(Token::LineEnding); - State::Next(Name::CodeTextBetween) + tokenizer.exit(Name::LineEnding); + State::Next(StateName::CodeTextBetween) } Some(b'`') => { - tokenizer.enter(Token::CodeTextSequence); - State::Retry(Name::CodeTextSequenceClose) + tokenizer.enter(Name::CodeTextSequence); + State::Retry(StateName::CodeTextSequenceClose) } _ => { - tokenizer.enter(Token::CodeTextData); - State::Retry(Name::CodeTextData) + tokenizer.enter(Name::CodeTextData); + State::Retry(StateName::CodeTextData) } } } @@ -167,12 +167,12 @@ pub fn between(tokenizer: &mut Tokenizer) -> State { pub fn data(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n' | b'`') => { - tokenizer.exit(Token::CodeTextData); - State::Retry(Name::CodeTextBetween) + tokenizer.exit(Name::CodeTextData); + State::Retry(StateName::CodeTextBetween) } _ => { tokenizer.consume(); - State::Next(Name::CodeTextData) + State::Next(StateName::CodeTextData) } } } @@ -188,23 +188,23 @@ pub fn sequence_close(tokenizer: &mut Tokenizer) -> State { Some(b'`') => { tokenizer.tokenize_state.size_b += 1; tokenizer.consume(); - State::Next(Name::CodeTextSequenceClose) + State::Next(StateName::CodeTextSequenceClose) } _ => { if tokenizer.tokenize_state.size == tokenizer.tokenize_state.size_b { - tokenizer.exit(Token::CodeTextSequence); - tokenizer.exit(Token::CodeText); + tokenizer.exit(Name::CodeTextSequence); + tokenizer.exit(Name::CodeText); tokenizer.tokenize_state.size = 0; tokenizer.tokenize_state.size_b = 0; State::Ok } else { let index = tokenizer.events.len(); - tokenizer.exit(Token::CodeTextSequence); + tokenizer.exit(Name::CodeTextSequence); // More or less accents: mark as data. - tokenizer.events[index - 1].token_type = Token::CodeTextData; - tokenizer.events[index].token_type = Token::CodeTextData; + tokenizer.events[index - 1].name = Name::CodeTextData; + tokenizer.events[index].name = Name::CodeTextData; tokenizer.tokenize_state.size_b = 0; - State::Retry(Name::CodeTextBetween) + State::Retry(StateName::CodeTextBetween) } } } diff --git a/src/construct/definition.rs b/src/construct/definition.rs index 394375f..2533a1c 100644 --- a/src/construct/definition.rs +++ b/src/construct/definition.rs @@ -94,8 +94,8 @@ //! [html-img]: https://html.spec.whatwg.org/multipage/embedded-content.html#the-img-element use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_eol}; -use crate::state::{Name, State}; -use crate::token::Token; +use crate::event::Name; +use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; use crate::util::skip::opt_back as skip_opt_back; @@ -112,19 +112,19 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { && tokenizer.events[skip_opt_back( &tokenizer.events, tokenizer.events.len() - 1, - &[Token::LineEnding, Token::SpaceOrTab], + &[Name::LineEnding, Name::SpaceOrTab], )] - .token_type - == Token::Definition); + .name + == Name::Definition); if possible && tokenizer.parse_state.constructs.definition { - tokenizer.enter(Token::Definition); + tokenizer.enter(Name::Definition); // Note: arbitrary whitespace allowed even if code (indented) is on. let name = space_or_tab(tokenizer); tokenizer.attempt( name, - State::Next(Name::DefinitionBefore), - State::Next(Name::DefinitionBefore), + State::Next(StateName::DefinitionBefore), + State::Next(StateName::DefinitionBefore), ) } else { State::Nok @@ -140,12 +140,12 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'[') => { - tokenizer.tokenize_state.token_1 = Token::DefinitionLabel; - tokenizer.tokenize_state.token_2 = Token::DefinitionLabelMarker; - tokenizer.tokenize_state.token_3 = Token::DefinitionLabelString; + tokenizer.tokenize_state.token_1 = Name::DefinitionLabel; + tokenizer.tokenize_state.token_2 = Name::DefinitionLabelMarker; + tokenizer.tokenize_state.token_3 = Name::DefinitionLabelString; tokenizer.attempt( - Name::LabelStart, - State::Next(Name::DefinitionLabelAfter), + StateName::LabelStart, + State::Next(StateName::DefinitionLabelAfter), State::Nok, ) } @@ -160,16 +160,16 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn label_after(tokenizer: &mut Tokenizer) -> State { - tokenizer.tokenize_state.token_1 = Token::Data; - tokenizer.tokenize_state.token_2 = Token::Data; - tokenizer.tokenize_state.token_3 = Token::Data; + tokenizer.tokenize_state.token_1 = Name::Data; + tokenizer.tokenize_state.token_2 = Name::Data; + tokenizer.tokenize_state.token_3 = Name::Data; match tokenizer.current { Some(b':') => { - tokenizer.enter(Token::DefinitionMarker); + tokenizer.enter(Name::DefinitionMarker); tokenizer.consume(); - tokenizer.exit(Token::DefinitionMarker); - State::Next(Name::DefinitionMarkerAfter) + tokenizer.exit(Name::DefinitionMarker); + State::Next(StateName::DefinitionMarkerAfter) } _ => State::Nok, } @@ -185,8 +185,8 @@ pub fn marker_after(tokenizer: &mut Tokenizer) -> State { let name = space_or_tab_eol(tokenizer); tokenizer.attempt( name, - State::Next(Name::DefinitionDestinationBefore), - State::Next(Name::DefinitionDestinationBefore), + State::Next(StateName::DefinitionDestinationBefore), + State::Next(StateName::DefinitionDestinationBefore), ) } @@ -197,16 +197,16 @@ pub fn marker_after(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn destination_before(tokenizer: &mut Tokenizer) -> State { - tokenizer.tokenize_state.token_1 = Token::DefinitionDestination; - tokenizer.tokenize_state.token_2 = Token::DefinitionDestinationLiteral; - tokenizer.tokenize_state.token_3 = Token::DefinitionDestinationLiteralMarker; - tokenizer.tokenize_state.token_4 = Token::DefinitionDestinationRaw; - tokenizer.tokenize_state.token_5 = Token::DefinitionDestinationString; + tokenizer.tokenize_state.token_1 = Name::DefinitionDestination; + tokenizer.tokenize_state.token_2 = Name::DefinitionDestinationLiteral; + tokenizer.tokenize_state.token_3 = Name::DefinitionDestinationLiteralMarker; + tokenizer.tokenize_state.token_4 = Name::DefinitionDestinationRaw; + tokenizer.tokenize_state.token_5 = Name::DefinitionDestinationString; tokenizer.tokenize_state.size_b = usize::MAX; tokenizer.attempt( - Name::DestinationStart, - State::Next(Name::DefinitionDestinationAfter), - State::Next(Name::DefinitionDestinationMissing), + StateName::DestinationStart, + State::Next(StateName::DefinitionDestinationAfter), + State::Next(StateName::DefinitionDestinationMissing), ) } @@ -217,26 +217,26 @@ pub fn destination_before(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn destination_after(tokenizer: &mut Tokenizer) -> State { - tokenizer.tokenize_state.token_1 = Token::Data; - tokenizer.tokenize_state.token_2 = Token::Data; - tokenizer.tokenize_state.token_3 = Token::Data; - tokenizer.tokenize_state.token_4 = Token::Data; - tokenizer.tokenize_state.token_5 = Token::Data; + tokenizer.tokenize_state.token_1 = Name::Data; + tokenizer.tokenize_state.token_2 = Name::Data; + tokenizer.tokenize_state.token_3 = Name::Data; + tokenizer.tokenize_state.token_4 = Name::Data; + tokenizer.tokenize_state.token_5 = Name::Data; tokenizer.tokenize_state.size_b = 0; tokenizer.attempt( - Name::DefinitionTitleBefore, - State::Next(Name::DefinitionAfter), - State::Next(Name::DefinitionAfter), + StateName::DefinitionTitleBefore, + State::Next(StateName::DefinitionAfter), + State::Next(StateName::DefinitionAfter), ) } /// Without destination. pub fn destination_missing(tokenizer: &mut Tokenizer) -> State { - tokenizer.tokenize_state.token_1 = Token::Data; - tokenizer.tokenize_state.token_2 = Token::Data; - tokenizer.tokenize_state.token_3 = Token::Data; - tokenizer.tokenize_state.token_4 = Token::Data; - tokenizer.tokenize_state.token_5 = Token::Data; + tokenizer.tokenize_state.token_1 = Name::Data; + tokenizer.tokenize_state.token_2 = Name::Data; + tokenizer.tokenize_state.token_3 = Name::Data; + tokenizer.tokenize_state.token_4 = Name::Data; + tokenizer.tokenize_state.token_5 = Name::Data; tokenizer.tokenize_state.size_b = 0; State::Nok } @@ -253,8 +253,8 @@ pub fn after(tokenizer: &mut Tokenizer) -> State { let name = space_or_tab(tokenizer); tokenizer.attempt( name, - State::Next(Name::DefinitionAfterWhitespace), - State::Next(Name::DefinitionAfterWhitespace), + State::Next(StateName::DefinitionAfterWhitespace), + State::Next(StateName::DefinitionAfterWhitespace), ) } @@ -269,7 +269,7 @@ pub fn after(tokenizer: &mut Tokenizer) -> State { pub fn after_whitespace(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { - tokenizer.exit(Token::Definition); + tokenizer.exit(Name::Definition); // You’d be interrupting. tokenizer.interrupt = true; State::Ok @@ -290,7 +290,7 @@ pub fn title_before(tokenizer: &mut Tokenizer) -> State { let name = space_or_tab_eol(tokenizer); tokenizer.attempt( name, - State::Next(Name::DefinitionTitleBeforeMarker), + State::Next(StateName::DefinitionTitleBeforeMarker), State::Nok, ) } @@ -303,12 +303,12 @@ pub fn title_before(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn title_before_marker(tokenizer: &mut Tokenizer) -> State { - tokenizer.tokenize_state.token_1 = Token::DefinitionTitle; - tokenizer.tokenize_state.token_2 = Token::DefinitionTitleMarker; - tokenizer.tokenize_state.token_3 = Token::DefinitionTitleString; + tokenizer.tokenize_state.token_1 = Name::DefinitionTitle; + tokenizer.tokenize_state.token_2 = Name::DefinitionTitleMarker; + tokenizer.tokenize_state.token_3 = Name::DefinitionTitleString; tokenizer.attempt( - Name::TitleStart, - State::Next(Name::DefinitionTitleAfter), + StateName::TitleStart, + State::Next(StateName::DefinitionTitleAfter), State::Nok, ) } @@ -320,14 +320,14 @@ pub fn title_before_marker(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn title_after(tokenizer: &mut Tokenizer) -> State { - tokenizer.tokenize_state.token_1 = Token::Data; - tokenizer.tokenize_state.token_2 = Token::Data; - tokenizer.tokenize_state.token_3 = Token::Data; + tokenizer.tokenize_state.token_1 = Name::Data; + tokenizer.tokenize_state.token_2 = Name::Data; + tokenizer.tokenize_state.token_3 = Name::Data; let name = space_or_tab(tokenizer); tokenizer.attempt( name, - State::Next(Name::DefinitionTitleAfterOptionalWhitespace), - State::Next(Name::DefinitionTitleAfterOptionalWhitespace), + State::Next(StateName::DefinitionTitleAfterOptionalWhitespace), + State::Next(StateName::DefinitionTitleAfterOptionalWhitespace), ) } diff --git a/src/construct/hard_break_escape.rs b/src/construct/hard_break_escape.rs index 482648f..9af5b83 100644 --- a/src/construct/hard_break_escape.rs +++ b/src/construct/hard_break_escape.rs @@ -39,8 +39,8 @@ //! [hard_break_trailing]: crate::construct::partial_whitespace //! [html]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-br-element -use crate::state::{Name, State}; -use crate::token::Token; +use crate::event::Name; +use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; /// Start of a hard break (escape). @@ -53,9 +53,9 @@ use crate::tokenizer::Tokenizer; pub fn start(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'\\') if tokenizer.parse_state.constructs.hard_break_escape => { - tokenizer.enter(Token::HardBreakEscape); + tokenizer.enter(Name::HardBreakEscape); tokenizer.consume(); - State::Next(Name::HardBreakEscapeAfter) + State::Next(StateName::HardBreakEscapeAfter) } _ => State::Nok, } @@ -71,7 +71,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { pub fn after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'\n') => { - tokenizer.exit(Token::HardBreakEscape); + tokenizer.exit(Name::HardBreakEscape); State::Ok } _ => State::Nok, diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs index 12bc5b1..e856ac3 100644 --- a/src/construct/heading_atx.rs +++ b/src/construct/heading_atx.rs @@ -56,9 +56,9 @@ use crate::constant::{HEADING_ATX_OPENING_FENCE_SIZE_MAX, TAB_SIZE}; use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}; -use crate::state::{Name, State}; -use crate::token::Token; -use crate::tokenizer::{ContentType, Event, EventType, Tokenizer}; +use crate::event::{Content, Event, Kind, Name}; +use crate::state::{Name as StateName, State}; +use crate::tokenizer::Tokenizer; /// Start of a heading (atx). /// @@ -68,7 +68,7 @@ use crate::tokenizer::{ContentType, Event, EventType, Tokenizer}; /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { if tokenizer.parse_state.constructs.heading_atx { - tokenizer.enter(Token::HeadingAtx); + tokenizer.enter(Name::HeadingAtx); let name = space_or_tab_min_max( tokenizer, 0, @@ -78,7 +78,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { usize::MAX }, ); - tokenizer.attempt(name, State::Next(Name::HeadingAtxBefore), State::Nok) + tokenizer.attempt(name, State::Next(StateName::HeadingAtxBefore), State::Nok) } else { State::Nok } @@ -92,8 +92,8 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn before(tokenizer: &mut Tokenizer) -> State { if Some(b'#') == tokenizer.current { - tokenizer.enter(Token::HeadingAtxSequence); - State::Retry(Name::HeadingAtxSequenceOpen) + tokenizer.enter(Name::HeadingAtxSequence); + State::Retry(StateName::HeadingAtxSequenceOpen) } else { State::Nok } @@ -109,19 +109,19 @@ pub fn sequence_open(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') if tokenizer.tokenize_state.size > 0 => { tokenizer.tokenize_state.size = 0; - tokenizer.exit(Token::HeadingAtxSequence); - State::Retry(Name::HeadingAtxAtBreak) + tokenizer.exit(Name::HeadingAtxSequence); + State::Retry(StateName::HeadingAtxAtBreak) } Some(b'#') if tokenizer.tokenize_state.size < HEADING_ATX_OPENING_FENCE_SIZE_MAX => { tokenizer.tokenize_state.size += 1; tokenizer.consume(); - State::Next(Name::HeadingAtxSequenceOpen) + State::Next(StateName::HeadingAtxSequenceOpen) } _ if tokenizer.tokenize_state.size > 0 => { tokenizer.tokenize_state.size = 0; - tokenizer.exit(Token::HeadingAtxSequence); + tokenizer.exit(Name::HeadingAtxSequence); let name = space_or_tab(tokenizer); - tokenizer.attempt(name, State::Next(Name::HeadingAtxAtBreak), State::Nok) + tokenizer.attempt(name, State::Next(StateName::HeadingAtxAtBreak), State::Nok) } _ => { tokenizer.tokenize_state.size = 0; @@ -139,7 +139,7 @@ pub fn sequence_open(tokenizer: &mut Tokenizer) -> State { pub fn at_break(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { - tokenizer.exit(Token::HeadingAtx); + tokenizer.exit(Name::HeadingAtx); tokenizer.register_resolver("heading_atx".to_string(), Box::new(resolve)); // Feel free to interrupt. tokenizer.interrupt = false; @@ -147,15 +147,15 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { } Some(b'\t' | b' ') => { let name = space_or_tab(tokenizer); - tokenizer.attempt(name, State::Next(Name::HeadingAtxAtBreak), State::Nok) + tokenizer.attempt(name, State::Next(StateName::HeadingAtxAtBreak), State::Nok) } Some(b'#') => { - tokenizer.enter(Token::HeadingAtxSequence); - State::Retry(Name::HeadingAtxSequenceFurther) + tokenizer.enter(Name::HeadingAtxSequence); + State::Retry(StateName::HeadingAtxSequenceFurther) } Some(_) => { - tokenizer.enter_with_content(Token::Data, Some(ContentType::Text)); - State::Retry(Name::HeadingAtxData) + tokenizer.enter_with_content(Name::Data, Some(Content::Text)); + State::Retry(StateName::HeadingAtxData) } } } @@ -171,10 +171,10 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { pub fn sequence_further(tokenizer: &mut Tokenizer) -> State { if let Some(b'#') = tokenizer.current { tokenizer.consume(); - State::Next(Name::HeadingAtxSequenceFurther) + State::Next(StateName::HeadingAtxSequenceFurther) } else { - tokenizer.exit(Token::HeadingAtxSequence); - State::Retry(Name::HeadingAtxAtBreak) + tokenizer.exit(Name::HeadingAtxSequence); + State::Retry(StateName::HeadingAtxAtBreak) } } @@ -188,12 +188,12 @@ pub fn data(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // Note: `#` for closing sequence must be preceded by whitespace, otherwise it’s just text. None | Some(b'\t' | b'\n' | b' ') => { - tokenizer.exit(Token::Data); - State::Retry(Name::HeadingAtxAtBreak) + tokenizer.exit(Name::Data); + State::Retry(StateName::HeadingAtxAtBreak) } _ => { tokenizer.consume(); - State::Next(Name::HeadingAtxData) + State::Next(StateName::HeadingAtxData) } } } @@ -208,8 +208,8 @@ pub fn resolve(tokenizer: &mut Tokenizer) { while index < tokenizer.events.len() { let event = &tokenizer.events[index]; - if event.token_type == Token::HeadingAtx { - if event.event_type == EventType::Enter { + if event.name == Name::HeadingAtx { + if event.kind == Kind::Enter { heading_inside = true; } else { if let Some(start) = data_start { @@ -220,8 +220,8 @@ pub fn resolve(tokenizer: &mut Tokenizer) { start, 0, vec![Event { - event_type: EventType::Enter, - token_type: Token::HeadingAtxText, + kind: Kind::Enter, + name: Name::HeadingAtxText, point: tokenizer.events[start].point.clone(), link: None, }], @@ -234,8 +234,8 @@ pub fn resolve(tokenizer: &mut Tokenizer) { end + 1, 0, vec![Event { - event_type: EventType::Exit, - token_type: Token::HeadingAtxText, + kind: Kind::Exit, + name: Name::HeadingAtxText, point: tokenizer.events[end].point.clone(), link: None, }], @@ -246,8 +246,8 @@ pub fn resolve(tokenizer: &mut Tokenizer) { data_start = None; data_end = None; } - } else if heading_inside && event.token_type == Token::Data { - if event.event_type == EventType::Enter { + } else if heading_inside && event.name == Name::Data { + if event.kind == Kind::Enter { if data_start.is_none() { data_start = Some(index); } diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs index 8b45fff..3a24f9f 100644 --- a/src/construct/heading_setext.rs +++ b/src/construct/heading_setext.rs @@ -59,9 +59,9 @@ use crate::constant::TAB_SIZE; use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}; -use crate::state::{Name, State}; -use crate::token::Token; -use crate::tokenizer::{EventType, Tokenizer}; +use crate::event::{Kind, Name}; +use crate::state::{Name as StateName, State}; +use crate::tokenizer::Tokenizer; use crate::util::skip::opt_back as skip_opt_back; /// At a line ending, presumably an underline. @@ -79,10 +79,10 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { && tokenizer.events[skip_opt_back( &tokenizer.events, tokenizer.events.len() - 1, - &[Token::LineEnding, Token::SpaceOrTab], + &[Name::LineEnding, Name::SpaceOrTab], )] - .token_type - == Token::Paragraph) + .name + == Name::Paragraph) { let name = space_or_tab_min_max( tokenizer, @@ -94,7 +94,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { }, ); - tokenizer.attempt(name, State::Next(Name::HeadingSetextBefore), State::Nok) + tokenizer.attempt( + name, + State::Next(StateName::HeadingSetextBefore), + State::Nok, + ) } else { State::Nok } @@ -111,8 +115,8 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'-' | b'=') => { tokenizer.tokenize_state.marker = tokenizer.current.unwrap(); - tokenizer.enter(Token::HeadingSetextUnderline); - State::Retry(Name::HeadingSetextInside) + tokenizer.enter(Name::HeadingSetextUnderline); + State::Retry(StateName::HeadingSetextInside) } _ => State::Nok, } @@ -129,16 +133,16 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'-' | b'=') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => { tokenizer.consume(); - State::Next(Name::HeadingSetextInside) + State::Next(StateName::HeadingSetextInside) } _ => { tokenizer.tokenize_state.marker = 0; - tokenizer.exit(Token::HeadingSetextUnderline); + tokenizer.exit(Name::HeadingSetextUnderline); let name = space_or_tab(tokenizer); tokenizer.attempt( name, - State::Next(Name::HeadingSetextAfter), - State::Next(Name::HeadingSetextAfter), + State::Next(StateName::HeadingSetextAfter), + State::Next(StateName::HeadingSetextAfter), ) } } @@ -173,28 +177,28 @@ pub fn resolve(tokenizer: &mut Tokenizer) { let event = &tokenizer.events[index]; // Find paragraphs. - if event.event_type == EventType::Enter { - if event.token_type == Token::Paragraph { + if event.kind == Kind::Enter { + if event.name == Name::Paragraph { paragraph_enter = Some(index); } - } else if event.token_type == Token::Paragraph { + } else if event.name == Name::Paragraph { paragraph_exit = Some(index); } // We know this is preceded by a paragraph. // Otherwise we don’t parse. - else if event.token_type == Token::HeadingSetextUnderline { + else if event.name == Name::HeadingSetextUnderline { let enter = paragraph_enter.take().unwrap(); let exit = paragraph_exit.take().unwrap(); // Change types of Enter:Paragraph, Exit:Paragraph. - tokenizer.events[enter].token_type = Token::HeadingSetextText; - tokenizer.events[exit].token_type = Token::HeadingSetextText; + tokenizer.events[enter].name = Name::HeadingSetextText; + tokenizer.events[exit].name = Name::HeadingSetextText; // Add Enter:HeadingSetext, Exit:HeadingSetext. let mut heading_enter = tokenizer.events[enter].clone(); - heading_enter.token_type = Token::HeadingSetext; + heading_enter.name = Name::HeadingSetext; let mut heading_exit = tokenizer.events[index].clone(); - heading_exit.token_type = Token::HeadingSetext; + heading_exit.name = Name::HeadingSetext; tokenizer.map.add(enter, 0, vec![heading_enter]); tokenizer.map.add(index + 1, 0, vec![heading_exit]); diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs index c9f8610..8e5321f 100644 --- a/src/construct/html_flow.rs +++ b/src/construct/html_flow.rs @@ -104,8 +104,8 @@ use crate::constant::{ use crate::construct::partial_space_or_tab::{ space_or_tab_with_options, Options as SpaceOrTabOptions, }; -use crate::state::{Name, State}; -use crate::token::Token; +use crate::event::Name; +use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; use crate::util::slice::Slice; @@ -132,11 +132,11 @@ const COMPLETE: u8 = 7; /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { if tokenizer.parse_state.constructs.html_flow { - tokenizer.enter(Token::HtmlFlow); + tokenizer.enter(Name::HtmlFlow); let name = space_or_tab_with_options( tokenizer, SpaceOrTabOptions { - kind: Token::HtmlFlowData, + kind: Name::HtmlFlowData, min: 0, max: if tokenizer.parse_state.constructs.code_indented { TAB_SIZE - 1 @@ -148,7 +148,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { }, ); - tokenizer.attempt(name, State::Next(Name::HtmlFlowBefore), State::Nok) + tokenizer.attempt(name, State::Next(StateName::HtmlFlowBefore), State::Nok) } else { State::Nok } @@ -162,9 +162,9 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn before(tokenizer: &mut Tokenizer) -> State { if Some(b'<') == tokenizer.current { - tokenizer.enter(Token::HtmlFlowData); + tokenizer.enter(Name::HtmlFlowData); tokenizer.consume(); - State::Next(Name::HtmlFlowOpen) + State::Next(StateName::HtmlFlowOpen) } else { State::Nok } @@ -184,13 +184,13 @@ pub fn open(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'!') => { tokenizer.consume(); - State::Next(Name::HtmlFlowDeclarationOpen) + State::Next(StateName::HtmlFlowDeclarationOpen) } Some(b'/') => { tokenizer.consume(); tokenizer.tokenize_state.seen = true; tokenizer.tokenize_state.start = tokenizer.point.index; - State::Next(Name::HtmlFlowTagCloseStart) + State::Next(StateName::HtmlFlowTagCloseStart) } Some(b'?') => { tokenizer.tokenize_state.marker = INSTRUCTION; @@ -199,12 +199,12 @@ pub fn open(tokenizer: &mut Tokenizer) -> State { tokenizer.concrete = true; // While we’re in an instruction instead of a declaration, we’re on a `?` // right now, so we do need to search for `>`, similar to declarations. - State::Next(Name::HtmlFlowContinuationDeclarationInside) + State::Next(StateName::HtmlFlowContinuationDeclarationInside) } // ASCII alphabetical. Some(b'A'..=b'Z' | b'a'..=b'z') => { tokenizer.tokenize_state.start = tokenizer.point.index; - State::Retry(Name::HtmlFlowTagName) + State::Retry(StateName::HtmlFlowTagName) } _ => State::Nok, } @@ -225,19 +225,19 @@ pub fn declaration_open(tokenizer: &mut Tokenizer) -> State { Some(b'-') => { tokenizer.consume(); tokenizer.tokenize_state.marker = COMMENT; - State::Next(Name::HtmlFlowCommentOpenInside) + State::Next(StateName::HtmlFlowCommentOpenInside) } Some(b'A'..=b'Z' | b'a'..=b'z') => { tokenizer.consume(); tokenizer.tokenize_state.marker = DECLARATION; // Do not form containers. tokenizer.concrete = true; - State::Next(Name::HtmlFlowContinuationDeclarationInside) + State::Next(StateName::HtmlFlowContinuationDeclarationInside) } Some(b'[') => { tokenizer.consume(); tokenizer.tokenize_state.marker = CDATA; - State::Next(Name::HtmlFlowCdataOpenInside) + State::Next(StateName::HtmlFlowCdataOpenInside) } _ => State::Nok, } @@ -254,7 +254,7 @@ pub fn comment_open_inside(tokenizer: &mut Tokenizer) -> State { tokenizer.consume(); // Do not form containers. tokenizer.concrete = true; - State::Next(Name::HtmlFlowContinuationDeclarationInside) + State::Next(StateName::HtmlFlowContinuationDeclarationInside) } else { tokenizer.tokenize_state.marker = 0; State::Nok @@ -276,9 +276,9 @@ pub fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.size = 0; // Do not form containers. tokenizer.concrete = true; - State::Next(Name::HtmlFlowContinuation) + State::Next(StateName::HtmlFlowContinuation) } else { - State::Next(Name::HtmlFlowCdataOpenInside) + State::Next(StateName::HtmlFlowCdataOpenInside) } } else { tokenizer.tokenize_state.marker = 0; @@ -296,7 +296,7 @@ pub fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State { pub fn tag_close_start(tokenizer: &mut Tokenizer) -> State { if let Some(b'A'..=b'Z' | b'a'..=b'z') = tokenizer.current { tokenizer.consume(); - State::Next(Name::HtmlFlowTagName) + State::Next(StateName::HtmlFlowTagName) } else { tokenizer.tokenize_state.seen = false; tokenizer.tokenize_state.start = 0; @@ -335,17 +335,17 @@ pub fn tag_name(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.marker = RAW; // Do not form containers. tokenizer.concrete = true; - State::Retry(Name::HtmlFlowContinuation) + State::Retry(StateName::HtmlFlowContinuation) } else if HTML_BLOCK_NAMES.contains(&name.as_str()) { tokenizer.tokenize_state.marker = BASIC; if slash { tokenizer.consume(); - State::Next(Name::HtmlFlowBasicSelfClosing) + State::Next(StateName::HtmlFlowBasicSelfClosing) } else { // Do not form containers. tokenizer.concrete = true; - State::Retry(Name::HtmlFlowContinuation) + State::Retry(StateName::HtmlFlowContinuation) } } else { tokenizer.tokenize_state.marker = COMPLETE; @@ -355,16 +355,16 @@ pub fn tag_name(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.marker = 0; State::Nok } else if closing_tag { - State::Retry(Name::HtmlFlowCompleteClosingTagAfter) + State::Retry(StateName::HtmlFlowCompleteClosingTagAfter) } else { - State::Retry(Name::HtmlFlowCompleteAttributeNameBefore) + State::Retry(StateName::HtmlFlowCompleteAttributeNameBefore) } } } // ASCII alphanumerical and `-`. Some(b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => { tokenizer.consume(); - State::Next(Name::HtmlFlowTagName) + State::Next(StateName::HtmlFlowTagName) } Some(_) => { tokenizer.tokenize_state.seen = false; @@ -384,7 +384,7 @@ pub fn basic_self_closing(tokenizer: &mut Tokenizer) -> State { tokenizer.consume(); // Do not form containers. tokenizer.concrete = true; - State::Next(Name::HtmlFlowContinuation) + State::Next(StateName::HtmlFlowContinuation) } else { tokenizer.tokenize_state.marker = 0; State::Nok @@ -401,9 +401,9 @@ pub fn complete_closing_tag_after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'\t' | b' ') => { tokenizer.consume(); - State::Next(Name::HtmlFlowCompleteClosingTagAfter) + State::Next(StateName::HtmlFlowCompleteClosingTagAfter) } - _ => State::Retry(Name::HtmlFlowCompleteEnd), + _ => State::Retry(StateName::HtmlFlowCompleteEnd), } } @@ -430,18 +430,18 @@ pub fn complete_attribute_name_before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'\t' | b' ') => { tokenizer.consume(); - State::Next(Name::HtmlFlowCompleteAttributeNameBefore) + State::Next(StateName::HtmlFlowCompleteAttributeNameBefore) } Some(b'/') => { tokenizer.consume(); - State::Next(Name::HtmlFlowCompleteEnd) + State::Next(StateName::HtmlFlowCompleteEnd) } // ASCII alphanumerical and `:` and `_`. Some(b'0'..=b'9' | b':' | b'A'..=b'Z' | b'_' | b'a'..=b'z') => { tokenizer.consume(); - State::Next(Name::HtmlFlowCompleteAttributeName) + State::Next(StateName::HtmlFlowCompleteAttributeName) } - _ => State::Retry(Name::HtmlFlowCompleteEnd), + _ => State::Retry(StateName::HtmlFlowCompleteEnd), } } @@ -460,9 +460,9 @@ pub fn complete_attribute_name(tokenizer: &mut Tokenizer) -> State { // ASCII alphanumerical and `-`, `.`, `:`, and `_`. Some(b'-' | b'.' | b'0'..=b'9' | b':' | b'A'..=b'Z' | b'_' | b'a'..=b'z') => { tokenizer.consume(); - State::Next(Name::HtmlFlowCompleteAttributeName) + State::Next(StateName::HtmlFlowCompleteAttributeName) } - _ => State::Retry(Name::HtmlFlowCompleteAttributeNameAfter), + _ => State::Retry(StateName::HtmlFlowCompleteAttributeNameAfter), } } @@ -479,13 +479,13 @@ pub fn complete_attribute_name_after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'\t' | b' ') => { tokenizer.consume(); - State::Next(Name::HtmlFlowCompleteAttributeNameAfter) + State::Next(StateName::HtmlFlowCompleteAttributeNameAfter) } Some(b'=') => { tokenizer.consume(); - State::Next(Name::HtmlFlowCompleteAttributeValueBefore) + State::Next(StateName::HtmlFlowCompleteAttributeValueBefore) } - _ => State::Retry(Name::HtmlFlowCompleteAttributeNameBefore), + _ => State::Retry(StateName::HtmlFlowCompleteAttributeNameBefore), } } @@ -506,14 +506,14 @@ pub fn complete_attribute_value_before(tokenizer: &mut Tokenizer) -> State { } Some(b'\t' | b' ') => { tokenizer.consume(); - State::Next(Name::HtmlFlowCompleteAttributeValueBefore) + State::Next(StateName::HtmlFlowCompleteAttributeValueBefore) } Some(b'"' | b'\'') => { tokenizer.tokenize_state.marker_b = tokenizer.current.unwrap(); tokenizer.consume(); - State::Next(Name::HtmlFlowCompleteAttributeValueQuoted) + State::Next(StateName::HtmlFlowCompleteAttributeValueQuoted) } - _ => State::Retry(Name::HtmlFlowCompleteAttributeValueUnquoted), + _ => State::Retry(StateName::HtmlFlowCompleteAttributeValueUnquoted), } } @@ -535,11 +535,11 @@ pub fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State { Some(b'"' | b'\'') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker_b => { tokenizer.tokenize_state.marker_b = 0; tokenizer.consume(); - State::Next(Name::HtmlFlowCompleteAttributeValueQuotedAfter) + State::Next(StateName::HtmlFlowCompleteAttributeValueQuotedAfter) } _ => { tokenizer.consume(); - State::Next(Name::HtmlFlowCompleteAttributeValueQuoted) + State::Next(StateName::HtmlFlowCompleteAttributeValueQuoted) } } } @@ -553,11 +553,11 @@ pub fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State { pub fn complete_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\t' | b'\n' | b' ' | b'"' | b'\'' | b'/' | b'<' | b'=' | b'>' | b'`') => { - State::Retry(Name::HtmlFlowCompleteAttributeNameAfter) + State::Retry(StateName::HtmlFlowCompleteAttributeNameAfter) } Some(_) => { tokenizer.consume(); - State::Next(Name::HtmlFlowCompleteAttributeValueUnquoted) + State::Next(StateName::HtmlFlowCompleteAttributeValueUnquoted) } } } @@ -571,7 +571,7 @@ pub fn complete_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn complete_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State { if let Some(b'\t' | b' ' | b'/' | b'>') = tokenizer.current { - State::Retry(Name::HtmlFlowCompleteAttributeNameBefore) + State::Retry(StateName::HtmlFlowCompleteAttributeNameBefore) } else { tokenizer.tokenize_state.marker = 0; State::Nok @@ -587,7 +587,7 @@ pub fn complete_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State pub fn complete_end(tokenizer: &mut Tokenizer) -> State { if let Some(b'>') = tokenizer.current { tokenizer.consume(); - State::Next(Name::HtmlFlowCompleteAfter) + State::Next(StateName::HtmlFlowCompleteAfter) } else { tokenizer.tokenize_state.marker = 0; State::Nok @@ -605,11 +605,11 @@ pub fn complete_after(tokenizer: &mut Tokenizer) -> State { None | Some(b'\n') => { // Do not form containers. tokenizer.concrete = true; - State::Retry(Name::HtmlFlowContinuation) + State::Retry(StateName::HtmlFlowContinuation) } Some(b'\t' | b' ') => { tokenizer.consume(); - State::Next(Name::HtmlFlowCompleteAfter) + State::Next(StateName::HtmlFlowCompleteAfter) } Some(_) => { tokenizer.tokenize_state.marker = 0; @@ -630,41 +630,41 @@ pub fn continuation(tokenizer: &mut Tokenizer) -> State { if tokenizer.tokenize_state.marker == BASIC || tokenizer.tokenize_state.marker == COMPLETE => { - tokenizer.exit(Token::HtmlFlowData); + tokenizer.exit(Name::HtmlFlowData); tokenizer.check( - Name::HtmlFlowBlankLineBefore, - State::Next(Name::HtmlFlowContinuationAfter), - State::Next(Name::HtmlFlowContinuationStart), + StateName::HtmlFlowBlankLineBefore, + State::Next(StateName::HtmlFlowContinuationAfter), + State::Next(StateName::HtmlFlowContinuationStart), ) } // Note: important that this is after the basic/complete case. None | Some(b'\n') => { - tokenizer.exit(Token::HtmlFlowData); - State::Retry(Name::HtmlFlowContinuationStart) + tokenizer.exit(Name::HtmlFlowData); + State::Retry(StateName::HtmlFlowContinuationStart) } Some(b'-') if tokenizer.tokenize_state.marker == COMMENT => { tokenizer.consume(); - State::Next(Name::HtmlFlowContinuationCommentInside) + State::Next(StateName::HtmlFlowContinuationCommentInside) } Some(b'<') if tokenizer.tokenize_state.marker == RAW => { tokenizer.consume(); - State::Next(Name::HtmlFlowContinuationRawTagOpen) + State::Next(StateName::HtmlFlowContinuationRawTagOpen) } Some(b'>') if tokenizer.tokenize_state.marker == DECLARATION => { tokenizer.consume(); - State::Next(Name::HtmlFlowContinuationClose) + State::Next(StateName::HtmlFlowContinuationClose) } Some(b'?') if tokenizer.tokenize_state.marker == INSTRUCTION => { tokenizer.consume(); - State::Next(Name::HtmlFlowContinuationDeclarationInside) + State::Next(StateName::HtmlFlowContinuationDeclarationInside) } Some(b']') if tokenizer.tokenize_state.marker == CDATA => { tokenizer.consume(); - State::Next(Name::HtmlFlowContinuationCdataInside) + State::Next(StateName::HtmlFlowContinuationCdataInside) } _ => { tokenizer.consume(); - State::Next(Name::HtmlFlowContinuation) + State::Next(StateName::HtmlFlowContinuation) } } } @@ -678,9 +678,9 @@ pub fn continuation(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn continuation_start(tokenizer: &mut Tokenizer) -> State { tokenizer.check( - Name::NonLazyContinuationStart, - State::Next(Name::HtmlFlowContinuationStartNonLazy), - State::Next(Name::HtmlFlowContinuationAfter), + StateName::NonLazyContinuationStart, + State::Next(StateName::HtmlFlowContinuationStartNonLazy), + State::Next(StateName::HtmlFlowContinuationAfter), ) } @@ -694,10 +694,10 @@ pub fn continuation_start(tokenizer: &mut Tokenizer) -> State { pub fn continuation_start_non_lazy(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'\n') => { - tokenizer.enter(Token::LineEnding); + tokenizer.enter(Name::LineEnding); tokenizer.consume(); - tokenizer.exit(Token::LineEnding); - State::Next(Name::HtmlFlowContinuationBefore) + tokenizer.exit(Name::LineEnding); + State::Next(StateName::HtmlFlowContinuationBefore) } _ => unreachable!("expected eol"), } @@ -712,10 +712,10 @@ pub fn continuation_start_non_lazy(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn continuation_before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - None | Some(b'\n') => State::Retry(Name::HtmlFlowContinuationStart), + None | Some(b'\n') => State::Retry(StateName::HtmlFlowContinuationStart), _ => { - tokenizer.enter(Token::HtmlFlowData); - State::Retry(Name::HtmlFlowContinuation) + tokenizer.enter(Name::HtmlFlowData); + State::Retry(StateName::HtmlFlowContinuation) } } } @@ -730,9 +730,9 @@ pub fn continuation_comment_inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'-') => { tokenizer.consume(); - State::Next(Name::HtmlFlowContinuationDeclarationInside) + State::Next(StateName::HtmlFlowContinuationDeclarationInside) } - _ => State::Retry(Name::HtmlFlowContinuation), + _ => State::Retry(StateName::HtmlFlowContinuation), } } @@ -747,9 +747,9 @@ pub fn continuation_raw_tag_open(tokenizer: &mut Tokenizer) -> State { Some(b'/') => { tokenizer.consume(); tokenizer.tokenize_state.start = tokenizer.point.index; - State::Next(Name::HtmlFlowContinuationRawEndTag) + State::Next(StateName::HtmlFlowContinuationRawEndTag) } - _ => State::Retry(Name::HtmlFlowContinuation), + _ => State::Retry(StateName::HtmlFlowContinuation), } } @@ -774,20 +774,20 @@ pub fn continuation_raw_end_tag(tokenizer: &mut Tokenizer) -> State { if HTML_RAW_NAMES.contains(&name.as_str()) { tokenizer.consume(); - State::Next(Name::HtmlFlowContinuationClose) + State::Next(StateName::HtmlFlowContinuationClose) } else { - State::Retry(Name::HtmlFlowContinuation) + State::Retry(StateName::HtmlFlowContinuation) } } Some(b'A'..=b'Z' | b'a'..=b'z') if tokenizer.point.index - tokenizer.tokenize_state.start < HTML_RAW_SIZE_MAX => { tokenizer.consume(); - State::Next(Name::HtmlFlowContinuationRawEndTag) + State::Next(StateName::HtmlFlowContinuationRawEndTag) } _ => { tokenizer.tokenize_state.start = 0; - State::Retry(Name::HtmlFlowContinuation) + State::Retry(StateName::HtmlFlowContinuation) } } } @@ -802,9 +802,9 @@ pub fn continuation_cdata_inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b']') => { tokenizer.consume(); - State::Next(Name::HtmlFlowContinuationDeclarationInside) + State::Next(StateName::HtmlFlowContinuationDeclarationInside) } - _ => State::Retry(Name::HtmlFlowContinuation), + _ => State::Retry(StateName::HtmlFlowContinuation), } } @@ -826,13 +826,13 @@ pub fn continuation_declaration_inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'>') => { tokenizer.consume(); - State::Next(Name::HtmlFlowContinuationClose) + State::Next(StateName::HtmlFlowContinuationClose) } Some(b'-') if tokenizer.tokenize_state.marker == COMMENT => { tokenizer.consume(); - State::Next(Name::HtmlFlowContinuationDeclarationInside) + State::Next(StateName::HtmlFlowContinuationDeclarationInside) } - _ => State::Retry(Name::HtmlFlowContinuation), + _ => State::Retry(StateName::HtmlFlowContinuation), } } @@ -845,12 +845,12 @@ pub fn continuation_declaration_inside(tokenizer: &mut Tokenizer) -> State { pub fn continuation_close(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { - tokenizer.exit(Token::HtmlFlowData); - State::Retry(Name::HtmlFlowContinuationAfter) + tokenizer.exit(Name::HtmlFlowData); + State::Retry(StateName::HtmlFlowContinuationAfter) } _ => { tokenizer.consume(); - State::Next(Name::HtmlFlowContinuationClose) + State::Next(StateName::HtmlFlowContinuationClose) } } } @@ -862,7 +862,7 @@ pub fn continuation_close(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn continuation_after(tokenizer: &mut Tokenizer) -> State { - tokenizer.exit(Token::HtmlFlow); + tokenizer.exit(Name::HtmlFlow); tokenizer.tokenize_state.marker = 0; // Feel free to interrupt. tokenizer.interrupt = false; @@ -879,8 +879,8 @@ pub fn continuation_after(tokenizer: &mut Tokenizer) -> State { /// | /// ``` pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State { - tokenizer.enter(Token::LineEnding); + tokenizer.enter(Name::LineEnding); tokenizer.consume(); - tokenizer.exit(Token::LineEnding); - State::Next(Name::BlankLineStart) + tokenizer.exit(Name::LineEnding); + State::Next(StateName::BlankLineStart) } diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs index dd14e70..27e92f5 100644 --- a/src/construct/html_text.rs +++ b/src/construct/html_text.rs @@ -56,8 +56,8 @@ use crate::constant::HTML_CDATA_PREFIX; use crate::construct::partial_space_or_tab::space_or_tab; -use crate::state::{Name, State}; -use crate::token::Token; +use crate::event::Name; +use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; /// Start of HTML (text) @@ -68,10 +68,10 @@ use crate::tokenizer::Tokenizer; /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { if Some(b'<') == tokenizer.current && tokenizer.parse_state.constructs.html_text { - tokenizer.enter(Token::HtmlText); - tokenizer.enter(Token::HtmlTextData); + tokenizer.enter(Name::HtmlText); + tokenizer.enter(Name::HtmlTextData); tokenizer.consume(); - State::Next(Name::HtmlTextOpen) + State::Next(StateName::HtmlTextOpen) } else { State::Nok } @@ -91,20 +91,20 @@ pub fn open(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'!') => { tokenizer.consume(); - State::Next(Name::HtmlTextDeclarationOpen) + State::Next(StateName::HtmlTextDeclarationOpen) } Some(b'/') => { tokenizer.consume(); - State::Next(Name::HtmlTextTagCloseStart) + State::Next(StateName::HtmlTextTagCloseStart) } Some(b'?') => { tokenizer.consume(); - State::Next(Name::HtmlTextInstruction) + State::Next(StateName::HtmlTextInstruction) } // ASCII alphabetical. Some(b'A'..=b'Z' | b'a'..=b'z') => { tokenizer.consume(); - State::Next(Name::HtmlTextTagOpen) + State::Next(StateName::HtmlTextTagOpen) } _ => State::Nok, } @@ -124,16 +124,16 @@ pub fn declaration_open(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'-') => { tokenizer.consume(); - State::Next(Name::HtmlTextCommentOpenInside) + State::Next(StateName::HtmlTextCommentOpenInside) } // ASCII alphabetical. Some(b'A'..=b'Z' | b'a'..=b'z') => { tokenizer.consume(); - State::Next(Name::HtmlTextDeclaration) + State::Next(StateName::HtmlTextDeclaration) } Some(b'[') => { tokenizer.consume(); - State::Next(Name::HtmlTextCdataOpenInside) + State::Next(StateName::HtmlTextCdataOpenInside) } _ => State::Nok, } @@ -149,7 +149,7 @@ pub fn comment_open_inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'-') => { tokenizer.consume(); - State::Next(Name::HtmlTextCommentStart) + State::Next(StateName::HtmlTextCommentStart) } _ => State::Nok, } @@ -173,9 +173,9 @@ pub fn comment_start(tokenizer: &mut Tokenizer) -> State { Some(b'>') => State::Nok, Some(b'-') => { tokenizer.consume(); - State::Next(Name::HtmlTextCommentStartDash) + State::Next(StateName::HtmlTextCommentStartDash) } - _ => State::Retry(Name::HtmlTextComment), + _ => State::Retry(StateName::HtmlTextComment), } } @@ -195,7 +195,7 @@ pub fn comment_start(tokenizer: &mut Tokenizer) -> State { pub fn comment_start_dash(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'>') => State::Nok, - _ => State::Retry(Name::HtmlTextComment), + _ => State::Retry(StateName::HtmlTextComment), } } @@ -209,17 +209,17 @@ pub fn comment(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => State::Nok, Some(b'\n') => tokenizer.attempt( - Name::HtmlTextLineEndingBefore, - State::Next(Name::HtmlTextComment), + StateName::HtmlTextLineEndingBefore, + State::Next(StateName::HtmlTextComment), State::Nok, ), Some(b'-') => { tokenizer.consume(); - State::Next(Name::HtmlTextCommentClose) + State::Next(StateName::HtmlTextCommentClose) } _ => { tokenizer.consume(); - State::Next(Name::HtmlTextComment) + State::Next(StateName::HtmlTextComment) } } } @@ -234,9 +234,9 @@ pub fn comment_close(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'-') => { tokenizer.consume(); - State::Next(Name::HtmlTextEnd) + State::Next(StateName::HtmlTextEnd) } - _ => State::Retry(Name::HtmlTextComment), + _ => State::Retry(StateName::HtmlTextComment), } } @@ -253,9 +253,9 @@ pub fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State { if tokenizer.tokenize_state.size == HTML_CDATA_PREFIX.len() { tokenizer.tokenize_state.size = 0; - State::Next(Name::HtmlTextCdata) + State::Next(StateName::HtmlTextCdata) } else { - State::Next(Name::HtmlTextCdataOpenInside) + State::Next(StateName::HtmlTextCdataOpenInside) } } else { State::Nok @@ -272,17 +272,17 @@ pub fn cdata(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => State::Nok, Some(b'\n') => tokenizer.attempt( - Name::HtmlTextLineEndingBefore, - State::Next(Name::HtmlTextCdata), + StateName::HtmlTextLineEndingBefore, + State::Next(StateName::HtmlTextCdata), State::Nok, ), Some(b']') => { tokenizer.consume(); - State::Next(Name::HtmlTextCdataClose) + State::Next(StateName::HtmlTextCdataClose) } _ => { tokenizer.consume(); - State::Next(Name::HtmlTextCdata) + State::Next(StateName::HtmlTextCdata) } } } @@ -297,9 +297,9 @@ pub fn cdata_close(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b']') => { tokenizer.consume(); - State::Next(Name::HtmlTextCdataEnd) + State::Next(StateName::HtmlTextCdataEnd) } - _ => State::Retry(Name::HtmlTextCdata), + _ => State::Retry(StateName::HtmlTextCdata), } } @@ -311,9 +311,9 @@ pub fn cdata_close(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn cdata_end(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - Some(b'>') => State::Retry(Name::HtmlTextEnd), - Some(b']') => State::Retry(Name::HtmlTextCdataClose), - _ => State::Retry(Name::HtmlTextCdata), + Some(b'>') => State::Retry(StateName::HtmlTextEnd), + Some(b']') => State::Retry(StateName::HtmlTextCdataClose), + _ => State::Retry(StateName::HtmlTextCdata), } } @@ -325,15 +325,15 @@ pub fn cdata_end(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn declaration(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - None | Some(b'>') => State::Retry(Name::HtmlTextEnd), + None | Some(b'>') => State::Retry(StateName::HtmlTextEnd), Some(b'\n') => tokenizer.attempt( - Name::HtmlTextLineEndingBefore, - State::Next(Name::HtmlTextDeclaration), + StateName::HtmlTextLineEndingBefore, + State::Next(StateName::HtmlTextDeclaration), State::Nok, ), _ => { tokenizer.consume(); - State::Next(Name::HtmlTextDeclaration) + State::Next(StateName::HtmlTextDeclaration) } } } @@ -348,17 +348,17 @@ pub fn instruction(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => State::Nok, Some(b'\n') => tokenizer.attempt( - Name::HtmlTextLineEndingBefore, - State::Next(Name::HtmlTextInstruction), + StateName::HtmlTextLineEndingBefore, + State::Next(StateName::HtmlTextInstruction), State::Nok, ), Some(b'?') => { tokenizer.consume(); - State::Next(Name::HtmlTextInstructionClose) + State::Next(StateName::HtmlTextInstructionClose) } _ => { tokenizer.consume(); - State::Next(Name::HtmlTextInstruction) + State::Next(StateName::HtmlTextInstruction) } } } @@ -371,8 +371,8 @@ pub fn instruction(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn instruction_close(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - Some(b'>') => State::Retry(Name::HtmlTextEnd), - _ => State::Retry(Name::HtmlTextInstruction), + Some(b'>') => State::Retry(StateName::HtmlTextEnd), + _ => State::Retry(StateName::HtmlTextInstruction), } } @@ -387,7 +387,7 @@ pub fn tag_close_start(tokenizer: &mut Tokenizer) -> State { // ASCII alphabetical. Some(b'A'..=b'Z' | b'a'..=b'z') => { tokenizer.consume(); - State::Next(Name::HtmlTextTagClose) + State::Next(StateName::HtmlTextTagClose) } _ => State::Nok, } @@ -404,9 +404,9 @@ pub fn tag_close(tokenizer: &mut Tokenizer) -> State { // ASCII alphanumerical and `-`. Some(b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => { tokenizer.consume(); - State::Next(Name::HtmlTextTagClose) + State::Next(StateName::HtmlTextTagClose) } - _ => State::Retry(Name::HtmlTextTagCloseBetween), + _ => State::Retry(StateName::HtmlTextTagCloseBetween), } } @@ -419,15 +419,15 @@ pub fn tag_close(tokenizer: &mut Tokenizer) -> State { pub fn tag_close_between(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'\n') => tokenizer.attempt( - Name::HtmlTextLineEndingBefore, - State::Next(Name::HtmlTextTagCloseBetween), + StateName::HtmlTextLineEndingBefore, + State::Next(StateName::HtmlTextTagCloseBetween), State::Nok, ), Some(b'\t' | b' ') => { tokenizer.consume(); - State::Next(Name::HtmlTextTagCloseBetween) + State::Next(StateName::HtmlTextTagCloseBetween) } - _ => State::Retry(Name::HtmlTextEnd), + _ => State::Retry(StateName::HtmlTextEnd), } } @@ -442,9 +442,9 @@ pub fn tag_open(tokenizer: &mut Tokenizer) -> State { // ASCII alphanumerical and `-`. Some(b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => { tokenizer.consume(); - State::Next(Name::HtmlTextTagOpen) + State::Next(StateName::HtmlTextTagOpen) } - Some(b'\t' | b'\n' | b' ' | b'/' | b'>') => State::Retry(Name::HtmlTextTagOpenBetween), + Some(b'\t' | b'\n' | b' ' | b'/' | b'>') => State::Retry(StateName::HtmlTextTagOpenBetween), _ => State::Nok, } } @@ -458,24 +458,24 @@ pub fn tag_open(tokenizer: &mut Tokenizer) -> State { pub fn tag_open_between(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'\n') => tokenizer.attempt( - Name::HtmlTextLineEndingBefore, - State::Next(Name::HtmlTextTagOpenBetween), + StateName::HtmlTextLineEndingBefore, + State::Next(StateName::HtmlTextTagOpenBetween), State::Nok, ), Some(b'\t' | b' ') => { tokenizer.consume(); - State::Next(Name::HtmlTextTagOpenBetween) + State::Next(StateName::HtmlTextTagOpenBetween) } Some(b'/') => { tokenizer.consume(); - State::Next(Name::HtmlTextEnd) + State::Next(StateName::HtmlTextEnd) } // ASCII alphabetical and `:` and `_`. Some(b':' | b'A'..=b'Z' | b'_' | b'a'..=b'z') => { tokenizer.consume(); - State::Next(Name::HtmlTextTagOpenAttributeName) + State::Next(StateName::HtmlTextTagOpenAttributeName) } - _ => State::Retry(Name::HtmlTextEnd), + _ => State::Retry(StateName::HtmlTextEnd), } } @@ -490,9 +490,9 @@ pub fn tag_open_attribute_name(tokenizer: &mut Tokenizer) -> State { // ASCII alphabetical and `-`, `.`, `:`, and `_`. Some(b'-' | b'.' | b'0'..=b'9' | b':' | b'A'..=b'Z' | b'_' | b'a'..=b'z') => { tokenizer.consume(); - State::Next(Name::HtmlTextTagOpenAttributeName) + State::Next(StateName::HtmlTextTagOpenAttributeName) } - _ => State::Retry(Name::HtmlTextTagOpenAttributeNameAfter), + _ => State::Retry(StateName::HtmlTextTagOpenAttributeNameAfter), } } @@ -506,19 +506,19 @@ pub fn tag_open_attribute_name(tokenizer: &mut Tokenizer) -> State { pub fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'\n') => tokenizer.attempt( - Name::HtmlTextLineEndingBefore, - State::Next(Name::HtmlTextTagOpenAttributeNameAfter), + StateName::HtmlTextLineEndingBefore, + State::Next(StateName::HtmlTextTagOpenAttributeNameAfter), State::Nok, ), Some(b'\t' | b' ') => { tokenizer.consume(); - State::Next(Name::HtmlTextTagOpenAttributeNameAfter) + State::Next(StateName::HtmlTextTagOpenAttributeNameAfter) } Some(b'=') => { tokenizer.consume(); - State::Next(Name::HtmlTextTagOpenAttributeValueBefore) + State::Next(StateName::HtmlTextTagOpenAttributeValueBefore) } - _ => State::Retry(Name::HtmlTextTagOpenBetween), + _ => State::Retry(StateName::HtmlTextTagOpenBetween), } } @@ -533,22 +533,22 @@ pub fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'<' | b'=' | b'>' | b'`') => State::Nok, Some(b'\n') => tokenizer.attempt( - Name::HtmlTextLineEndingBefore, - State::Next(Name::HtmlTextTagOpenAttributeValueBefore), + StateName::HtmlTextLineEndingBefore, + State::Next(StateName::HtmlTextTagOpenAttributeValueBefore), State::Nok, ), Some(b'\t' | b' ') => { tokenizer.consume(); - State::Next(Name::HtmlTextTagOpenAttributeValueBefore) + State::Next(StateName::HtmlTextTagOpenAttributeValueBefore) } Some(b'"' | b'\'') => { tokenizer.tokenize_state.marker = tokenizer.current.unwrap(); tokenizer.consume(); - State::Next(Name::HtmlTextTagOpenAttributeValueQuoted) + State::Next(StateName::HtmlTextTagOpenAttributeValueQuoted) } Some(_) => { tokenizer.consume(); - State::Next(Name::HtmlTextTagOpenAttributeValueUnquoted) + State::Next(StateName::HtmlTextTagOpenAttributeValueUnquoted) } } } @@ -566,18 +566,18 @@ pub fn tag_open_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State { State::Nok } Some(b'\n') => tokenizer.attempt( - Name::HtmlTextLineEndingBefore, - State::Next(Name::HtmlTextTagOpenAttributeValueQuoted), + StateName::HtmlTextLineEndingBefore, + State::Next(StateName::HtmlTextTagOpenAttributeValueQuoted), State::Nok, ), Some(b'"' | b'\'') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => { tokenizer.tokenize_state.marker = 0; tokenizer.consume(); - State::Next(Name::HtmlTextTagOpenAttributeValueQuotedAfter) + State::Next(StateName::HtmlTextTagOpenAttributeValueQuotedAfter) } _ => { tokenizer.consume(); - State::Next(Name::HtmlTextTagOpenAttributeValueQuoted) + State::Next(StateName::HtmlTextTagOpenAttributeValueQuoted) } } } @@ -591,10 +591,10 @@ pub fn tag_open_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State { pub fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'"' | b'\'' | b'<' | b'=' | b'`') => State::Nok, - Some(b'\t' | b'\n' | b' ' | b'/' | b'>') => State::Retry(Name::HtmlTextTagOpenBetween), + Some(b'\t' | b'\n' | b' ' | b'/' | b'>') => State::Retry(StateName::HtmlTextTagOpenBetween), Some(_) => { tokenizer.consume(); - State::Next(Name::HtmlTextTagOpenAttributeValueUnquoted) + State::Next(StateName::HtmlTextTagOpenAttributeValueUnquoted) } } } @@ -608,7 +608,7 @@ pub fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn tag_open_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - Some(b'\t' | b'\n' | b' ' | b'>' | b'/') => State::Retry(Name::HtmlTextTagOpenBetween), + Some(b'\t' | b'\n' | b' ' | b'>' | b'/') => State::Retry(StateName::HtmlTextTagOpenBetween), _ => State::Nok, } } @@ -623,8 +623,8 @@ pub fn end(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'>') => { tokenizer.consume(); - tokenizer.exit(Token::HtmlTextData); - tokenizer.exit(Token::HtmlText); + tokenizer.exit(Name::HtmlTextData); + tokenizer.exit(Name::HtmlText); State::Ok } _ => State::Nok, @@ -644,11 +644,11 @@ pub fn end(tokenizer: &mut Tokenizer) -> State { pub fn line_ending_before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'\n') => { - tokenizer.exit(Token::HtmlTextData); - tokenizer.enter(Token::LineEnding); + tokenizer.exit(Name::HtmlTextData); + tokenizer.enter(Name::LineEnding); tokenizer.consume(); - tokenizer.exit(Token::LineEnding); - State::Next(Name::HtmlTextLineEndingAfter) + tokenizer.exit(Name::LineEnding); + State::Next(StateName::HtmlTextLineEndingAfter) } _ => unreachable!("expected eol"), } @@ -668,8 +668,8 @@ pub fn line_ending_after(tokenizer: &mut Tokenizer) -> State { let name = space_or_tab(tokenizer); tokenizer.attempt( name, - State::Next(Name::HtmlTextLineEndingAfterPrefix), - State::Next(Name::HtmlTextLineEndingAfterPrefix), + State::Next(StateName::HtmlTextLineEndingAfterPrefix), + State::Next(StateName::HtmlTextLineEndingAfterPrefix), ) } @@ -684,6 +684,6 @@ pub fn line_ending_after(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn line_ending_after_prefix(tokenizer: &mut Tokenizer) -> State { - tokenizer.enter(Token::HtmlTextData); + tokenizer.enter(Name::HtmlTextData); State::Ok } diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs index 0607077..61f378d 100644 --- a/src/construct/label_end.rs +++ b/src/construct/label_end.rs @@ -148,9 +148,10 @@ use crate::constant::RESOURCE_DESTINATION_BALANCE_MAX; use crate::construct::partial_space_or_tab::space_or_tab_eol; -use crate::state::{Name, State}; -use crate::token::Token; -use crate::tokenizer::{Event, EventType, Media, Tokenizer}; +use crate::event::{Event, Kind, Name}; +use crate::state::{Name as StateName, State}; +use crate::tokenizer::{Media, Tokenizer}; + use crate::util::{ normalize_identifier::normalize_identifier, skip, @@ -195,15 +196,15 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { // Mark as balanced if the info is inactive. if label_start.inactive { - return State::Retry(Name::LabelEndNok); + return State::Retry(StateName::LabelEndNok); } - tokenizer.enter(Token::LabelEnd); - tokenizer.enter(Token::LabelMarker); + tokenizer.enter(Name::LabelEnd); + tokenizer.enter(Name::LabelMarker); tokenizer.consume(); - tokenizer.exit(Token::LabelMarker); - tokenizer.exit(Token::LabelEnd); - return State::Next(Name::LabelEndAfter); + tokenizer.exit(Name::LabelMarker); + tokenizer.exit(Name::LabelEnd); + return State::Next(StateName::LabelEndAfter); } } @@ -240,29 +241,29 @@ pub fn after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // Resource (`[asd](fgh)`)? Some(b'(') => tokenizer.attempt( - Name::LabelEndResourceStart, - State::Next(Name::LabelEndOk), + StateName::LabelEndResourceStart, + State::Next(StateName::LabelEndOk), State::Next(if defined { - Name::LabelEndOk + StateName::LabelEndOk } else { - Name::LabelEndNok + StateName::LabelEndNok }), ), // Full (`[asd][fgh]`) or collapsed (`[asd][]`) reference? Some(b'[') => tokenizer.attempt( - Name::LabelEndReferenceFull, - State::Next(Name::LabelEndOk), + StateName::LabelEndReferenceFull, + State::Next(StateName::LabelEndOk), State::Next(if defined { - Name::LabelEndReferenceNotFull + StateName::LabelEndReferenceNotFull } else { - Name::LabelEndNok + StateName::LabelEndNok }), ), // Shortcut (`[asd]`) reference? _ => State::Retry(if defined { - Name::LabelEndOk + StateName::LabelEndOk } else { - Name::LabelEndNok + StateName::LabelEndNok }), } } @@ -279,9 +280,9 @@ pub fn after(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn reference_not_full(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - Name::LabelEndReferenceCollapsed, - State::Next(Name::LabelEndOk), - State::Next(Name::LabelEndNok), + StateName::LabelEndReferenceCollapsed, + State::Next(StateName::LabelEndOk), + State::Next(StateName::LabelEndNok), ) } @@ -311,13 +312,13 @@ pub fn ok(tokenizer: &mut Tokenizer) -> State { .label_start_list_loose .append(&mut left); - let is_link = tokenizer.events[label_start.start.0].token_type == Token::LabelLink; + let is_link = tokenizer.events[label_start.start.0].name == Name::LabelLink; if is_link { let mut index = 0; while index < tokenizer.tokenize_state.label_start_stack.len() { let label_start = &mut tokenizer.tokenize_state.label_start_stack[index]; - if tokenizer.events[label_start.start.0].token_type == Token::LabelLink { + if tokenizer.events[label_start.start.0].name == Name::LabelLink { label_start.inactive = true; } index += 1; @@ -367,11 +368,11 @@ pub fn nok(tokenizer: &mut Tokenizer) -> State { pub fn resource_start(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'(') => { - tokenizer.enter(Token::Resource); - tokenizer.enter(Token::ResourceMarker); + tokenizer.enter(Name::Resource); + tokenizer.enter(Name::ResourceMarker); tokenizer.consume(); - tokenizer.exit(Token::ResourceMarker); - State::Next(Name::LabelEndResourceBefore) + tokenizer.exit(Name::ResourceMarker); + State::Next(StateName::LabelEndResourceBefore) } _ => unreachable!("expected `(`"), } @@ -387,8 +388,8 @@ pub fn resource_before(tokenizer: &mut Tokenizer) -> State { let name = space_or_tab_eol(tokenizer); tokenizer.attempt( name, - State::Next(Name::LabelEndResourceOpen), - State::Next(Name::LabelEndResourceOpen), + State::Next(StateName::LabelEndResourceOpen), + State::Next(StateName::LabelEndResourceOpen), ) } @@ -400,19 +401,19 @@ pub fn resource_before(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn resource_open(tokenizer: &mut Tokenizer) -> State { if let Some(b')') = tokenizer.current { - State::Retry(Name::LabelEndResourceEnd) + State::Retry(StateName::LabelEndResourceEnd) } else { - tokenizer.tokenize_state.token_1 = Token::ResourceDestination; - tokenizer.tokenize_state.token_2 = Token::ResourceDestinationLiteral; - tokenizer.tokenize_state.token_3 = Token::ResourceDestinationLiteralMarker; - tokenizer.tokenize_state.token_4 = Token::ResourceDestinationRaw; - tokenizer.tokenize_state.token_5 = Token::ResourceDestinationString; + tokenizer.tokenize_state.token_1 = Name::ResourceDestination; + tokenizer.tokenize_state.token_2 = Name::ResourceDestinationLiteral; + tokenizer.tokenize_state.token_3 = Name::ResourceDestinationLiteralMarker; + tokenizer.tokenize_state.token_4 = Name::ResourceDestinationRaw; + tokenizer.tokenize_state.token_5 = Name::ResourceDestinationString; tokenizer.tokenize_state.size_b = RESOURCE_DESTINATION_BALANCE_MAX; tokenizer.attempt( - Name::DestinationStart, - State::Next(Name::LabelEndResourceDestinationAfter), - State::Next(Name::LabelEndResourceDestinationMissing), + StateName::DestinationStart, + State::Next(StateName::LabelEndResourceDestinationAfter), + State::Next(StateName::LabelEndResourceDestinationMissing), ) } } @@ -424,27 +425,27 @@ pub fn resource_open(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn resource_destination_after(tokenizer: &mut Tokenizer) -> State { - tokenizer.tokenize_state.token_1 = Token::Data; - tokenizer.tokenize_state.token_2 = Token::Data; - tokenizer.tokenize_state.token_3 = Token::Data; - tokenizer.tokenize_state.token_4 = Token::Data; - tokenizer.tokenize_state.token_5 = Token::Data; + tokenizer.tokenize_state.token_1 = Name::Data; + tokenizer.tokenize_state.token_2 = Name::Data; + tokenizer.tokenize_state.token_3 = Name::Data; + tokenizer.tokenize_state.token_4 = Name::Data; + tokenizer.tokenize_state.token_5 = Name::Data; tokenizer.tokenize_state.size_b = 0; let name = space_or_tab_eol(tokenizer); tokenizer.attempt( name, - State::Next(Name::LabelEndResourceBetween), - State::Next(Name::LabelEndResourceEnd), + State::Next(StateName::LabelEndResourceBetween), + State::Next(StateName::LabelEndResourceEnd), ) } /// Without destination. pub fn resource_destination_missing(tokenizer: &mut Tokenizer) -> State { - tokenizer.tokenize_state.token_1 = Token::Data; - tokenizer.tokenize_state.token_2 = Token::Data; - tokenizer.tokenize_state.token_3 = Token::Data; - tokenizer.tokenize_state.token_4 = Token::Data; - tokenizer.tokenize_state.token_5 = Token::Data; + tokenizer.tokenize_state.token_1 = Name::Data; + tokenizer.tokenize_state.token_2 = Name::Data; + tokenizer.tokenize_state.token_3 = Name::Data; + tokenizer.tokenize_state.token_4 = Name::Data; + tokenizer.tokenize_state.token_5 = Name::Data; tokenizer.tokenize_state.size_b = 0; State::Nok } @@ -458,16 +459,16 @@ pub fn resource_destination_missing(tokenizer: &mut Tokenizer) -> State { pub fn resource_between(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'"' | b'\'' | b'(') => { - tokenizer.tokenize_state.token_1 = Token::ResourceTitle; - tokenizer.tokenize_state.token_2 = Token::ResourceTitleMarker; - tokenizer.tokenize_state.token_3 = Token::ResourceTitleString; + tokenizer.tokenize_state.token_1 = Name::ResourceTitle; + tokenizer.tokenize_state.token_2 = Name::ResourceTitleMarker; + tokenizer.tokenize_state.token_3 = Name::ResourceTitleString; tokenizer.attempt( - Name::TitleStart, - State::Next(Name::LabelEndResourceTitleAfter), + StateName::TitleStart, + State::Next(StateName::LabelEndResourceTitleAfter), State::Nok, ) } - _ => State::Retry(Name::LabelEndResourceEnd), + _ => State::Retry(StateName::LabelEndResourceEnd), } } @@ -478,14 +479,14 @@ pub fn resource_between(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn resource_title_after(tokenizer: &mut Tokenizer) -> State { - tokenizer.tokenize_state.token_1 = Token::Data; - tokenizer.tokenize_state.token_2 = Token::Data; - tokenizer.tokenize_state.token_3 = Token::Data; + tokenizer.tokenize_state.token_1 = Name::Data; + tokenizer.tokenize_state.token_2 = Name::Data; + tokenizer.tokenize_state.token_3 = Name::Data; let name = space_or_tab_eol(tokenizer); tokenizer.attempt( name, - State::Next(Name::LabelEndResourceEnd), - State::Next(Name::LabelEndResourceEnd), + State::Next(StateName::LabelEndResourceEnd), + State::Next(StateName::LabelEndResourceEnd), ) } @@ -498,10 +499,10 @@ pub fn resource_title_after(tokenizer: &mut Tokenizer) -> State { pub fn resource_end(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b')') => { - tokenizer.enter(Token::ResourceMarker); + tokenizer.enter(Name::ResourceMarker); tokenizer.consume(); - tokenizer.exit(Token::ResourceMarker); - tokenizer.exit(Token::Resource); + tokenizer.exit(Name::ResourceMarker); + tokenizer.exit(Name::Resource); State::Ok } _ => State::Nok, @@ -517,12 +518,12 @@ pub fn resource_end(tokenizer: &mut Tokenizer) -> State { pub fn reference_full(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'[') => { - tokenizer.tokenize_state.token_1 = Token::Reference; - tokenizer.tokenize_state.token_2 = Token::ReferenceMarker; - tokenizer.tokenize_state.token_3 = Token::ReferenceString; + tokenizer.tokenize_state.token_1 = Name::Reference; + tokenizer.tokenize_state.token_2 = Name::ReferenceMarker; + tokenizer.tokenize_state.token_3 = Name::ReferenceString; tokenizer.attempt( - Name::LabelStart, - State::Next(Name::LabelEndReferenceFullAfter), + StateName::LabelStart, + State::Next(StateName::LabelEndReferenceFullAfter), State::Nok, ) } @@ -537,9 +538,9 @@ pub fn reference_full(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn reference_full_after(tokenizer: &mut Tokenizer) -> State { - tokenizer.tokenize_state.token_1 = Token::Data; - tokenizer.tokenize_state.token_2 = Token::Data; - tokenizer.tokenize_state.token_3 = Token::Data; + tokenizer.tokenize_state.token_1 = Name::Data; + tokenizer.tokenize_state.token_2 = Name::Data; + tokenizer.tokenize_state.token_3 = Name::Data; if tokenizer .parse_state @@ -553,7 +554,7 @@ pub fn reference_full_after(tokenizer: &mut Tokenizer) -> State { skip::to_back( &tokenizer.events, tokenizer.events.len() - 1, - &[Token::ReferenceString], + &[Name::ReferenceString], ), ), ) @@ -577,11 +578,11 @@ pub fn reference_full_after(tokenizer: &mut Tokenizer) -> State { pub fn reference_collapsed(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'[') => { - tokenizer.enter(Token::Reference); - tokenizer.enter(Token::ReferenceMarker); + tokenizer.enter(Name::Reference); + tokenizer.enter(Name::ReferenceMarker); tokenizer.consume(); - tokenizer.exit(Token::ReferenceMarker); - State::Next(Name::LabelEndReferenceCollapsedOpen) + tokenizer.exit(Name::ReferenceMarker); + State::Next(StateName::LabelEndReferenceCollapsedOpen) } _ => State::Nok, } @@ -598,10 +599,10 @@ pub fn reference_collapsed(tokenizer: &mut Tokenizer) -> State { pub fn reference_collapsed_open(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b']') => { - tokenizer.enter(Token::ReferenceMarker); + tokenizer.enter(Name::ReferenceMarker); tokenizer.consume(); - tokenizer.exit(Token::ReferenceMarker); - tokenizer.exit(Token::Reference); + tokenizer.exit(Name::ReferenceMarker); + tokenizer.exit(Name::Reference); State::Ok } _ => State::Nok, @@ -633,14 +634,14 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) { data_exit_index - data_enter_index + 1, vec![ Event { - event_type: EventType::Enter, - token_type: Token::Data, + kind: Kind::Enter, + name: Name::Data, point: events[data_enter_index].point.clone(), link: None, }, Event { - event_type: EventType::Exit, - token_type: Token::Data, + kind: Kind::Exit, + name: Name::Data, point: events[data_exit_index].point.clone(), link: None, }, @@ -659,7 +660,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) { let group_enter_event = &events[group_enter_index]; // LabelLink:Exit or LabelImage:Exit. let text_enter_index = media.start.0 - + (if group_enter_event.token_type == Token::LabelLink { + + (if group_enter_event.name == Name::LabelLink { 4 } else { 6 @@ -677,18 +678,18 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) { 0, vec![ Event { - event_type: EventType::Enter, - token_type: if group_enter_event.token_type == Token::LabelLink { - Token::Link + kind: Kind::Enter, + name: if group_enter_event.name == Name::LabelLink { + Name::Link } else { - Token::Image + Name::Image }, point: group_enter_event.point.clone(), link: None, }, Event { - event_type: EventType::Enter, - token_type: Token::Label, + kind: Kind::Enter, + name: Name::Label, point: group_enter_event.point.clone(), link: None, }, @@ -702,8 +703,8 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) { text_enter_index, 0, vec![Event { - event_type: EventType::Enter, - token_type: Token::LabelText, + kind: Kind::Enter, + name: Name::LabelText, point: events[text_enter_index].point.clone(), link: None, }], @@ -714,8 +715,8 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) { text_exit_index, 0, vec![Event { - event_type: EventType::Exit, - token_type: Token::LabelText, + kind: Kind::Exit, + name: Name::LabelText, point: events[text_exit_index].point.clone(), link: None, }], @@ -727,8 +728,8 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) { label_exit_index + 1, 0, vec![Event { - event_type: EventType::Exit, - token_type: Token::Label, + kind: Kind::Exit, + name: Name::Label, point: events[label_exit_index].point.clone(), link: None, }], @@ -739,11 +740,11 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) { group_end_index + 1, 0, vec![Event { - event_type: EventType::Exit, - token_type: if group_enter_event.token_type == Token::LabelLink { - Token::Link + kind: Kind::Exit, + name: if group_enter_event.name == Name::LabelLink { + Name::Link } else { - Token::Image + Name::Image }, point: events[group_end_index].point.clone(), link: None, diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs index 7703ba4..2f7c0bf 100644 --- a/src/construct/label_start_image.rs +++ b/src/construct/label_start_image.rs @@ -29,8 +29,8 @@ //! [html-img]: https://html.spec.whatwg.org/multipage/embedded-content.html#the-img-element use super::label_end::resolve_media; -use crate::state::{Name, State}; -use crate::token::Token; +use crate::event::Name; +use crate::state::{Name as StateName, State}; use crate::tokenizer::{LabelStart, Tokenizer}; /// Start of label (image) start. @@ -42,11 +42,11 @@ use crate::tokenizer::{LabelStart, Tokenizer}; pub fn start(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'!') if tokenizer.parse_state.constructs.label_start_image => { - tokenizer.enter(Token::LabelImage); - tokenizer.enter(Token::LabelImageMarker); + tokenizer.enter(Name::LabelImage); + tokenizer.enter(Name::LabelImageMarker); tokenizer.consume(); - tokenizer.exit(Token::LabelImageMarker); - State::Next(Name::LabelStartImageOpen) + tokenizer.exit(Name::LabelImageMarker); + State::Next(StateName::LabelStartImageOpen) } _ => State::Nok, } @@ -61,10 +61,10 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { pub fn open(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'[') => { - tokenizer.enter(Token::LabelMarker); + tokenizer.enter(Name::LabelMarker); tokenizer.consume(); - tokenizer.exit(Token::LabelMarker); - tokenizer.exit(Token::LabelImage); + tokenizer.exit(Name::LabelMarker); + tokenizer.exit(Name::LabelImage); tokenizer.tokenize_state.label_start_stack.push(LabelStart { start: (tokenizer.events.len() - 6, tokenizer.events.len() - 1), balanced: false, diff --git a/src/construct/label_start_link.rs b/src/construct/label_start_link.rs index 3ca51bf..456a4e9 100644 --- a/src/construct/label_start_link.rs +++ b/src/construct/label_start_link.rs @@ -28,8 +28,8 @@ //! [html-a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element use super::label_end::resolve_media; +use crate::event::Name; use crate::state::State; -use crate::token::Token; use crate::tokenizer::{LabelStart, Tokenizer}; /// Start of label (link) start. @@ -42,11 +42,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'[') if tokenizer.parse_state.constructs.label_start_link => { let start = tokenizer.events.len(); - tokenizer.enter(Token::LabelLink); - tokenizer.enter(Token::LabelMarker); + tokenizer.enter(Name::LabelLink); + tokenizer.enter(Name::LabelMarker); tokenizer.consume(); - tokenizer.exit(Token::LabelMarker); - tokenizer.exit(Token::LabelLink); + tokenizer.exit(Name::LabelMarker); + tokenizer.exit(Name::LabelLink); tokenizer.tokenize_state.label_start_stack.push(LabelStart { start: (start, tokenizer.events.len() - 1), balanced: false, diff --git a/src/construct/list.rs b/src/construct/list.rs index 516cec7..ded77d0 100644 --- a/src/construct/list.rs +++ b/src/construct/list.rs @@ -46,9 +46,9 @@ use crate::constant::{LIST_ITEM_VALUE_SIZE_MAX, TAB_SIZE}; use crate::construct::partial_space_or_tab::space_or_tab_min_max; -use crate::state::{Name, State}; -use crate::token::Token; -use crate::tokenizer::{EventType, Tokenizer}; +use crate::event::{Kind, Name}; +use crate::state::{Name as StateName, State}; +use crate::tokenizer::Tokenizer; use crate::util::{ skip, slice::{Position, Slice}, @@ -62,7 +62,7 @@ use crate::util::{ /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { if tokenizer.parse_state.constructs.list { - tokenizer.enter(Token::ListItem); + tokenizer.enter(Name::ListItem); let name = space_or_tab_min_max( tokenizer, 0, @@ -72,7 +72,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { usize::MAX }, ); - tokenizer.attempt(name, State::Next(Name::ListBefore), State::Nok) + tokenizer.attempt(name, State::Next(StateName::ListBefore), State::Nok) } else { State::Nok } @@ -88,14 +88,14 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // Unordered. Some(b'*' | b'-') => tokenizer.check( - Name::ThematicBreakStart, - State::Next(Name::ListNok), - State::Next(Name::ListBeforeUnordered), + StateName::ThematicBreakStart, + State::Next(StateName::ListNok), + State::Next(StateName::ListBeforeUnordered), ), - Some(b'+') => State::Retry(Name::ListBeforeUnordered), + Some(b'+') => State::Retry(StateName::ListBeforeUnordered), // Ordered. - Some(b'0'..=b'9') if !tokenizer.interrupt => State::Retry(Name::ListBeforeOrdered), - Some(b'1') => State::Retry(Name::ListBeforeOrdered), + Some(b'0'..=b'9') if !tokenizer.interrupt => State::Retry(StateName::ListBeforeOrdered), + Some(b'1') => State::Retry(StateName::ListBeforeOrdered), _ => State::Nok, } } @@ -109,8 +109,8 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn before_unordered(tokenizer: &mut Tokenizer) -> State { - tokenizer.enter(Token::ListItemPrefix); - State::Retry(Name::ListMarker) + tokenizer.enter(Name::ListItemPrefix); + State::Retry(StateName::ListMarker) } /// Start of an ordered list item. @@ -120,9 +120,9 @@ pub fn before_unordered(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn before_ordered(tokenizer: &mut Tokenizer) -> State { - tokenizer.enter(Token::ListItemPrefix); - tokenizer.enter(Token::ListItemValue); - State::Retry(Name::ListValue) + tokenizer.enter(Name::ListItemPrefix); + tokenizer.enter(Name::ListItemValue); + State::Retry(StateName::ListValue) } /// In an ordered list item value. @@ -134,13 +134,13 @@ pub fn before_ordered(tokenizer: &mut Tokenizer) -> State { pub fn value(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'.' | b')') if !tokenizer.interrupt || tokenizer.tokenize_state.size < 2 => { - tokenizer.exit(Token::ListItemValue); - State::Retry(Name::ListMarker) + tokenizer.exit(Name::ListItemValue); + State::Retry(StateName::ListMarker) } Some(b'0'..=b'9') if tokenizer.tokenize_state.size + 1 < LIST_ITEM_VALUE_SIZE_MAX => { tokenizer.tokenize_state.size += 1; tokenizer.consume(); - State::Next(Name::ListValue) + State::Next(StateName::ListValue) } _ => { tokenizer.tokenize_state.size = 0; @@ -158,10 +158,10 @@ pub fn value(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn marker(tokenizer: &mut Tokenizer) -> State { - tokenizer.enter(Token::ListItemMarker); + tokenizer.enter(Name::ListItemMarker); tokenizer.consume(); - tokenizer.exit(Token::ListItemMarker); - State::Next(Name::ListMarkerAfter) + tokenizer.exit(Name::ListItemMarker); + State::Next(StateName::ListMarkerAfter) } /// After a list item marker. @@ -175,9 +175,9 @@ pub fn marker(tokenizer: &mut Tokenizer) -> State { pub fn marker_after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.size = 1; tokenizer.check( - Name::BlankLineStart, - State::Next(Name::ListAfter), - State::Next(Name::ListMarkerAfterFilled), + StateName::BlankLineStart, + State::Next(StateName::ListAfter), + State::Next(StateName::ListMarkerAfterFilled), ) } @@ -192,9 +192,9 @@ pub fn marker_after_filled(tokenizer: &mut Tokenizer) -> State { // Attempt to parse up to the largest allowed indent, `nok` if there is more whitespace. tokenizer.attempt( - Name::ListWhitespace, - State::Next(Name::ListAfter), - State::Next(Name::ListPrefixOther), + StateName::ListWhitespace, + State::Next(StateName::ListAfter), + State::Next(StateName::ListPrefixOther), ) } @@ -206,7 +206,11 @@ pub fn marker_after_filled(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn whitespace(tokenizer: &mut Tokenizer) -> State { let name = space_or_tab_min_max(tokenizer, 1, TAB_SIZE); - tokenizer.attempt(name, State::Next(Name::ListWhitespaceAfter), State::Nok) + tokenizer.attempt( + name, + State::Next(StateName::ListWhitespaceAfter), + State::Nok, + ) } /// After acceptable whitespace. @@ -232,10 +236,10 @@ pub fn whitespace_after(tokenizer: &mut Tokenizer) -> State { pub fn prefix_other(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'\t' | b' ') => { - tokenizer.enter(Token::SpaceOrTab); + tokenizer.enter(Name::SpaceOrTab); tokenizer.consume(); - tokenizer.exit(Token::SpaceOrTab); - State::Next(Name::ListAfter) + tokenizer.exit(Name::SpaceOrTab); + State::Next(StateName::ListAfter) } _ => State::Nok, } @@ -257,7 +261,7 @@ pub fn after(tokenizer: &mut Tokenizer) -> State { let start = skip::to_back( &tokenizer.events, tokenizer.events.len() - 1, - &[Token::ListItem], + &[Name::ListItem], ); let mut prefix = Slice::from_position( tokenizer.parse_state.bytes, @@ -278,7 +282,7 @@ pub fn after(tokenizer: &mut Tokenizer) -> State { container.blank_initial = blank; container.size = prefix; - tokenizer.exit(Token::ListItemPrefix); + tokenizer.exit(Name::ListItemPrefix); tokenizer.register_resolver_before("list_item".to_string(), Box::new(resolve_list_item)); State::Ok } @@ -293,9 +297,9 @@ pub fn after(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn cont_start(tokenizer: &mut Tokenizer) -> State { tokenizer.check( - Name::BlankLineStart, - State::Next(Name::ListContBlank), - State::Next(Name::ListContFilled), + StateName::BlankLineStart, + State::Next(StateName::ListContBlank), + State::Next(StateName::ListContFilled), ) } @@ -317,7 +321,7 @@ pub fn cont_blank(tokenizer: &mut Tokenizer) -> State { } else { let name = space_or_tab_min_max(tokenizer, 0, size); // Consume, optionally, at most `size`. - tokenizer.attempt(name, State::Next(Name::ListOk), State::Nok) + tokenizer.attempt(name, State::Next(StateName::ListOk), State::Nok) } } @@ -337,7 +341,7 @@ pub fn cont_filled(tokenizer: &mut Tokenizer) -> State { // Consume exactly `size`. let name = space_or_tab_min_max(tokenizer, size, size); - tokenizer.attempt(name, State::Next(Name::ListOk), State::Nok) + tokenizer.attempt(name, State::Next(StateName::ListOk), State::Nok) } /// A state fn to yield [`State::Ok`]. @@ -361,10 +365,10 @@ pub fn resolve_list_item(tokenizer: &mut Tokenizer) { while index < tokenizer.events.len() { let event = &tokenizer.events[index]; - if event.token_type == Token::ListItem { - if event.event_type == EventType::Enter { - let end = skip::opt(&tokenizer.events, index, &[Token::ListItem]) - 1; - let marker = skip::to(&tokenizer.events, index, &[Token::ListItemMarker]); + if event.name == Name::ListItem { + if event.kind == Kind::Enter { + let end = skip::opt(&tokenizer.events, index, &[Name::ListItem]) - 1; + let marker = skip::to(&tokenizer.events, index, &[Name::ListItemMarker]); // Guaranteed to be a valid ASCII byte. let marker = Slice::from_index( tokenizer.parse_state.bytes, @@ -384,10 +388,10 @@ pub fn resolve_list_item(tokenizer: &mut Tokenizer) { &tokenizer.events, previous.3 + 1, &[ - Token::SpaceOrTab, - Token::LineEnding, - Token::BlankLineEnding, - Token::BlockQuotePrefix, + Name::SpaceOrTab, + Name::LineEnding, + Name::BlankLineEnding, + Name::BlockQuotePrefix, ], ); @@ -441,12 +445,12 @@ pub fn resolve_list_item(tokenizer: &mut Tokenizer) { let list_item = &lists[index]; let mut list_start = tokenizer.events[list_item.2].clone(); let mut list_end = tokenizer.events[list_item.3].clone(); - let token_type = match list_item.0 { - b'.' | b')' => Token::ListOrdered, - _ => Token::ListUnordered, + let name = match list_item.0 { + b'.' | b')' => Name::ListOrdered, + _ => Name::ListUnordered, }; - list_start.token_type = token_type.clone(); - list_end.token_type = token_type; + list_start.name = name.clone(); + list_end.name = name; tokenizer.map.add(list_item.2, 0, vec![list_start]); tokenizer.map.add(list_item.3 + 1, 0, vec![list_end]); diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs index dec25b8..b605c0f 100644 --- a/src/construct/paragraph.rs +++ b/src/construct/paragraph.rs @@ -32,9 +32,9 @@ //! [code_text]: crate::construct::code_text //! [html]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-p-element -use crate::state::{Name, State}; -use crate::token::Token; -use crate::tokenizer::{ContentType, EventType, Tokenizer}; +use crate::event::{Content, Kind, Name}; +use crate::state::{Name as StateName, State}; +use crate::tokenizer::Tokenizer; use crate::util::skip::opt as skip_opt; /// Before a paragraph. @@ -47,9 +47,9 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => unreachable!("unexpected eol/eof"), _ => { - tokenizer.enter(Token::Paragraph); - tokenizer.enter_with_content(Token::Data, Some(ContentType::Text)); - State::Retry(Name::ParagraphInside) + tokenizer.enter(Name::Paragraph); + tokenizer.enter_with_content(Name::Data, Some(Content::Text)); + State::Retry(StateName::ParagraphInside) } } } @@ -63,8 +63,8 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { pub fn inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { - tokenizer.exit(Token::Data); - tokenizer.exit(Token::Paragraph); + tokenizer.exit(Name::Data); + tokenizer.exit(Name::Paragraph); tokenizer.register_resolver_before("paragraph".to_string(), Box::new(resolve)); // You’d be interrupting. tokenizer.interrupt = true; @@ -72,7 +72,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State { } _ => { tokenizer.consume(); - State::Next(Name::ParagraphInside) + State::Next(StateName::ParagraphInside) } } } @@ -85,21 +85,21 @@ pub fn resolve(tokenizer: &mut Tokenizer) { while index < tokenizer.events.len() { let event = &tokenizer.events[index]; - if event.event_type == EventType::Enter && event.token_type == Token::Paragraph { + if event.kind == Kind::Enter && event.name == Name::Paragraph { // Exit:Paragraph let mut exit_index = index + 3; let mut enter_next_index = - skip_opt(&tokenizer.events, exit_index + 1, &[Token::LineEnding]); + skip_opt(&tokenizer.events, exit_index + 1, &[Name::LineEnding]); // Enter:Paragraph enter_next_index = skip_opt( &tokenizer.events, enter_next_index, - &[Token::SpaceOrTab, Token::BlockQuotePrefix], + &[Name::SpaceOrTab, Name::BlockQuotePrefix], ); // Find future `Paragraphs`. while enter_next_index < tokenizer.events.len() - && tokenizer.events[enter_next_index].token_type == Token::Paragraph + && tokenizer.events[enter_next_index].name == Name::Paragraph { // Remove Exit:Paragraph, Enter:LineEnding, Exit:LineEnding, Enter:Paragraph. tokenizer.map.add(exit_index, 3, vec![]); @@ -123,12 +123,11 @@ pub fn resolve(tokenizer: &mut Tokenizer) { // Potential next start. exit_index = enter_next_index + 3; - enter_next_index = - skip_opt(&tokenizer.events, exit_index + 1, &[Token::LineEnding]); + enter_next_index = skip_opt(&tokenizer.events, exit_index + 1, &[Name::LineEnding]); enter_next_index = skip_opt( &tokenizer.events, enter_next_index, - &[Token::SpaceOrTab, Token::BlockQuotePrefix], + &[Name::SpaceOrTab, Name::BlockQuotePrefix], ); } diff --git a/src/construct/partial_bom.rs b/src/construct/partial_bom.rs index cca0770..74ca04e 100644 --- a/src/construct/partial_bom.rs +++ b/src/construct/partial_bom.rs @@ -10,8 +10,8 @@ //! //! * [`micromark/lib/preprocess.js` in `micromark`](https://github.com/micromark/micromark/blob/ed23453/packages/micromark/dev/lib/preprocess.js#L54-L60) -use crate::state::{Name, State}; -use crate::token::Token; +use crate::event::Name; +use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; const BOM: [u8; 3] = [0xEF, 0xBB, 0xBF]; @@ -24,8 +24,8 @@ const BOM: [u8; 3] = [0xEF, 0xBB, 0xBF]; /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { if tokenizer.current == Some(BOM[0]) { - tokenizer.enter(Token::ByteOrderMark); - State::Retry(Name::BomInside) + tokenizer.enter(Name::ByteOrderMark); + State::Retry(StateName::BomInside) } else { State::Nok } @@ -42,11 +42,11 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.size += 1; tokenizer.consume(); if tokenizer.tokenize_state.size == BOM.len() { - tokenizer.exit(Token::ByteOrderMark); + tokenizer.exit(Name::ByteOrderMark); tokenizer.tokenize_state.size = 0; State::Ok } else { - State::Next(Name::BomInside) + State::Next(StateName::BomInside) } } else { tokenizer.tokenize_state.size = 0; diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs index c05aaa5..fda021e 100644 --- a/src/construct/partial_data.rs +++ b/src/construct/partial_data.rs @@ -6,9 +6,9 @@ //! [string]: crate::content::string //! [text]: crate::content::text -use crate::state::{Name, State}; -use crate::token::Token; -use crate::tokenizer::{EventType, Tokenizer}; +use crate::event::{Kind, Name}; +use crate::state::{Name as StateName, State}; +use crate::tokenizer::Tokenizer; /// At the beginning of data. /// @@ -20,11 +20,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // Make sure to eat the first `markers`. Some(byte) if tokenizer.tokenize_state.markers.contains(&byte) => { - tokenizer.enter(Token::Data); + tokenizer.enter(Name::Data); tokenizer.consume(); - State::Next(Name::DataInside) + State::Next(StateName::DataInside) } - _ => State::Retry(Name::DataAtBreak), + _ => State::Retry(StateName::DataAtBreak), } } @@ -38,18 +38,18 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => State::Ok, Some(b'\n') => { - tokenizer.enter(Token::LineEnding); + tokenizer.enter(Name::LineEnding); tokenizer.consume(); - tokenizer.exit(Token::LineEnding); - State::Next(Name::DataAtBreak) + tokenizer.exit(Name::LineEnding); + State::Next(StateName::DataAtBreak) } Some(byte) if tokenizer.tokenize_state.markers.contains(&byte) => { tokenizer.register_resolver_before("data".to_string(), Box::new(resolve_data)); State::Ok } _ => { - tokenizer.enter(Token::Data); - State::Retry(Name::DataInside) + tokenizer.enter(Name::Data); + State::Retry(StateName::DataInside) } } } @@ -68,11 +68,11 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State { }; if done { - tokenizer.exit(Token::Data); - State::Retry(Name::DataAtBreak) + tokenizer.exit(Name::Data); + State::Retry(StateName::DataAtBreak) } else { tokenizer.consume(); - State::Next(Name::DataInside) + State::Next(StateName::DataInside) } } @@ -85,13 +85,13 @@ pub fn resolve_data(tokenizer: &mut Tokenizer) { while index < len { let event = &tokenizer.events[index]; - if event.event_type == EventType::Enter && event.token_type == Token::Data { + if event.kind == Kind::Enter && event.name == Name::Data { let exit_index = index + 1; let mut exit_far_index = exit_index; // Find multiple `data` events. while exit_far_index + 1 < len - && tokenizer.events[exit_far_index + 1].token_type == Token::Data + && tokenizer.events[exit_far_index + 1].name == Name::Data { exit_far_index += 2; } diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs index 5aa0539..c1c1e10 100644 --- a/src/construct/partial_destination.rs +++ b/src/construct/partial_destination.rs @@ -71,9 +71,9 @@ //! [label_end]: crate::construct::label_end //! [sanitize_uri]: crate::util::sanitize_uri -use crate::state::{Name, State}; -use crate::token::Token; -use crate::tokenizer::{ContentType, Tokenizer}; +use crate::event::{Content, Name}; +use crate::state::{Name as StateName, State}; +use crate::tokenizer::Tokenizer; /// Before a destination. /// @@ -91,7 +91,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(tokenizer.tokenize_state.token_3.clone()); tokenizer.consume(); tokenizer.exit(tokenizer.tokenize_state.token_3.clone()); - State::Next(Name::DestinationEnclosedBefore) + State::Next(StateName::DestinationEnclosedBefore) } // ASCII control, space, closing paren, but *not* `\0`. None | Some(0x01..=0x1F | b' ' | b')' | 0x7F) => State::Nok, @@ -99,8 +99,8 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(tokenizer.tokenize_state.token_1.clone()); tokenizer.enter(tokenizer.tokenize_state.token_4.clone()); tokenizer.enter(tokenizer.tokenize_state.token_5.clone()); - tokenizer.enter_with_content(Token::Data, Some(ContentType::String)); - State::Retry(Name::DestinationRaw) + tokenizer.enter_with_content(Name::Data, Some(Content::String)); + State::Retry(StateName::DestinationRaw) } } } @@ -121,8 +121,8 @@ pub fn enclosed_before(tokenizer: &mut Tokenizer) -> State { State::Ok } else { tokenizer.enter(tokenizer.tokenize_state.token_5.clone()); - tokenizer.enter_with_content(Token::Data, Some(ContentType::String)); - State::Retry(Name::DestinationEnclosed) + tokenizer.enter_with_content(Name::Data, Some(Content::String)); + State::Retry(StateName::DestinationEnclosed) } } @@ -136,17 +136,17 @@ pub fn enclosed(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n' | b'<') => State::Nok, Some(b'>') => { - tokenizer.exit(Token::Data); + tokenizer.exit(Name::Data); tokenizer.exit(tokenizer.tokenize_state.token_5.clone()); - State::Retry(Name::DestinationEnclosedBefore) + State::Retry(StateName::DestinationEnclosedBefore) } Some(b'\\') => { tokenizer.consume(); - State::Next(Name::DestinationEnclosedEscape) + State::Next(StateName::DestinationEnclosedEscape) } _ => { tokenizer.consume(); - State::Next(Name::DestinationEnclosed) + State::Next(StateName::DestinationEnclosed) } } } @@ -161,9 +161,9 @@ pub fn enclosed_escape(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'<' | b'>' | b'\\') => { tokenizer.consume(); - State::Next(Name::DestinationEnclosed) + State::Next(StateName::DestinationEnclosed) } - _ => State::Retry(Name::DestinationEnclosed), + _ => State::Retry(StateName::DestinationEnclosed), } } @@ -176,7 +176,7 @@ pub fn enclosed_escape(tokenizer: &mut Tokenizer) -> State { pub fn raw(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\t' | b'\n' | b' ' | b')') if tokenizer.tokenize_state.size == 0 => { - tokenizer.exit(Token::Data); + tokenizer.exit(Name::Data); tokenizer.exit(tokenizer.tokenize_state.token_5.clone()); tokenizer.exit(tokenizer.tokenize_state.token_4.clone()); tokenizer.exit(tokenizer.tokenize_state.token_1.clone()); @@ -186,7 +186,7 @@ pub fn raw(tokenizer: &mut Tokenizer) -> State { Some(b'(') if tokenizer.tokenize_state.size < tokenizer.tokenize_state.size_b => { tokenizer.consume(); tokenizer.tokenize_state.size += 1; - State::Next(Name::DestinationRaw) + State::Next(StateName::DestinationRaw) } // ASCII control (but *not* `\0`) and space and `(`. None | Some(0x01..=0x1F | b' ' | b'(' | 0x7F) => { @@ -196,15 +196,15 @@ pub fn raw(tokenizer: &mut Tokenizer) -> State { Some(b')') => { tokenizer.consume(); tokenizer.tokenize_state.size -= 1; - State::Next(Name::DestinationRaw) + State::Next(StateName::DestinationRaw) } Some(b'\\') => { tokenizer.consume(); - State::Next(Name::DestinationRawEscape) + State::Next(StateName::DestinationRawEscape) } Some(_) => { tokenizer.consume(); - State::Next(Name::DestinationRaw) + State::Next(StateName::DestinationRaw) } } } @@ -219,8 +219,8 @@ pub fn raw_escape(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'(' | b')' | b'\\') => { tokenizer.consume(); - State::Next(Name::DestinationRaw) + State::Next(StateName::DestinationRaw) } - _ => State::Retry(Name::DestinationRaw), + _ => State::Retry(StateName::DestinationRaw), } } diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs index 6e7c843..85769eb 100644 --- a/src/construct/partial_label.rs +++ b/src/construct/partial_label.rs @@ -58,12 +58,12 @@ //! [code_text]: crate::construct::code_text //! [link_reference_size_max]: crate::constant::LINK_REFERENCE_SIZE_MAX -use super::partial_space_or_tab::{space_or_tab_eol_with_options, EolOptions}; use crate::constant::LINK_REFERENCE_SIZE_MAX; -use crate::state::{Name, State}; +use crate::construct::partial_space_or_tab::{space_or_tab_eol_with_options, EolOptions}; +use crate::event::{Content, Name}; +use crate::state::{Name as StateName, State}; use crate::subtokenize::link; -use crate::token::Token; -use crate::tokenizer::{ContentType, Tokenizer}; +use crate::tokenizer::Tokenizer; /// Before a label. /// @@ -79,7 +79,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.consume(); tokenizer.exit(tokenizer.tokenize_state.token_2.clone()); tokenizer.enter(tokenizer.tokenize_state.token_3.clone()); - State::Next(Name::LabelAtBreak) + State::Next(StateName::LabelAtBreak) } _ => State::Nok, } @@ -106,14 +106,14 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { let name = space_or_tab_eol_with_options( tokenizer, EolOptions { - content_type: Some(ContentType::String), + content_type: Some(Content::String), connect: tokenizer.tokenize_state.connect, }, ); tokenizer.attempt( name, - State::Next(Name::LabelEolAfter), - State::Next(Name::LabelAtBlankLine), + State::Next(StateName::LabelEolAfter), + State::Next(StateName::LabelAtBlankLine), ) } Some(b']') => { @@ -128,7 +128,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { State::Ok } _ => { - tokenizer.enter_with_content(Token::Data, Some(ContentType::String)); + tokenizer.enter_with_content(Name::Data, Some(Content::String)); if tokenizer.tokenize_state.connect { let index = tokenizer.events.len() - 1; @@ -137,7 +137,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.connect = true; } - State::Retry(Name::LabelInside) + State::Retry(StateName::LabelInside) } } } @@ -152,7 +152,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn eol_after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.connect = true; - State::Retry(Name::LabelAtBreak) + State::Retry(StateName::LabelAtBreak) } /// In a label, at a blank line. @@ -178,13 +178,13 @@ pub fn at_blank_line(tokenizer: &mut Tokenizer) -> State { pub fn inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n' | b'[' | b']') => { - tokenizer.exit(Token::Data); - State::Retry(Name::LabelAtBreak) + tokenizer.exit(Name::Data); + State::Retry(StateName::LabelAtBreak) } Some(byte) => { if tokenizer.tokenize_state.size > LINK_REFERENCE_SIZE_MAX { - tokenizer.exit(Token::Data); - State::Retry(Name::LabelAtBreak) + tokenizer.exit(Name::Data); + State::Retry(StateName::LabelAtBreak) } else { tokenizer.consume(); tokenizer.tokenize_state.size += 1; @@ -192,9 +192,9 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.seen = true; } State::Next(if matches!(byte, b'\\') { - Name::LabelEscape + StateName::LabelEscape } else { - Name::LabelInside + StateName::LabelInside }) } } @@ -212,8 +212,8 @@ pub fn escape(tokenizer: &mut Tokenizer) -> State { Some(b'[' | b'\\' | b']') => { tokenizer.consume(); tokenizer.tokenize_state.size += 1; - State::Next(Name::LabelInside) + State::Next(StateName::LabelInside) } - _ => State::Retry(Name::LabelInside), + _ => State::Retry(StateName::LabelInside), } } diff --git a/src/construct/partial_non_lazy_continuation.rs b/src/construct/partial_non_lazy_continuation.rs index 497c81e..76854c8 100644 --- a/src/construct/partial_non_lazy_continuation.rs +++ b/src/construct/partial_non_lazy_continuation.rs @@ -10,8 +10,8 @@ //! [code_indented]: crate::construct::code_indented //! [html_flow]: crate::construct::html_flow -use crate::state::{Name, State}; -use crate::token::Token; +use crate::event::Name; +use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; /// Start of continuation. @@ -24,10 +24,10 @@ use crate::tokenizer::Tokenizer; pub fn start(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'\n') => { - tokenizer.enter(Token::LineEnding); + tokenizer.enter(Name::LineEnding); tokenizer.consume(); - tokenizer.exit(Token::LineEnding); - State::Next(Name::NonLazyContinuationAfter) + tokenizer.exit(Name::LineEnding); + State::Next(StateName::NonLazyContinuationAfter) } _ => State::Nok, } diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs index a8e8f06..3fc9484 100644 --- a/src/construct/partial_space_or_tab.rs +++ b/src/construct/partial_space_or_tab.rs @@ -4,10 +4,10 @@ //! //! * [`micromark-factory-space/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-space/dev/index.js) -use crate::state::{Name, State}; +use crate::event::{Content, Name}; +use crate::state::{Name as StateName, State}; use crate::subtokenize::link; -use crate::token::Token; -use crate::tokenizer::{ContentType, Tokenizer}; +use crate::tokenizer::Tokenizer; /// Options to parse `space_or_tab`. #[derive(Debug)] @@ -17,11 +17,11 @@ pub struct Options { /// Maximum allowed bytes (inclusive). pub max: usize, /// Token type to use for whitespace events. - pub kind: Token, + pub kind: Name, /// Connect this whitespace to the previous. pub connect: bool, /// Embedded content type to use. - pub content_type: Option<ContentType>, + pub content_type: Option<Content>, } /// Options to parse `space_or_tab` and one optional eol, but no blank line. @@ -30,7 +30,7 @@ pub struct EolOptions { /// Connect this whitespace to the previous. pub connect: bool, /// Embedded content type to use. - pub content_type: Option<ContentType>, + pub content_type: Option<Content>, } /// One or more `space_or_tab`. @@ -38,7 +38,7 @@ pub struct EolOptions { /// ```bnf /// space_or_tab ::= 1*( ' ' '\t' ) /// ``` -pub fn space_or_tab(tokenizer: &mut Tokenizer) -> Name { +pub fn space_or_tab(tokenizer: &mut Tokenizer) -> StateName { space_or_tab_min_max(tokenizer, 1, usize::MAX) } @@ -47,11 +47,11 @@ pub fn space_or_tab(tokenizer: &mut Tokenizer) -> Name { /// ```bnf /// space_or_tab_min_max ::= x*y( ' ' '\t' ) /// ``` -pub fn space_or_tab_min_max(tokenizer: &mut Tokenizer, min: usize, max: usize) -> Name { +pub fn space_or_tab_min_max(tokenizer: &mut Tokenizer, min: usize, max: usize) -> StateName { space_or_tab_with_options( tokenizer, Options { - kind: Token::SpaceOrTab, + kind: Name::SpaceOrTab, min, max, content_type: None, @@ -61,13 +61,13 @@ pub fn space_or_tab_min_max(tokenizer: &mut Tokenizer, min: usize, max: usize) - } /// `space_or_tab`, with the given options. -pub fn space_or_tab_with_options(tokenizer: &mut Tokenizer, options: Options) -> Name { +pub fn space_or_tab_with_options(tokenizer: &mut Tokenizer, options: Options) -> StateName { tokenizer.tokenize_state.space_or_tab_connect = options.connect; tokenizer.tokenize_state.space_or_tab_content_type = options.content_type; tokenizer.tokenize_state.space_or_tab_min = options.min; tokenizer.tokenize_state.space_or_tab_max = options.max; tokenizer.tokenize_state.space_or_tab_token = options.kind; - Name::SpaceOrTabStart + StateName::SpaceOrTabStart } /// `space_or_tab`, or optionally `space_or_tab`, one `eol`, and @@ -76,7 +76,7 @@ pub fn space_or_tab_with_options(tokenizer: &mut Tokenizer, options: Options) -> /// ```bnf /// space_or_tab_eol ::= 1*( ' ' '\t' ) | 0*( ' ' '\t' ) eol 0*( ' ' '\t' ) /// ``` -pub fn space_or_tab_eol(tokenizer: &mut Tokenizer) -> Name { +pub fn space_or_tab_eol(tokenizer: &mut Tokenizer) -> StateName { space_or_tab_eol_with_options( tokenizer, EolOptions { @@ -87,10 +87,10 @@ pub fn space_or_tab_eol(tokenizer: &mut Tokenizer) -> Name { } /// `space_or_tab_eol`, with the given options. -pub fn space_or_tab_eol_with_options(tokenizer: &mut Tokenizer, options: EolOptions) -> Name { +pub fn space_or_tab_eol_with_options(tokenizer: &mut Tokenizer, options: EolOptions) -> StateName { tokenizer.tokenize_state.space_or_tab_eol_content_type = options.content_type; tokenizer.tokenize_state.space_or_tab_eol_connect = options.connect; - Name::SpaceOrTabEolStart + StateName::SpaceOrTabEolStart } /// Before `space_or_tab`. @@ -114,9 +114,9 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.space_or_tab_connect = true; } - State::Retry(Name::SpaceOrTabInside) + State::Retry(StateName::SpaceOrTabInside) } - _ => State::Retry(Name::SpaceOrTabAfter), + _ => State::Retry(StateName::SpaceOrTabAfter), } } @@ -134,11 +134,11 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State { { tokenizer.consume(); tokenizer.tokenize_state.space_or_tab_size += 1; - State::Next(Name::SpaceOrTabInside) + State::Next(StateName::SpaceOrTabInside) } _ => { tokenizer.exit(tokenizer.tokenize_state.space_or_tab_token.clone()); - State::Retry(Name::SpaceOrTabAfter) + State::Retry(StateName::SpaceOrTabAfter) } } } @@ -162,7 +162,7 @@ pub fn after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.space_or_tab_size = 0; tokenizer.tokenize_state.space_or_tab_max = 0; tokenizer.tokenize_state.space_or_tab_min = 0; - tokenizer.tokenize_state.space_or_tab_token = Token::SpaceOrTab; + tokenizer.tokenize_state.space_or_tab_token = Name::SpaceOrTab; state } @@ -170,7 +170,7 @@ pub fn eol_start(tokenizer: &mut Tokenizer) -> State { let name = space_or_tab_with_options( tokenizer, Options { - kind: Token::SpaceOrTab, + kind: Name::SpaceOrTab, min: 1, max: usize::MAX, content_type: tokenizer @@ -183,8 +183,8 @@ pub fn eol_start(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( name, - State::Next(Name::SpaceOrTabEolAfterFirst), - State::Next(Name::SpaceOrTabEolAtEol), + State::Next(StateName::SpaceOrTabEolAfterFirst), + State::Next(StateName::SpaceOrTabEolAtEol), ) } @@ -199,7 +199,7 @@ pub fn eol_after_first(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.space_or_tab_eol_connect = true; } - State::Retry(Name::SpaceOrTabEolAtEol) + State::Retry(StateName::SpaceOrTabEolAtEol) } /// `space_or_tab_eol`: after optionally first `space_or_tab`. @@ -212,7 +212,7 @@ pub fn eol_after_first(tokenizer: &mut Tokenizer) -> State { pub fn eol_at_eol(tokenizer: &mut Tokenizer) -> State { if let Some(b'\n') = tokenizer.current { tokenizer.enter_with_content( - Token::LineEnding, + Name::LineEnding, tokenizer .tokenize_state .space_or_tab_eol_content_type @@ -231,8 +231,8 @@ pub fn eol_at_eol(tokenizer: &mut Tokenizer) -> State { } tokenizer.consume(); - tokenizer.exit(Token::LineEnding); - State::Next(Name::SpaceOrTabEolAfterEol) + tokenizer.exit(Name::LineEnding); + State::Next(StateName::SpaceOrTabEolAfterEol) } else { let ok = tokenizer.tokenize_state.space_or_tab_eol_ok; tokenizer.tokenize_state.space_or_tab_eol_content_type = None; @@ -258,7 +258,7 @@ pub fn eol_after_eol(tokenizer: &mut Tokenizer) -> State { let name = space_or_tab_with_options( tokenizer, Options { - kind: Token::SpaceOrTab, + kind: Name::SpaceOrTab, min: 1, max: usize::MAX, content_type: tokenizer @@ -270,8 +270,8 @@ pub fn eol_after_eol(tokenizer: &mut Tokenizer) -> State { ); tokenizer.attempt( name, - State::Next(Name::SpaceOrTabEolAfterMore), - State::Next(Name::SpaceOrTabEolAfterMore), + State::Next(StateName::SpaceOrTabEolAfterMore), + State::Next(StateName::SpaceOrTabEolAfterMore), ) } diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs index 11c28bd..be06c02 100644 --- a/src/construct/partial_title.rs +++ b/src/construct/partial_title.rs @@ -31,10 +31,10 @@ //! [label_end]: crate::construct::label_end use crate::construct::partial_space_or_tab::{space_or_tab_eol_with_options, EolOptions}; -use crate::state::{Name, State}; +use crate::event::{Content, Name}; +use crate::state::{Name as StateName, State}; use crate::subtokenize::link; -use crate::token::Token; -use crate::tokenizer::{ContentType, Tokenizer}; +use crate::tokenizer::Tokenizer; /// Before a title. /// @@ -51,7 +51,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(tokenizer.tokenize_state.token_2.clone()); tokenizer.consume(); tokenizer.exit(tokenizer.tokenize_state.token_2.clone()); - State::Next(Name::TitleBegin) + State::Next(StateName::TitleBegin) } _ => State::Nok, } @@ -80,7 +80,7 @@ pub fn begin(tokenizer: &mut Tokenizer) -> State { } _ => { tokenizer.enter(tokenizer.tokenize_state.token_3.clone()); - State::Retry(Name::TitleAtBreak) + State::Retry(StateName::TitleAtBreak) } } } @@ -102,25 +102,25 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { let name = space_or_tab_eol_with_options( tokenizer, EolOptions { - content_type: Some(ContentType::String), + content_type: Some(Content::String), connect: tokenizer.tokenize_state.connect, }, ); tokenizer.attempt( name, - State::Next(Name::TitleAfterEol), - State::Next(Name::TitleAtBlankLine), + State::Next(StateName::TitleAfterEol), + State::Next(StateName::TitleAtBlankLine), ) } Some(b'"' | b'\'' | b')') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => { tokenizer.exit(tokenizer.tokenize_state.token_3.clone()); - State::Retry(Name::TitleBegin) + State::Retry(StateName::TitleBegin) } Some(_) => { - tokenizer.enter_with_content(Token::Data, Some(ContentType::String)); + tokenizer.enter_with_content(Name::Data, Some(Content::String)); if tokenizer.tokenize_state.connect { let index = tokenizer.events.len() - 1; @@ -129,7 +129,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.connect = true; } - State::Retry(Name::TitleInside) + State::Retry(StateName::TitleInside) } } } @@ -143,7 +143,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn after_eol(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.connect = true; - State::Retry(Name::TitleAtBreak) + State::Retry(StateName::TitleAtBreak) } /// In a title, at a blank line. @@ -169,21 +169,21 @@ pub fn at_blank_line(tokenizer: &mut Tokenizer) -> State { pub fn inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { - tokenizer.exit(Token::Data); - State::Retry(Name::TitleAtBreak) + tokenizer.exit(Name::Data); + State::Retry(StateName::TitleAtBreak) } Some(b'"' | b'\'' | b')') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => { - tokenizer.exit(Token::Data); - State::Retry(Name::TitleAtBreak) + tokenizer.exit(Name::Data); + State::Retry(StateName::TitleAtBreak) } Some(byte) => { tokenizer.consume(); State::Next(if matches!(byte, b'\\') { - Name::TitleEscape + StateName::TitleEscape } else { - Name::TitleInside + StateName::TitleInside }) } } @@ -199,8 +199,8 @@ pub fn escape(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'"' | b'\'' | b')') => { tokenizer.consume(); - State::Next(Name::TitleInside) + State::Next(StateName::TitleInside) } - _ => State::Retry(Name::TitleInside), + _ => State::Retry(StateName::TitleInside), } } diff --git a/src/construct/partial_whitespace.rs b/src/construct/partial_whitespace.rs index 0905e10..688e7fd 100644 --- a/src/construct/partial_whitespace.rs +++ b/src/construct/partial_whitespace.rs @@ -46,8 +46,8 @@ //! [html]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-br-element use crate::constant::HARD_BREAK_PREFIX_SIZE_MIN; -use crate::token::Token; -use crate::tokenizer::{Event, EventType, Tokenizer}; +use crate::event::{Event, Kind, Name}; +use crate::tokenizer::Tokenizer; use crate::util::slice::{Position, Slice}; /// Resolve whitespace. @@ -57,12 +57,12 @@ pub fn resolve_whitespace(tokenizer: &mut Tokenizer, hard_break: bool, trim_whol while index < tokenizer.events.len() { let event = &tokenizer.events[index]; - if event.event_type == EventType::Exit && event.token_type == Token::Data { + if event.kind == Kind::Exit && event.name == Name::Data { let trim_start = (trim_whole && index == 1) - || (index > 1 && tokenizer.events[index - 2].token_type == Token::LineEnding); + || (index > 1 && tokenizer.events[index - 2].name == Name::LineEnding); let trim_end = (trim_whole && index == tokenizer.events.len() - 1) || (index + 1 < tokenizer.events.len() - && tokenizer.events[index + 1].token_type == Token::LineEnding); + && tokenizer.events[index + 1].name == Name::LineEnding); trim_data(tokenizer, index, trim_start, trim_end, hard_break); } @@ -98,21 +98,21 @@ fn trim_data( } let diff = slice.bytes.len() - index; - let token_type = if hard_break + let name = if hard_break && spaces_only && diff >= HARD_BREAK_PREFIX_SIZE_MIN && exit_index + 1 < tokenizer.events.len() { - Token::HardBreakTrailing + Name::HardBreakTrailing } else { - Token::SpaceOrTab + Name::SpaceOrTab }; // The whole data is whitespace. // We can be very fast: we only change the token types. if index == 0 { - tokenizer.events[exit_index - 1].token_type = token_type.clone(); - tokenizer.events[exit_index].token_type = token_type; + tokenizer.events[exit_index - 1].name = name.clone(); + tokenizer.events[exit_index].name = name; return; } @@ -128,14 +128,14 @@ fn trim_data( 0, vec![ Event { - event_type: EventType::Enter, - token_type: token_type.clone(), + kind: Kind::Enter, + name: name.clone(), point: enter_point.clone(), link: None, }, Event { - event_type: EventType::Exit, - token_type, + kind: Kind::Exit, + name, point: exit_point, link: None, }, @@ -159,8 +159,8 @@ fn trim_data( // The whole data is whitespace. // We can be very fast: we only change the token types. if index == slice.bytes.len() { - tokenizer.events[exit_index - 1].token_type = Token::SpaceOrTab; - tokenizer.events[exit_index].token_type = Token::SpaceOrTab; + tokenizer.events[exit_index - 1].name = Name::SpaceOrTab; + tokenizer.events[exit_index].name = Name::SpaceOrTab; return; } @@ -176,14 +176,14 @@ fn trim_data( 0, vec![ Event { - event_type: EventType::Enter, - token_type: Token::SpaceOrTab, + kind: Kind::Enter, + name: Name::SpaceOrTab, point: enter_point, link: None, }, Event { - event_type: EventType::Exit, - token_type: Token::SpaceOrTab, + kind: Kind::Exit, + name: Name::SpaceOrTab, point: exit_point.clone(), link: None, }, diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs index fc71d73..8e3c4f4 100644 --- a/src/construct/thematic_break.rs +++ b/src/construct/thematic_break.rs @@ -50,8 +50,8 @@ use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}; use crate::constant::{TAB_SIZE, THEMATIC_BREAK_MARKER_COUNT_MIN}; -use crate::state::{Name, State}; -use crate::token::Token; +use crate::event::Name; +use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; /// Start of a thematic break. @@ -62,7 +62,7 @@ use crate::tokenizer::Tokenizer; /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { if tokenizer.parse_state.constructs.thematic_break { - tokenizer.enter(Token::ThematicBreak); + tokenizer.enter(Name::ThematicBreak); let name = space_or_tab_min_max( tokenizer, 0, @@ -73,7 +73,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { }, ); - tokenizer.attempt(name, State::Next(Name::ThematicBreakBefore), State::Nok) + tokenizer.attempt( + name, + State::Next(StateName::ThematicBreakBefore), + State::Nok, + ) } else { State::Nok } @@ -89,7 +93,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'*' | b'-' | b'_') => { tokenizer.tokenize_state.marker = tokenizer.current.unwrap(); - State::Retry(Name::ThematicBreakAtBreak) + State::Retry(StateName::ThematicBreakAtBreak) } _ => State::Nok, } @@ -106,7 +110,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { None | Some(b'\n') if tokenizer.tokenize_state.size >= THEMATIC_BREAK_MARKER_COUNT_MIN => { tokenizer.tokenize_state.marker = 0; tokenizer.tokenize_state.size = 0; - tokenizer.exit(Token::ThematicBreak); + tokenizer.exit(Name::ThematicBreak); // Feel free to interrupt. tokenizer.interrupt = false; State::Ok @@ -114,8 +118,8 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { Some(b'*' | b'-' | b'_') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => { - tokenizer.enter(Token::ThematicBreakSequence); - State::Retry(Name::ThematicBreakSequence) + tokenizer.enter(Name::ThematicBreakSequence); + State::Retry(StateName::ThematicBreakSequence) } _ => { tokenizer.tokenize_state.marker = 0; @@ -138,15 +142,15 @@ pub fn sequence(tokenizer: &mut Tokenizer) -> State { { tokenizer.consume(); tokenizer.tokenize_state.size += 1; - State::Next(Name::ThematicBreakSequence) + State::Next(StateName::ThematicBreakSequence) } _ => { - tokenizer.exit(Token::ThematicBreakSequence); + tokenizer.exit(Name::ThematicBreakSequence); let name = space_or_tab(tokenizer); tokenizer.attempt( name, - State::Next(Name::ThematicBreakAtBreak), - State::Next(Name::ThematicBreakAtBreak), + State::Next(StateName::ThematicBreakAtBreak), + State::Next(StateName::ThematicBreakAtBreak), ) } } diff --git a/src/content/document.rs b/src/content/document.rs index b5ff532..998bc06 100644 --- a/src/content/document.rs +++ b/src/content/document.rs @@ -8,13 +8,11 @@ //! * [Block quote][crate::construct::block_quote] //! * [List][crate::construct::list] +use crate::event::{Content, Event, Kind, Link, Name, Point}; use crate::parser::ParseState; -use crate::state::{Name, State}; +use crate::state::{Name as StateName, State}; use crate::subtokenize::{divide_events, subtokenize}; -use crate::token::Token; -use crate::tokenizer::{ - Container, ContainerState, ContentType, Event, EventType, Link, Point, Tokenizer, -}; +use crate::tokenizer::{Container, ContainerState, Tokenizer}; use crate::util::{ normalize_identifier::normalize_identifier, skip, @@ -59,7 +57,7 @@ pub fn document(parse_state: &mut ParseState, point: Point) -> Vec<Event> { let state = tokenizer.push( (0, 0), (parse_state.bytes.len(), 0), - State::Next(Name::DocumentStart), + State::Next(StateName::DocumentStart), ); tokenizer.flush(state, true); @@ -69,7 +67,7 @@ pub fn document(parse_state: &mut ParseState, point: Point) -> Vec<Event> { while index < tokenizer.events.len() { let event = &tokenizer.events[index]; - if event.event_type == EventType::Exit && event.token_type == Token::DefinitionLabelString { + if event.kind == Kind::Exit && event.name == Name::DefinitionLabelString { // Note: we don’t care about virtual spaces, so `as_str` is fine. let id = normalize_identifier( Slice::from_position( @@ -111,9 +109,9 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { ))); tokenizer.attempt( - Name::BomStart, - State::Next(Name::DocumentContainerExistingBefore), - State::Next(Name::DocumentContainerExistingBefore), + StateName::BomStart, + State::Next(StateName::DocumentContainerExistingBefore), + State::Next(StateName::DocumentContainerExistingBefore), ) } @@ -134,16 +132,16 @@ pub fn container_existing_before(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( match container.kind { - Container::BlockQuote => Name::BlockQuoteContStart, - Container::ListItem => Name::ListContStart, + Container::BlockQuote => StateName::BlockQuoteContStart, + Container::ListItem => StateName::ListContStart, }, - State::Next(Name::DocumentContainerExistingAfter), - State::Next(Name::DocumentContainerNewBefore), + State::Next(StateName::DocumentContainerExistingAfter), + State::Next(StateName::DocumentContainerNewBefore), ) } // Otherwise, check new containers. else { - State::Retry(Name::DocumentContainerNewBefore) + State::Retry(StateName::DocumentContainerNewBefore) } } @@ -156,7 +154,7 @@ pub fn container_existing_before(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn container_existing_after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.document_continued += 1; - State::Retry(Name::DocumentContainerExistingBefore) + State::Retry(StateName::DocumentContainerExistingBefore) } /// Before a new container. @@ -180,7 +178,7 @@ pub fn container_new_before(tokenizer: &mut Tokenizer) -> State { // …and if we’re in a concrete construct, new containers can’t “pierce” // into them. if child.concrete { - return State::Retry(Name::DocumentContainersAfter); + return State::Retry(StateName::DocumentContainersAfter); } } @@ -203,9 +201,9 @@ pub fn container_new_before(tokenizer: &mut Tokenizer) -> State { .swap(tokenizer.tokenize_state.document_continued, tail); tokenizer.attempt( - Name::BlockQuoteStart, - State::Next(Name::DocumentContainerNewAfter), - State::Next(Name::DocumentContainerNewBeforeNotBlockQuote), + StateName::BlockQuoteStart, + State::Next(StateName::DocumentContainerNewAfter), + State::Next(StateName::DocumentContainerNewBeforeNotBlockQuote), ) } @@ -226,9 +224,9 @@ pub fn container_new_before_not_block_quote(tokenizer: &mut Tokenizer) -> State }; tokenizer.attempt( - Name::ListStart, - State::Next(Name::DocumentContainerNewAfter), - State::Next(Name::DocumentContainerNewBeforeNotList), + StateName::ListStart, + State::Next(StateName::DocumentContainerNewAfter), + State::Next(StateName::DocumentContainerNewBeforeNotList), ) } @@ -247,7 +245,7 @@ pub fn container_new_before_not_list(tokenizer: &mut Tokenizer) -> State { .document_container_stack .swap_remove(tokenizer.tokenize_state.document_continued); - State::Retry(Name::DocumentContainersAfter) + State::Retry(StateName::DocumentContainersAfter) } /// After a new container. @@ -281,7 +279,7 @@ pub fn container_new_after(tokenizer: &mut Tokenizer) -> State { .push(container); tokenizer.tokenize_state.document_continued += 1; tokenizer.interrupt = false; - State::Retry(Name::DocumentContainerNewBefore) + State::Retry(StateName::DocumentContainerNewBefore) } /// After containers, before flow. @@ -301,7 +299,7 @@ pub fn containers_after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // Note: EOL is part of data. - None => State::Retry(Name::DocumentFlowEnd), + None => State::Retry(StateName::DocumentFlowEnd), Some(_) => { let current = tokenizer.events.len(); let previous = tokenizer.tokenize_state.document_data_index; @@ -310,14 +308,14 @@ pub fn containers_after(tokenizer: &mut Tokenizer) -> State { } tokenizer.tokenize_state.document_data_index = Some(current); tokenizer.enter_with_link( - Token::Data, + Name::Data, Some(Link { previous, next: None, - content_type: ContentType::Flow, + content_type: Content::Flow, }), ); - State::Retry(Name::DocumentFlowInside) + State::Retry(StateName::DocumentFlowInside) } } } @@ -331,18 +329,18 @@ pub fn containers_after(tokenizer: &mut Tokenizer) -> State { pub fn flow_inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => { - tokenizer.exit(Token::Data); - State::Retry(Name::DocumentFlowEnd) + tokenizer.exit(Name::Data); + State::Retry(StateName::DocumentFlowEnd) } // Note: EOL is part of data. Some(b'\n') => { tokenizer.consume(); - tokenizer.exit(Token::Data); - State::Next(Name::DocumentFlowEnd) + tokenizer.exit(Name::Data); + State::Next(StateName::DocumentFlowEnd) } Some(_) => { tokenizer.consume(); - State::Next(Name::DocumentFlowInside) + State::Next(StateName::DocumentFlowInside) } } } @@ -359,7 +357,7 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State { let state = tokenizer .tokenize_state .document_child_state - .unwrap_or(State::Next(Name::FlowStart)); + .unwrap_or(State::Next(StateName::FlowStart)); tokenizer.tokenize_state.document_exits.push(None); @@ -369,12 +367,12 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State { state, ); - let paragraph = matches!(state, State::Next(Name::ParagraphInside)) + let paragraph = matches!(state, State::Next(StateName::ParagraphInside)) || (!child.events.is_empty() && child.events - [skip::opt_back(&child.events, child.events.len() - 1, &[Token::LineEnding])] - .token_type - == Token::Paragraph); + [skip::opt_back(&child.events, child.events.len() - 1, &[Name::LineEnding])] + .name + == Name::Paragraph); tokenizer.tokenize_state.document_child_state = Some(state); @@ -401,7 +399,7 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.document_paragraph_before = paragraph; // Containers would only be interrupting if we’ve continued. tokenizer.interrupt = false; - State::Retry(Name::DocumentContainerExistingBefore) + State::Retry(StateName::DocumentContainerExistingBefore) } } } @@ -421,7 +419,7 @@ fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) { .tokenize_state .document_child_state .take() - .unwrap_or(State::Next(Name::FlowStart)); + .unwrap_or(State::Next(StateName::FlowStart)); child.flush(state, false); } @@ -433,14 +431,14 @@ fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) { while !stack_close.is_empty() { let container = stack_close.pop().unwrap(); - let token_type = match container.kind { - Container::BlockQuote => Token::BlockQuote, - Container::ListItem => Token::ListItem, + let name = match container.kind { + Container::BlockQuote => Name::BlockQuote, + Container::ListItem => Name::ListItem, }; exits.push(Event { - event_type: EventType::Exit, - token_type: token_type.clone(), + kind: Kind::Exit, + name: name.clone(), point: tokenizer.point.clone(), link: None, }); @@ -451,7 +449,7 @@ fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) { while stack_index > 0 { stack_index -= 1; - if tokenizer.stack[stack_index] == token_type { + if tokenizer.stack[stack_index] == name { tokenizer.stack.remove(stack_index); found = true; break; @@ -482,8 +480,8 @@ fn resolve(tokenizer: &mut Tokenizer) { while child_index < child.events.len() { let event = &child.events[child_index]; - if event.event_type == EventType::Enter - && (event.token_type == Token::LineEnding || event.token_type == Token::BlankLineEnding) + if event.kind == Kind::Enter + && (event.name == Name::LineEnding || event.name == Name::BlankLineEnding) { if let Some(mut exits) = tokenizer.tokenize_state.document_exits[line].take() { let mut exit_index = 0; @@ -507,7 +505,7 @@ fn resolve(tokenizer: &mut Tokenizer) { divide_events( &mut tokenizer.map, &tokenizer.events, - skip::to(&tokenizer.events, 0, &[Token::Data]), + skip::to(&tokenizer.events, 0, &[Name::Data]), &mut child.events, ); diff --git a/src/content/flow.rs b/src/content/flow.rs index 16a1cba..7eb7b64 100644 --- a/src/content/flow.rs +++ b/src/content/flow.rs @@ -19,8 +19,8 @@ //! * [HTML (flow)][crate::construct::html_flow] //! * [Thematic break][crate::construct::thematic_break] -use crate::state::{Name, State}; -use crate::token::Token; +use crate::event::Name; +use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; /// Before flow. @@ -36,42 +36,42 @@ use crate::tokenizer::Tokenizer; pub fn start(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'`' | b'~') => tokenizer.attempt( - Name::CodeFencedStart, - State::Next(Name::FlowAfter), - State::Next(Name::FlowBeforeParagraph), + StateName::CodeFencedStart, + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), ), Some(b'<') => tokenizer.attempt( - Name::HtmlFlowStart, - State::Next(Name::FlowAfter), - State::Next(Name::FlowBeforeParagraph), + StateName::HtmlFlowStart, + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), ), Some(b'#') => tokenizer.attempt( - Name::HeadingAtxStart, - State::Next(Name::FlowAfter), - State::Next(Name::FlowBeforeParagraph), + StateName::HeadingAtxStart, + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), ), // Note: `-` is also used in thematic breaks, so it’s not included here. Some(b'=') => tokenizer.attempt( - Name::HeadingSetextStart, - State::Next(Name::FlowAfter), - State::Next(Name::FlowBeforeParagraph), + StateName::HeadingSetextStart, + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), ), Some(b'*' | b'_') => tokenizer.attempt( - Name::ThematicBreakStart, - State::Next(Name::FlowAfter), - State::Next(Name::FlowBeforeParagraph), + StateName::ThematicBreakStart, + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), ), Some(b'[') => tokenizer.attempt( - Name::DefinitionStart, - State::Next(Name::FlowAfter), - State::Next(Name::FlowBeforeParagraph), + StateName::DefinitionStart, + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), ), // Actual parsing: blank line? Indented code? Indented anything? // Also includes `-` which can be a setext heading underline or a thematic break. - None | Some(b'\t' | b'\n' | b' ' | b'-') => State::Retry(Name::FlowBlankLineBefore), + None | Some(b'\t' | b'\n' | b' ' | b'-') => State::Retry(StateName::FlowBlankLineBefore), Some(_) => tokenizer.attempt( - Name::ParagraphStart, - State::Next(Name::FlowAfter), + StateName::ParagraphStart, + State::Next(StateName::FlowAfter), State::Nok, ), } @@ -79,9 +79,9 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - Name::BlankLineStart, - State::Next(Name::FlowBlankLineAfter), - State::Next(Name::FlowBeforeCodeIndented), + StateName::BlankLineStart, + State::Next(StateName::FlowBlankLineAfter), + State::Next(StateName::FlowBeforeCodeIndented), ) } @@ -99,57 +99,57 @@ pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn before_code_indented(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - Name::CodeIndentedStart, - State::Next(Name::FlowAfter), - State::Next(Name::FlowBeforeCodeFenced), + StateName::CodeIndentedStart, + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeCodeFenced), ) } pub fn before_code_fenced(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - Name::CodeFencedStart, - State::Next(Name::FlowAfter), - State::Next(Name::FlowBeforeHtml), + StateName::CodeFencedStart, + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeHtml), ) } pub fn before_html(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - Name::HtmlFlowStart, - State::Next(Name::FlowAfter), - State::Next(Name::FlowBeforeHeadingAtx), + StateName::HtmlFlowStart, + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeHeadingAtx), ) } pub fn before_heading_atx(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - Name::HeadingAtxStart, - State::Next(Name::FlowAfter), - State::Next(Name::FlowBeforeHeadingSetext), + StateName::HeadingAtxStart, + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeHeadingSetext), ) } pub fn before_heading_setext(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - Name::HeadingSetextStart, - State::Next(Name::FlowAfter), - State::Next(Name::FlowBeforeThematicBreak), + StateName::HeadingSetextStart, + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeThematicBreak), ) } pub fn before_thematic_break(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - Name::ThematicBreakStart, - State::Next(Name::FlowAfter), - State::Next(Name::FlowBeforeDefinition), + StateName::ThematicBreakStart, + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeDefinition), ) } pub fn before_definition(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - Name::DefinitionStart, - State::Next(Name::FlowAfter), - State::Next(Name::FlowBeforeParagraph), + StateName::DefinitionStart, + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), ) } @@ -164,12 +164,12 @@ pub fn blank_line_after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => State::Ok, Some(b'\n') => { - tokenizer.enter(Token::BlankLineEnding); + tokenizer.enter(Name::BlankLineEnding); tokenizer.consume(); - tokenizer.exit(Token::BlankLineEnding); + tokenizer.exit(Name::BlankLineEnding); // Feel free to interrupt. tokenizer.interrupt = false; - State::Next(Name::FlowStart) + State::Next(StateName::FlowStart) } _ => unreachable!("expected eol/eof"), } @@ -188,10 +188,10 @@ pub fn after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => State::Ok, Some(b'\n') => { - tokenizer.enter(Token::LineEnding); + tokenizer.enter(Name::LineEnding); tokenizer.consume(); - tokenizer.exit(Token::LineEnding); - State::Next(Name::FlowStart) + tokenizer.exit(Name::LineEnding); + State::Next(StateName::FlowStart) } _ => unreachable!("expected eol/eof"), } @@ -204,8 +204,8 @@ pub fn after(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn before_paragraph(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - Name::ParagraphStart, - State::Next(Name::FlowAfter), + StateName::ParagraphStart, + State::Next(StateName::FlowAfter), State::Nok, ) } diff --git a/src/content/string.rs b/src/content/string.rs index 927f582..79dee6c 100644 --- a/src/content/string.rs +++ b/src/content/string.rs @@ -13,7 +13,7 @@ //! [text]: crate::content::text use crate::construct::partial_whitespace::resolve_whitespace; -use crate::state::{Name, State}; +use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; const MARKERS: [u8; 2] = [b'&', b'\\']; @@ -22,7 +22,7 @@ const MARKERS: [u8; 2] = [b'&', b'\\']; pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.register_resolver("whitespace".to_string(), Box::new(resolve)); tokenizer.tokenize_state.markers = &MARKERS; - State::Retry(Name::StringBefore) + State::Retry(StateName::StringBefore) } /// Before string. @@ -30,22 +30,26 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => State::Ok, Some(b'&') => tokenizer.attempt( - Name::CharacterReferenceStart, - State::Next(Name::StringBefore), - State::Next(Name::StringBeforeData), + StateName::CharacterReferenceStart, + State::Next(StateName::StringBefore), + State::Next(StateName::StringBeforeData), ), Some(b'\\') => tokenizer.attempt( - Name::CharacterEscapeStart, - State::Next(Name::StringBefore), - State::Next(Name::StringBeforeData), + StateName::CharacterEscapeStart, + State::Next(StateName::StringBefore), + State::Next(StateName::StringBeforeData), ), - _ => State::Retry(Name::StringBeforeData), + _ => State::Retry(StateName::StringBeforeData), } } /// At data. pub fn before_data(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt(Name::DataStart, State::Next(Name::StringBefore), State::Nok) + tokenizer.attempt( + StateName::DataStart, + State::Next(StateName::StringBefore), + State::Nok, + ) } /// Resolve whitespace. diff --git a/src/content/text.rs b/src/content/text.rs index 1b3890e..77c5963 100644 --- a/src/content/text.rs +++ b/src/content/text.rs @@ -21,7 +21,7 @@ //! > [whitespace][crate::construct::partial_whitespace]. use crate::construct::partial_whitespace::resolve_whitespace; -use crate::state::{Name, State}; +use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; const MARKERS: [u8; 9] = [ @@ -40,7 +40,7 @@ const MARKERS: [u8; 9] = [ pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.register_resolver("whitespace".to_string(), Box::new(resolve)); tokenizer.tokenize_state.markers = &MARKERS; - State::Retry(Name::TextBefore) + State::Retry(StateName::TextBefore) } /// Before text. @@ -48,71 +48,75 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => State::Ok, Some(b'!') => tokenizer.attempt( - Name::LabelStartImageStart, - State::Next(Name::TextBefore), - State::Next(Name::TextBeforeData), + StateName::LabelStartImageStart, + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeData), ), Some(b'&') => tokenizer.attempt( - Name::CharacterReferenceStart, - State::Next(Name::TextBefore), - State::Next(Name::TextBeforeData), + StateName::CharacterReferenceStart, + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeData), ), Some(b'*' | b'_') => tokenizer.attempt( - Name::AttentionStart, - State::Next(Name::TextBefore), - State::Next(Name::TextBeforeData), + StateName::AttentionStart, + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeData), ), // `autolink`, `html_text` (order does not matter) Some(b'<') => tokenizer.attempt( - Name::AutolinkStart, - State::Next(Name::TextBefore), - State::Next(Name::TextBeforeHtml), + StateName::AutolinkStart, + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeHtml), ), Some(b'[') => tokenizer.attempt( - Name::LabelStartLinkStart, - State::Next(Name::TextBefore), - State::Next(Name::TextBeforeData), + StateName::LabelStartLinkStart, + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeData), ), Some(b'\\') => tokenizer.attempt( - Name::CharacterEscapeStart, - State::Next(Name::TextBefore), - State::Next(Name::TextBeforeHardBreakEscape), + StateName::CharacterEscapeStart, + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeHardBreakEscape), ), Some(b']') => tokenizer.attempt( - Name::LabelEndStart, - State::Next(Name::TextBefore), - State::Next(Name::TextBeforeData), + StateName::LabelEndStart, + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeData), ), Some(b'`') => tokenizer.attempt( - Name::CodeTextStart, - State::Next(Name::TextBefore), - State::Next(Name::TextBeforeData), + StateName::CodeTextStart, + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeData), ), - _ => State::Retry(Name::TextBeforeData), + _ => State::Retry(StateName::TextBeforeData), } } /// At `<`, which wasn’t an autolink: before HTML? pub fn before_html(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - Name::HtmlTextStart, - State::Next(Name::TextBefore), - State::Next(Name::TextBeforeData), + StateName::HtmlTextStart, + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeData), ) } /// At `\`, which wasn’t a character escape: before a hard break? pub fn before_hard_break_escape(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - Name::HardBreakEscapeStart, - State::Next(Name::TextBefore), - State::Next(Name::TextBeforeData), + StateName::HardBreakEscapeStart, + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeData), ) } /// At data. pub fn before_data(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt(Name::DataStart, State::Next(Name::TextBefore), State::Nok) + tokenizer.attempt( + StateName::DataStart, + State::Next(StateName::TextBefore), + State::Nok, + ) } /// Resolve whitespace. diff --git a/src/token.rs b/src/event.rs index db3bffc..ba07028 100644 --- a/src/token.rs +++ b/src/event.rs @@ -1,6 +1,6 @@ /// Semantic label of a span. #[derive(Debug, Clone, PartialEq, Hash, Eq)] -pub enum Token { +pub enum Name { /// Attention sequence. /// /// > 👉 **Note**: this is used while parsing but compiled away. @@ -1832,46 +1832,104 @@ pub enum Token { ThematicBreakSequence, } -/// List of void tokens, used to make sure everything is working good. -pub const VOID_TOKENS: [Token; 40] = [ - Token::AttentionSequence, - Token::AutolinkEmail, - Token::AutolinkMarker, - Token::AutolinkProtocol, - Token::BlankLineEnding, - Token::BlockQuoteMarker, - Token::ByteOrderMark, - Token::CharacterEscapeMarker, - Token::CharacterEscapeValue, - Token::CharacterReferenceMarker, - Token::CharacterReferenceMarkerHexadecimal, - Token::CharacterReferenceMarkerNumeric, - Token::CharacterReferenceMarkerSemi, - Token::CharacterReferenceValue, - Token::CodeFencedFenceSequence, - Token::CodeFlowChunk, - Token::CodeTextData, - Token::CodeTextSequence, - Token::Data, - Token::DefinitionDestinationLiteralMarker, - Token::DefinitionLabelMarker, - Token::DefinitionMarker, - Token::DefinitionTitleMarker, - Token::EmphasisSequence, - Token::HardBreakEscape, - Token::HardBreakTrailing, - Token::HeadingAtxSequence, - Token::HeadingSetextUnderline, - Token::HtmlFlowData, - Token::HtmlTextData, - Token::LabelImageMarker, - Token::LabelMarker, - Token::LineEnding, - Token::ListItemMarker, - Token::ListItemValue, - Token::ReferenceMarker, - Token::ResourceMarker, - Token::ResourceTitleMarker, - Token::StrongSequence, - Token::ThematicBreakSequence, +/// List of void tokens, used to make sure everything is working well. +pub const VOID_EVENTS: [Name; 40] = [ + Name::AttentionSequence, + Name::AutolinkEmail, + Name::AutolinkMarker, + Name::AutolinkProtocol, + Name::BlankLineEnding, + Name::BlockQuoteMarker, + Name::ByteOrderMark, + Name::CharacterEscapeMarker, + Name::CharacterEscapeValue, + Name::CharacterReferenceMarker, + Name::CharacterReferenceMarkerHexadecimal, + Name::CharacterReferenceMarkerNumeric, + Name::CharacterReferenceMarkerSemi, + Name::CharacterReferenceValue, + Name::CodeFencedFenceSequence, + Name::CodeFlowChunk, + Name::CodeTextData, + Name::CodeTextSequence, + Name::Data, + Name::DefinitionDestinationLiteralMarker, + Name::DefinitionLabelMarker, + Name::DefinitionMarker, + Name::DefinitionTitleMarker, + Name::EmphasisSequence, + Name::HardBreakEscape, + Name::HardBreakTrailing, + Name::HeadingAtxSequence, + Name::HeadingSetextUnderline, + Name::HtmlFlowData, + Name::HtmlTextData, + Name::LabelImageMarker, + Name::LabelMarker, + Name::LineEnding, + Name::ListItemMarker, + Name::ListItemValue, + Name::ReferenceMarker, + Name::ResourceMarker, + Name::ResourceTitleMarker, + Name::StrongSequence, + Name::ThematicBreakSequence, ]; + +/// Embedded content type. +#[derive(Debug, Clone, PartialEq)] +pub enum Content { + /// Represents [flow content][crate::content::flow]. + Flow, + /// Represents [string content][crate::content::string]. + String, + /// Represents [text content][crate::content::text]. + Text, +} + +/// A link to another event. +#[derive(Debug, Clone)] +pub struct Link { + pub previous: Option<usize>, + pub next: Option<usize>, + pub content_type: Content, +} + +/// A location in the document (`line`/`column`/`offset`). +/// +/// The interface for the location in the document comes from unist `Point`: +/// <https://github.com/syntax-tree/unist#point>. +#[derive(Debug, Clone)] +pub struct Point { + /// 1-indexed line number. + pub line: usize, + /// 1-indexed column number. + /// This is increases up to a tab stop for tabs. + /// Some editors count tabs as 1 character, so this position is not the + /// same as editors. + pub column: usize, + /// 0-indexed position in the document. + /// + /// Also an `index` into `bytes`. + pub index: usize, + /// Virtual step on the same `index`. + pub vs: usize, +} + +/// Possible event kinds. +#[derive(Debug, PartialEq, Clone)] +pub enum Kind { + /// The start of something. + Enter, + /// The end of something. + Exit, +} + +/// Something semantic happening somewhere. +#[derive(Debug, Clone)] +pub struct Event { + pub kind: Kind, + pub name: Name, + pub point: Point, + pub link: Option<Link>, +} @@ -8,10 +8,10 @@ mod compiler; mod constant; mod construct; mod content; +mod event; mod parser; mod state; mod subtokenize; -mod token; mod tokenizer; mod unicode; mod util; diff --git a/src/parser.rs b/src/parser.rs index 23afb37..dc2c07a 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,7 +1,7 @@ //! Turn a string of markdown into events. use crate::content::document::document; -use crate::tokenizer::{Event, Point}; +use crate::event::{Event, Point}; use crate::{Constructs, Options}; /// Information needed, in all content types, when parsing markdown. diff --git a/src/subtokenize.rs b/src/subtokenize.rs index c545043..9b7c6ae 100644 --- a/src/subtokenize.rs +++ b/src/subtokenize.rs @@ -21,9 +21,10 @@ //! thus the whole document needs to be parsed up to the level of definitions, //! before any level that can include references can be parsed. +use crate::event::{Content, Event, Kind}; use crate::parser::ParseState; -use crate::state::{Name, State}; -use crate::tokenizer::{ContentType, Event, EventType, Tokenizer}; +use crate::state::{Name as StateName, State}; +use crate::tokenizer::Tokenizer; use crate::util::edit_map::EditMap; /// Create a link between two [`Event`][]s. @@ -37,10 +38,10 @@ pub fn link(events: &mut [Event], index: usize) { /// Link two arbitrary [`Event`][]s together. pub fn link_to(events: &mut [Event], pevious: usize, next: usize) { - debug_assert_eq!(events[pevious].event_type, EventType::Enter); - debug_assert_eq!(events[pevious + 1].event_type, EventType::Exit); - debug_assert_eq!(events[pevious + 1].token_type, events[pevious].token_type); - debug_assert_eq!(events[next].event_type, EventType::Enter); + debug_assert_eq!(events[pevious].kind, Kind::Enter); + debug_assert_eq!(events[pevious + 1].kind, Kind::Exit); + debug_assert_eq!(events[pevious + 1].name, events[pevious].name); + debug_assert_eq!(events[next].kind, Kind::Enter); // Note: the exit of this event may not exist, so don’t check for that. let link_previous = events[pevious] @@ -70,7 +71,7 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool { // Find each first opening chunk. if let Some(ref link) = event.link { - debug_assert_eq!(event.event_type, EventType::Enter); + debug_assert_eq!(event.kind, Kind::Enter); // No need to enter linked events again. if link.previous == None { @@ -79,17 +80,17 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool { // Subtokenizer. let mut tokenizer = Tokenizer::new(event.point.clone(), parse_state); // Substate. - let mut state = State::Next(if link.content_type == ContentType::String { - Name::StringStart + let mut state = State::Next(if link.content_type == Content::String { + StateName::StringStart } else { - Name::TextStart + StateName::TextStart }); // Loop through links to pass them in order to the subtokenizer. while let Some(index) = link_index { let enter = &events[index]; let link_curr = enter.link.as_ref().expect("expected link"); - debug_assert_eq!(enter.event_type, EventType::Enter); + debug_assert_eq!(enter.kind, Kind::Enter); if link_curr.previous != None { tokenizer.define_skip(enter.point.clone()); diff --git a/src/tokenizer.rs b/src/tokenizer.rs index baad6ed..b48351d 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1,6 +1,6 @@ //! The tokenizer glues states from the state machine together. //! -//! It facilitates everything needed to turn codes into tokens and events with +//! It facilitates everything needed to turn codes into tokens and with //! a state machine. //! It also enables logic needed for parsing markdown, such as an [`attempt`][] //! to parse something, which can succeed or, when unsuccessful, revert the @@ -12,22 +12,11 @@ //! [`check`]: Tokenizer::check use crate::constant::TAB_SIZE; +use crate::event::{Content, Event, Kind, Link, Name, Point, VOID_EVENTS}; use crate::parser::ParseState; -use crate::state::{call, Name, State}; -use crate::token::{Token, VOID_TOKENS}; +use crate::state::{call, Name as StateName, State}; use crate::util::edit_map::EditMap; -/// Embedded content type. -#[derive(Debug, Clone, PartialEq)] -pub enum ContentType { - /// Represents [flow content][crate::content::flow]. - Flow, - /// Represents [string content][crate::content::string]. - String, - /// Represents [text content][crate::content::text]. - Text, -} - /// How to handle a byte. #[derive(Debug, PartialEq)] pub enum ByteAction { @@ -41,53 +30,6 @@ pub enum ByteAction { Ignore, } -/// A location in the document (`line`/`column`/`offset`). -/// -/// The interface for the location in the document comes from unist `Point`: -/// <https://github.com/syntax-tree/unist#point>. -#[derive(Debug, Clone)] -pub struct Point { - /// 1-indexed line number. - pub line: usize, - /// 1-indexed column number. - /// This is increases up to a tab stop for tabs. - /// Some editors count tabs as 1 character, so this position is not the - /// same as editors. - pub column: usize, - /// 0-indexed position in the document. - /// - /// Also an `index` into `bytes`. - pub index: usize, - /// Virtual step on the same `index`. - pub vs: usize, -} - -/// Possible event types. -#[derive(Debug, PartialEq, Clone)] -pub enum EventType { - /// The start of something. - Enter, - /// The end of something. - Exit, -} - -/// A link to another event. -#[derive(Debug, Clone)] -pub struct Link { - pub previous: Option<usize>, - pub next: Option<usize>, - pub content_type: ContentType, -} - -/// Something semantic happening somewhere. -#[derive(Debug, Clone)] -pub struct Event { - pub event_type: EventType, - pub token_type: Token, - pub point: Point, - pub link: Option<Link>, -} - /// Callback that can be registered and is called when the tokenizer is done. /// /// Resolvers are supposed to change the list of events, because parsing is @@ -205,15 +147,15 @@ pub struct TokenizeState<'a> { pub document_paragraph_before: bool, // Couple of very frequent settings for parsing whitespace. - pub space_or_tab_eol_content_type: Option<ContentType>, + pub space_or_tab_eol_content_type: Option<Content>, pub space_or_tab_eol_connect: bool, pub space_or_tab_eol_ok: bool, pub space_or_tab_connect: bool, - pub space_or_tab_content_type: Option<ContentType>, + pub space_or_tab_content_type: Option<Content>, pub space_or_tab_min: usize, pub space_or_tab_max: usize, pub space_or_tab_size: usize, - pub space_or_tab_token: Token, + pub space_or_tab_token: Name, // Couple of media related fields. /// Stack of label (start) that could form images and links. @@ -250,15 +192,15 @@ pub struct TokenizeState<'a> { /// Index. pub end: usize, /// Slot for a token type. - pub token_1: Token, + pub token_1: Name, /// Slot for a token type. - pub token_2: Token, + pub token_2: Name, /// Slot for a token type. - pub token_3: Token, + pub token_3: Name, /// Slot for a token type. - pub token_4: Token, + pub token_4: Name, /// Slot for a token type. - pub token_5: Token, + pub token_5: Name, } /// A tokenizer itself. @@ -290,7 +232,7 @@ pub struct Tokenizer<'a> { /// Hierarchy of semantic labels. /// /// Tracked to make sure everything’s valid. - pub stack: Vec<Token>, + pub stack: Vec<Name>, /// Edit map, to batch changes. pub map: EditMap, /// List of attached resolvers, which will be called when done feeding, @@ -363,12 +305,12 @@ impl<'a> Tokenizer<'a> { space_or_tab_min: 0, space_or_tab_max: 0, space_or_tab_size: 0, - space_or_tab_token: Token::SpaceOrTab, - token_1: Token::Data, - token_2: Token::Data, - token_3: Token::Data, - token_4: Token::Data, - token_5: Token::Data, + space_or_tab_token: Name::SpaceOrTab, + token_1: Name::Data, + token_2: Name::Data, + token_3: Name::Data, + token_4: Name::Data, + token_5: Name::Data, }, map: EditMap::new(), interrupt: false, @@ -491,13 +433,13 @@ impl<'a> Tokenizer<'a> { } /// Mark the start of a semantic label. - pub fn enter(&mut self, token_type: Token) { - self.enter_with_link(token_type, None); + pub fn enter(&mut self, name: Name) { + self.enter_with_link(name, None); } - pub fn enter_with_content(&mut self, token_type: Token, content_type_opt: Option<ContentType>) { + pub fn enter_with_content(&mut self, name: Name, content_type_opt: Option<Content>) { self.enter_with_link( - token_type, + name, content_type_opt.map(|content_type| Link { content_type, previous: None, @@ -506,26 +448,26 @@ impl<'a> Tokenizer<'a> { ); } - pub fn enter_with_link(&mut self, token_type: Token, link: Option<Link>) { + pub fn enter_with_link(&mut self, name: Name, link: Option<Link>) { let mut point = self.point.clone(); move_point_back(self, &mut point); - log::debug!("enter: `{:?}`", token_type); + log::debug!("enter: `{:?}`", name); self.events.push(Event { - event_type: EventType::Enter, - token_type: token_type.clone(), + kind: Kind::Enter, + name: name.clone(), point, link, }); - self.stack.push(token_type); + self.stack.push(name); } /// Mark the end of a semantic label. - pub fn exit(&mut self, token_type: Token) { + pub fn exit(&mut self, name: Name) { let current_token = self.stack.pop().expect("cannot close w/o open tokens"); debug_assert_eq!( - current_token, token_type, + current_token, name, "expected exit token to match current token" ); @@ -533,18 +475,18 @@ impl<'a> Tokenizer<'a> { let mut point = self.point.clone(); debug_assert!( - current_token != previous.token_type + current_token != previous.name || previous.point.index != point.index || previous.point.vs != point.vs, "expected non-empty token" ); - if VOID_TOKENS.iter().any(|d| d == &token_type) { + if VOID_EVENTS.iter().any(|d| d == &name) { debug_assert!( - current_token == previous.token_type, + current_token == previous.name, "expected token to be void (`{:?}`), instead of including `{:?}`", current_token, - previous.token_type + previous.name ); } @@ -556,10 +498,10 @@ impl<'a> Tokenizer<'a> { move_point_back(self, &mut point); } - log::debug!("exit: `{:?}`", token_type); + log::debug!("exit: `{:?}`", name); self.events.push(Event { - event_type: EventType::Exit, - token_type, + kind: Kind::Exit, + name, point, link: None, }); @@ -595,7 +537,7 @@ impl<'a> Tokenizer<'a> { /// Parse with `name` and its future states, to see if that results in /// [`State::Ok`][] or [`State::Nok`][], then revert in both cases. - pub fn check(&mut self, name: Name, ok: State, nok: State) -> State { + pub fn check(&mut self, name: StateName, ok: State, nok: State) -> State { // Always capture (and restore) when checking. // No need to capture (and restore) when `nok` is `State::Nok`, because the // parent attempt will do it. @@ -614,7 +556,7 @@ impl<'a> Tokenizer<'a> { /// Parse with `name` and its future states, to see if that results in /// [`State::Ok`][] or [`State::Nok`][], revert in the case of /// `State::Nok`. - pub fn attempt(&mut self, name: Name, ok: State, nok: State) -> State { + pub fn attempt(&mut self, name: StateName, ok: State, nok: State) -> State { // Always capture (and restore) when checking. // No need to capture (and restore) when `nok` is `State::Nok`, because the // parent attempt will do it. diff --git a/src/util/edit_map.rs b/src/util/edit_map.rs index 4d9f557..59adfca 100644 --- a/src/util/edit_map.rs +++ b/src/util/edit_map.rs @@ -8,7 +8,7 @@ //! And, in other cases, it’s needed to parse subcontent: pass some events //! through another tokenizer and inject the result. -use crate::tokenizer::Event; +use crate::event::Event; /// Shift `previous` and `next` links according to `jumps`. /// diff --git a/src/util/skip.rs b/src/util/skip.rs index d2ad914..371418f 100644 --- a/src/util/skip.rs +++ b/src/util/skip.rs @@ -1,31 +1,30 @@ //! Utilities to deal with lists of events. -use crate::token::Token; -use crate::tokenizer::{Event, EventType}; +use crate::event::{Event, Kind, Name}; -/// Skip from `index`, optionally past `token_types`. -pub fn opt(events: &[Event], index: usize, token_types: &[Token]) -> usize { - skip_opt_impl(events, index, token_types, true) +/// Skip from `index`, optionally past `names`. +pub fn opt(events: &[Event], index: usize, names: &[Name]) -> usize { + skip_opt_impl(events, index, names, true) } -/// Skip from `index`, optionally past `token_types`, backwards. -pub fn opt_back(events: &[Event], index: usize, token_types: &[Token]) -> usize { - skip_opt_impl(events, index, token_types, false) +/// Skip from `index`, optionally past `names`, backwards. +pub fn opt_back(events: &[Event], index: usize, names: &[Name]) -> usize { + skip_opt_impl(events, index, names, false) } -pub fn to_back(events: &[Event], index: usize, token_types: &[Token]) -> usize { - to_impl(events, index, token_types, false) +pub fn to_back(events: &[Event], index: usize, names: &[Name]) -> usize { + to_impl(events, index, names, false) } -pub fn to(events: &[Event], index: usize, token_types: &[Token]) -> usize { - to_impl(events, index, token_types, true) +pub fn to(events: &[Event], index: usize, names: &[Name]) -> usize { + to_impl(events, index, names, true) } -pub fn to_impl(events: &[Event], mut index: usize, token_types: &[Token], forward: bool) -> usize { +pub fn to_impl(events: &[Event], mut index: usize, names: &[Name], forward: bool) -> usize { while index < events.len() { - let current = &events[index].token_type; + let current = &events[index].name; - if token_types.contains(current) { + if names.contains(current) { break; } @@ -36,23 +35,14 @@ pub fn to_impl(events: &[Event], mut index: usize, token_types: &[Token], forwar } /// Skip internals. -fn skip_opt_impl( - events: &[Event], - mut index: usize, - token_types: &[Token], - forward: bool, -) -> usize { +fn skip_opt_impl(events: &[Event], mut index: usize, names: &[Name], forward: bool) -> usize { let mut balance = 0; - let open = if forward { - EventType::Enter - } else { - EventType::Exit - }; + let open = if forward { Kind::Enter } else { Kind::Exit }; while index < events.len() { - let current = &events[index].token_type; + let current = &events[index].name; - if !token_types.contains(current) || events[index].event_type != open { + if !names.contains(current) || events[index].kind != open { break; } @@ -60,13 +50,13 @@ fn skip_opt_impl( balance += 1; loop { - balance = if events[index].event_type == open { + balance = if events[index].kind == open { balance + 1 } else { balance - 1 }; - if events[index].token_type == *current && balance == 0 { + if events[index].name == *current && balance == 0 { index = if forward { index + 1 } else { index - 1 }; break; } diff --git a/src/util/slice.rs b/src/util/slice.rs index f287978..34adf32 100644 --- a/src/util/slice.rs +++ b/src/util/slice.rs @@ -1,7 +1,7 @@ //! Utilities to deal with characters. use crate::constant::TAB_SIZE; -use crate::tokenizer::{Event, EventType, Point}; +use crate::event::{Event, Kind, Point}; use std::str; /// A range between two places. @@ -24,15 +24,15 @@ impl<'a> Position<'a> { pub fn from_exit_event(events: &'a [Event], index: usize) -> Position<'a> { let exit = &events[index]; debug_assert_eq!( - exit.event_type, - EventType::Exit, + exit.kind, + Kind::Exit, "expected `from_exit_event` to be called on `exit` event" ); let mut enter_index = index - 1; loop { let enter = &events[enter_index]; - if enter.event_type == EventType::Enter && enter.token_type == exit.token_type { + if enter.kind == Kind::Enter && enter.name == exit.name { return Position { start: &enter.point, end: &exit.point, |