diff options
Diffstat (limited to '')
-rw-r--r-- | src/content/document.rs | 98 | ||||
-rw-r--r-- | src/content/flow.rs | 110 | ||||
-rw-r--r-- | src/content/string.rs | 24 | ||||
-rw-r--r-- | src/content/text.rs | 72 |
4 files changed, 155 insertions, 149 deletions
diff --git a/src/content/document.rs b/src/content/document.rs index b5ff532..998bc06 100644 --- a/src/content/document.rs +++ b/src/content/document.rs @@ -8,13 +8,11 @@ //! * [Block quote][crate::construct::block_quote] //! * [List][crate::construct::list] +use crate::event::{Content, Event, Kind, Link, Name, Point}; use crate::parser::ParseState; -use crate::state::{Name, State}; +use crate::state::{Name as StateName, State}; use crate::subtokenize::{divide_events, subtokenize}; -use crate::token::Token; -use crate::tokenizer::{ - Container, ContainerState, ContentType, Event, EventType, Link, Point, Tokenizer, -}; +use crate::tokenizer::{Container, ContainerState, Tokenizer}; use crate::util::{ normalize_identifier::normalize_identifier, skip, @@ -59,7 +57,7 @@ pub fn document(parse_state: &mut ParseState, point: Point) -> Vec<Event> { let state = tokenizer.push( (0, 0), (parse_state.bytes.len(), 0), - State::Next(Name::DocumentStart), + State::Next(StateName::DocumentStart), ); tokenizer.flush(state, true); @@ -69,7 +67,7 @@ pub fn document(parse_state: &mut ParseState, point: Point) -> Vec<Event> { while index < tokenizer.events.len() { let event = &tokenizer.events[index]; - if event.event_type == EventType::Exit && event.token_type == Token::DefinitionLabelString { + if event.kind == Kind::Exit && event.name == Name::DefinitionLabelString { // Note: we don’t care about virtual spaces, so `as_str` is fine. let id = normalize_identifier( Slice::from_position( @@ -111,9 +109,9 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { ))); tokenizer.attempt( - Name::BomStart, - State::Next(Name::DocumentContainerExistingBefore), - State::Next(Name::DocumentContainerExistingBefore), + StateName::BomStart, + State::Next(StateName::DocumentContainerExistingBefore), + State::Next(StateName::DocumentContainerExistingBefore), ) } @@ -134,16 +132,16 @@ pub fn container_existing_before(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( match container.kind { - Container::BlockQuote => Name::BlockQuoteContStart, - Container::ListItem => Name::ListContStart, + Container::BlockQuote => StateName::BlockQuoteContStart, + Container::ListItem => StateName::ListContStart, }, - State::Next(Name::DocumentContainerExistingAfter), - State::Next(Name::DocumentContainerNewBefore), + State::Next(StateName::DocumentContainerExistingAfter), + State::Next(StateName::DocumentContainerNewBefore), ) } // Otherwise, check new containers. else { - State::Retry(Name::DocumentContainerNewBefore) + State::Retry(StateName::DocumentContainerNewBefore) } } @@ -156,7 +154,7 @@ pub fn container_existing_before(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn container_existing_after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.document_continued += 1; - State::Retry(Name::DocumentContainerExistingBefore) + State::Retry(StateName::DocumentContainerExistingBefore) } /// Before a new container. @@ -180,7 +178,7 @@ pub fn container_new_before(tokenizer: &mut Tokenizer) -> State { // …and if we’re in a concrete construct, new containers can’t “pierce” // into them. if child.concrete { - return State::Retry(Name::DocumentContainersAfter); + return State::Retry(StateName::DocumentContainersAfter); } } @@ -203,9 +201,9 @@ pub fn container_new_before(tokenizer: &mut Tokenizer) -> State { .swap(tokenizer.tokenize_state.document_continued, tail); tokenizer.attempt( - Name::BlockQuoteStart, - State::Next(Name::DocumentContainerNewAfter), - State::Next(Name::DocumentContainerNewBeforeNotBlockQuote), + StateName::BlockQuoteStart, + State::Next(StateName::DocumentContainerNewAfter), + State::Next(StateName::DocumentContainerNewBeforeNotBlockQuote), ) } @@ -226,9 +224,9 @@ pub fn container_new_before_not_block_quote(tokenizer: &mut Tokenizer) -> State }; tokenizer.attempt( - Name::ListStart, - State::Next(Name::DocumentContainerNewAfter), - State::Next(Name::DocumentContainerNewBeforeNotList), + StateName::ListStart, + State::Next(StateName::DocumentContainerNewAfter), + State::Next(StateName::DocumentContainerNewBeforeNotList), ) } @@ -247,7 +245,7 @@ pub fn container_new_before_not_list(tokenizer: &mut Tokenizer) -> State { .document_container_stack .swap_remove(tokenizer.tokenize_state.document_continued); - State::Retry(Name::DocumentContainersAfter) + State::Retry(StateName::DocumentContainersAfter) } /// After a new container. @@ -281,7 +279,7 @@ pub fn container_new_after(tokenizer: &mut Tokenizer) -> State { .push(container); tokenizer.tokenize_state.document_continued += 1; tokenizer.interrupt = false; - State::Retry(Name::DocumentContainerNewBefore) + State::Retry(StateName::DocumentContainerNewBefore) } /// After containers, before flow. @@ -301,7 +299,7 @@ pub fn containers_after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // Note: EOL is part of data. - None => State::Retry(Name::DocumentFlowEnd), + None => State::Retry(StateName::DocumentFlowEnd), Some(_) => { let current = tokenizer.events.len(); let previous = tokenizer.tokenize_state.document_data_index; @@ -310,14 +308,14 @@ pub fn containers_after(tokenizer: &mut Tokenizer) -> State { } tokenizer.tokenize_state.document_data_index = Some(current); tokenizer.enter_with_link( - Token::Data, + Name::Data, Some(Link { previous, next: None, - content_type: ContentType::Flow, + content_type: Content::Flow, }), ); - State::Retry(Name::DocumentFlowInside) + State::Retry(StateName::DocumentFlowInside) } } } @@ -331,18 +329,18 @@ pub fn containers_after(tokenizer: &mut Tokenizer) -> State { pub fn flow_inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => { - tokenizer.exit(Token::Data); - State::Retry(Name::DocumentFlowEnd) + tokenizer.exit(Name::Data); + State::Retry(StateName::DocumentFlowEnd) } // Note: EOL is part of data. Some(b'\n') => { tokenizer.consume(); - tokenizer.exit(Token::Data); - State::Next(Name::DocumentFlowEnd) + tokenizer.exit(Name::Data); + State::Next(StateName::DocumentFlowEnd) } Some(_) => { tokenizer.consume(); - State::Next(Name::DocumentFlowInside) + State::Next(StateName::DocumentFlowInside) } } } @@ -359,7 +357,7 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State { let state = tokenizer .tokenize_state .document_child_state - .unwrap_or(State::Next(Name::FlowStart)); + .unwrap_or(State::Next(StateName::FlowStart)); tokenizer.tokenize_state.document_exits.push(None); @@ -369,12 +367,12 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State { state, ); - let paragraph = matches!(state, State::Next(Name::ParagraphInside)) + let paragraph = matches!(state, State::Next(StateName::ParagraphInside)) || (!child.events.is_empty() && child.events - [skip::opt_back(&child.events, child.events.len() - 1, &[Token::LineEnding])] - .token_type - == Token::Paragraph); + [skip::opt_back(&child.events, child.events.len() - 1, &[Name::LineEnding])] + .name + == Name::Paragraph); tokenizer.tokenize_state.document_child_state = Some(state); @@ -401,7 +399,7 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.document_paragraph_before = paragraph; // Containers would only be interrupting if we’ve continued. tokenizer.interrupt = false; - State::Retry(Name::DocumentContainerExistingBefore) + State::Retry(StateName::DocumentContainerExistingBefore) } } } @@ -421,7 +419,7 @@ fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) { .tokenize_state .document_child_state .take() - .unwrap_or(State::Next(Name::FlowStart)); + .unwrap_or(State::Next(StateName::FlowStart)); child.flush(state, false); } @@ -433,14 +431,14 @@ fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) { while !stack_close.is_empty() { let container = stack_close.pop().unwrap(); - let token_type = match container.kind { - Container::BlockQuote => Token::BlockQuote, - Container::ListItem => Token::ListItem, + let name = match container.kind { + Container::BlockQuote => Name::BlockQuote, + Container::ListItem => Name::ListItem, }; exits.push(Event { - event_type: EventType::Exit, - token_type: token_type.clone(), + kind: Kind::Exit, + name: name.clone(), point: tokenizer.point.clone(), link: None, }); @@ -451,7 +449,7 @@ fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) { while stack_index > 0 { stack_index -= 1; - if tokenizer.stack[stack_index] == token_type { + if tokenizer.stack[stack_index] == name { tokenizer.stack.remove(stack_index); found = true; break; @@ -482,8 +480,8 @@ fn resolve(tokenizer: &mut Tokenizer) { while child_index < child.events.len() { let event = &child.events[child_index]; - if event.event_type == EventType::Enter - && (event.token_type == Token::LineEnding || event.token_type == Token::BlankLineEnding) + if event.kind == Kind::Enter + && (event.name == Name::LineEnding || event.name == Name::BlankLineEnding) { if let Some(mut exits) = tokenizer.tokenize_state.document_exits[line].take() { let mut exit_index = 0; @@ -507,7 +505,7 @@ fn resolve(tokenizer: &mut Tokenizer) { divide_events( &mut tokenizer.map, &tokenizer.events, - skip::to(&tokenizer.events, 0, &[Token::Data]), + skip::to(&tokenizer.events, 0, &[Name::Data]), &mut child.events, ); diff --git a/src/content/flow.rs b/src/content/flow.rs index 16a1cba..7eb7b64 100644 --- a/src/content/flow.rs +++ b/src/content/flow.rs @@ -19,8 +19,8 @@ //! * [HTML (flow)][crate::construct::html_flow] //! * [Thematic break][crate::construct::thematic_break] -use crate::state::{Name, State}; -use crate::token::Token; +use crate::event::Name; +use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; /// Before flow. @@ -36,42 +36,42 @@ use crate::tokenizer::Tokenizer; pub fn start(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'`' | b'~') => tokenizer.attempt( - Name::CodeFencedStart, - State::Next(Name::FlowAfter), - State::Next(Name::FlowBeforeParagraph), + StateName::CodeFencedStart, + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), ), Some(b'<') => tokenizer.attempt( - Name::HtmlFlowStart, - State::Next(Name::FlowAfter), - State::Next(Name::FlowBeforeParagraph), + StateName::HtmlFlowStart, + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), ), Some(b'#') => tokenizer.attempt( - Name::HeadingAtxStart, - State::Next(Name::FlowAfter), - State::Next(Name::FlowBeforeParagraph), + StateName::HeadingAtxStart, + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), ), // Note: `-` is also used in thematic breaks, so it’s not included here. Some(b'=') => tokenizer.attempt( - Name::HeadingSetextStart, - State::Next(Name::FlowAfter), - State::Next(Name::FlowBeforeParagraph), + StateName::HeadingSetextStart, + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), ), Some(b'*' | b'_') => tokenizer.attempt( - Name::ThematicBreakStart, - State::Next(Name::FlowAfter), - State::Next(Name::FlowBeforeParagraph), + StateName::ThematicBreakStart, + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), ), Some(b'[') => tokenizer.attempt( - Name::DefinitionStart, - State::Next(Name::FlowAfter), - State::Next(Name::FlowBeforeParagraph), + StateName::DefinitionStart, + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), ), // Actual parsing: blank line? Indented code? Indented anything? // Also includes `-` which can be a setext heading underline or a thematic break. - None | Some(b'\t' | b'\n' | b' ' | b'-') => State::Retry(Name::FlowBlankLineBefore), + None | Some(b'\t' | b'\n' | b' ' | b'-') => State::Retry(StateName::FlowBlankLineBefore), Some(_) => tokenizer.attempt( - Name::ParagraphStart, - State::Next(Name::FlowAfter), + StateName::ParagraphStart, + State::Next(StateName::FlowAfter), State::Nok, ), } @@ -79,9 +79,9 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - Name::BlankLineStart, - State::Next(Name::FlowBlankLineAfter), - State::Next(Name::FlowBeforeCodeIndented), + StateName::BlankLineStart, + State::Next(StateName::FlowBlankLineAfter), + State::Next(StateName::FlowBeforeCodeIndented), ) } @@ -99,57 +99,57 @@ pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn before_code_indented(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - Name::CodeIndentedStart, - State::Next(Name::FlowAfter), - State::Next(Name::FlowBeforeCodeFenced), + StateName::CodeIndentedStart, + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeCodeFenced), ) } pub fn before_code_fenced(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - Name::CodeFencedStart, - State::Next(Name::FlowAfter), - State::Next(Name::FlowBeforeHtml), + StateName::CodeFencedStart, + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeHtml), ) } pub fn before_html(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - Name::HtmlFlowStart, - State::Next(Name::FlowAfter), - State::Next(Name::FlowBeforeHeadingAtx), + StateName::HtmlFlowStart, + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeHeadingAtx), ) } pub fn before_heading_atx(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - Name::HeadingAtxStart, - State::Next(Name::FlowAfter), - State::Next(Name::FlowBeforeHeadingSetext), + StateName::HeadingAtxStart, + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeHeadingSetext), ) } pub fn before_heading_setext(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - Name::HeadingSetextStart, - State::Next(Name::FlowAfter), - State::Next(Name::FlowBeforeThematicBreak), + StateName::HeadingSetextStart, + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeThematicBreak), ) } pub fn before_thematic_break(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - Name::ThematicBreakStart, - State::Next(Name::FlowAfter), - State::Next(Name::FlowBeforeDefinition), + StateName::ThematicBreakStart, + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeDefinition), ) } pub fn before_definition(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - Name::DefinitionStart, - State::Next(Name::FlowAfter), - State::Next(Name::FlowBeforeParagraph), + StateName::DefinitionStart, + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), ) } @@ -164,12 +164,12 @@ pub fn blank_line_after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => State::Ok, Some(b'\n') => { - tokenizer.enter(Token::BlankLineEnding); + tokenizer.enter(Name::BlankLineEnding); tokenizer.consume(); - tokenizer.exit(Token::BlankLineEnding); + tokenizer.exit(Name::BlankLineEnding); // Feel free to interrupt. tokenizer.interrupt = false; - State::Next(Name::FlowStart) + State::Next(StateName::FlowStart) } _ => unreachable!("expected eol/eof"), } @@ -188,10 +188,10 @@ pub fn after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => State::Ok, Some(b'\n') => { - tokenizer.enter(Token::LineEnding); + tokenizer.enter(Name::LineEnding); tokenizer.consume(); - tokenizer.exit(Token::LineEnding); - State::Next(Name::FlowStart) + tokenizer.exit(Name::LineEnding); + State::Next(StateName::FlowStart) } _ => unreachable!("expected eol/eof"), } @@ -204,8 +204,8 @@ pub fn after(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn before_paragraph(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - Name::ParagraphStart, - State::Next(Name::FlowAfter), + StateName::ParagraphStart, + State::Next(StateName::FlowAfter), State::Nok, ) } diff --git a/src/content/string.rs b/src/content/string.rs index 927f582..79dee6c 100644 --- a/src/content/string.rs +++ b/src/content/string.rs @@ -13,7 +13,7 @@ //! [text]: crate::content::text use crate::construct::partial_whitespace::resolve_whitespace; -use crate::state::{Name, State}; +use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; const MARKERS: [u8; 2] = [b'&', b'\\']; @@ -22,7 +22,7 @@ const MARKERS: [u8; 2] = [b'&', b'\\']; pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.register_resolver("whitespace".to_string(), Box::new(resolve)); tokenizer.tokenize_state.markers = &MARKERS; - State::Retry(Name::StringBefore) + State::Retry(StateName::StringBefore) } /// Before string. @@ -30,22 +30,26 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => State::Ok, Some(b'&') => tokenizer.attempt( - Name::CharacterReferenceStart, - State::Next(Name::StringBefore), - State::Next(Name::StringBeforeData), + StateName::CharacterReferenceStart, + State::Next(StateName::StringBefore), + State::Next(StateName::StringBeforeData), ), Some(b'\\') => tokenizer.attempt( - Name::CharacterEscapeStart, - State::Next(Name::StringBefore), - State::Next(Name::StringBeforeData), + StateName::CharacterEscapeStart, + State::Next(StateName::StringBefore), + State::Next(StateName::StringBeforeData), ), - _ => State::Retry(Name::StringBeforeData), + _ => State::Retry(StateName::StringBeforeData), } } /// At data. pub fn before_data(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt(Name::DataStart, State::Next(Name::StringBefore), State::Nok) + tokenizer.attempt( + StateName::DataStart, + State::Next(StateName::StringBefore), + State::Nok, + ) } /// Resolve whitespace. diff --git a/src/content/text.rs b/src/content/text.rs index 1b3890e..77c5963 100644 --- a/src/content/text.rs +++ b/src/content/text.rs @@ -21,7 +21,7 @@ //! > [whitespace][crate::construct::partial_whitespace]. use crate::construct::partial_whitespace::resolve_whitespace; -use crate::state::{Name, State}; +use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; const MARKERS: [u8; 9] = [ @@ -40,7 +40,7 @@ const MARKERS: [u8; 9] = [ pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.register_resolver("whitespace".to_string(), Box::new(resolve)); tokenizer.tokenize_state.markers = &MARKERS; - State::Retry(Name::TextBefore) + State::Retry(StateName::TextBefore) } /// Before text. @@ -48,71 +48,75 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => State::Ok, Some(b'!') => tokenizer.attempt( - Name::LabelStartImageStart, - State::Next(Name::TextBefore), - State::Next(Name::TextBeforeData), + StateName::LabelStartImageStart, + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeData), ), Some(b'&') => tokenizer.attempt( - Name::CharacterReferenceStart, - State::Next(Name::TextBefore), - State::Next(Name::TextBeforeData), + StateName::CharacterReferenceStart, + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeData), ), Some(b'*' | b'_') => tokenizer.attempt( - Name::AttentionStart, - State::Next(Name::TextBefore), - State::Next(Name::TextBeforeData), + StateName::AttentionStart, + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeData), ), // `autolink`, `html_text` (order does not matter) Some(b'<') => tokenizer.attempt( - Name::AutolinkStart, - State::Next(Name::TextBefore), - State::Next(Name::TextBeforeHtml), + StateName::AutolinkStart, + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeHtml), ), Some(b'[') => tokenizer.attempt( - Name::LabelStartLinkStart, - State::Next(Name::TextBefore), - State::Next(Name::TextBeforeData), + StateName::LabelStartLinkStart, + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeData), ), Some(b'\\') => tokenizer.attempt( - Name::CharacterEscapeStart, - State::Next(Name::TextBefore), - State::Next(Name::TextBeforeHardBreakEscape), + StateName::CharacterEscapeStart, + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeHardBreakEscape), ), Some(b']') => tokenizer.attempt( - Name::LabelEndStart, - State::Next(Name::TextBefore), - State::Next(Name::TextBeforeData), + StateName::LabelEndStart, + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeData), ), Some(b'`') => tokenizer.attempt( - Name::CodeTextStart, - State::Next(Name::TextBefore), - State::Next(Name::TextBeforeData), + StateName::CodeTextStart, + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeData), ), - _ => State::Retry(Name::TextBeforeData), + _ => State::Retry(StateName::TextBeforeData), } } /// At `<`, which wasn’t an autolink: before HTML? pub fn before_html(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - Name::HtmlTextStart, - State::Next(Name::TextBefore), - State::Next(Name::TextBeforeData), + StateName::HtmlTextStart, + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeData), ) } /// At `\`, which wasn’t a character escape: before a hard break? pub fn before_hard_break_escape(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - Name::HardBreakEscapeStart, - State::Next(Name::TextBefore), - State::Next(Name::TextBeforeData), + StateName::HardBreakEscapeStart, + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeData), ) } /// At data. pub fn before_data(tokenizer: &mut Tokenizer) -> State { - tokenizer.attempt(Name::DataStart, State::Next(Name::TextBefore), State::Nok) + tokenizer.attempt( + StateName::DataStart, + State::Next(StateName::TextBefore), + State::Nok, + ) } /// Resolve whitespace. |