about | summary | refs | log | tree | commit | diff | stats
path: root/src/content
diff options
context:
space:
mode:
author: Titus Wormer <tituswormer@gmail.com> 2022-08-11 13:45:24 +0200
committer: Titus Wormer <tituswormer@gmail.com> 2022-08-11 13:45:24 +0200
commit: 6eb2f644057f371841fe25330a57ee185f91c7af (patch)
tree: 7b4d02586339d1a7f82104b4473d9ac243b3abf9 /src/content
parent: 2d35cbfceace81a217cd0fbdae7a8777c7a6465e (diff)
download: markdown-rs-6eb2f644057f371841fe25330a57ee185f91c7af.tar.gz
markdown-rs-6eb2f644057f371841fe25330a57ee185f91c7af.tar.bz2
markdown-rs-6eb2f644057f371841fe25330a57ee185f91c7af.zip
Refactor to move some code to `state.rs`
Diffstat (limited to 'src/content')
-rw-r--r-- src/content/document.rs 60
-rw-r--r-- src/content/flow.rs 101
-rw-r--r-- src/content/string.rs 25
-rw-r--r-- src/content/text.rs 73
4 files changed, 127 insertions, 132 deletions
diff --git a/src/content/document.rs b/src/content/document.rs
index 49ca919..b5ff532 100644
--- a/src/content/document.rs
+++ b/src/content/document.rs
@@ -9,11 +9,11 @@
//! * [List][crate::construct::list]
use crate::parser::ParseState;
+use crate::state::{Name, State};
use crate::subtokenize::{divide_events, subtokenize};
use crate::token::Token;
use crate::tokenizer::{
- Container, ContainerState, ContentType, Event, EventType, Link, Point, State, StateName,
- Tokenizer,
+ Container, ContainerState, ContentType, Event, EventType, Link, Point, Tokenizer,
};
use crate::util::{
normalize_identifier::normalize_identifier,
@@ -59,7 +59,7 @@ pub fn document(parse_state: &mut ParseState, point: Point) -> Vec<Event> {
let state = tokenizer.push(
(0, 0),
(parse_state.bytes.len(), 0),
- State::Next(StateName::DocumentStart),
+ State::Next(Name::DocumentStart),
);
tokenizer.flush(state, true);
@@ -111,9 +111,9 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
)));
tokenizer.attempt(
- StateName::BomStart,
- State::Next(StateName::DocumentContainerExistingBefore),
- State::Next(StateName::DocumentContainerExistingBefore),
+ Name::BomStart,
+ State::Next(Name::DocumentContainerExistingBefore),
+ State::Next(Name::DocumentContainerExistingBefore),
)
}
@@ -134,16 +134,16 @@ pub fn container_existing_before(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(
match container.kind {
- Container::BlockQuote => StateName::BlockQuoteContStart,
- Container::ListItem => StateName::ListContStart,
+ Container::BlockQuote => Name::BlockQuoteContStart,
+ Container::ListItem => Name::ListContStart,
},
- State::Next(StateName::DocumentContainerExistingAfter),
- State::Next(StateName::DocumentContainerNewBefore),
+ State::Next(Name::DocumentContainerExistingAfter),
+ State::Next(Name::DocumentContainerNewBefore),
)
}
// Otherwise, check new containers.
else {
- State::Retry(StateName::DocumentContainerNewBefore)
+ State::Retry(Name::DocumentContainerNewBefore)
}
}
@@ -156,7 +156,7 @@ pub fn container_existing_before(tokenizer: &mut Tokenizer) -> State {
/// ```
pub fn container_existing_after(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.document_continued += 1;
- State::Retry(StateName::DocumentContainerExistingBefore)
+ State::Retry(Name::DocumentContainerExistingBefore)
}
/// Before a new container.
@@ -180,7 +180,7 @@ pub fn container_new_before(tokenizer: &mut Tokenizer) -> State {
// …and if we’re in a concrete construct, new containers can’t “pierce”
// into them.
if child.concrete {
- return State::Retry(StateName::DocumentContainersAfter);
+ return State::Retry(Name::DocumentContainersAfter);
}
}
@@ -203,9 +203,9 @@ pub fn container_new_before(tokenizer: &mut Tokenizer) -> State {
.swap(tokenizer.tokenize_state.document_continued, tail);
tokenizer.attempt(
- StateName::BlockQuoteStart,
- State::Next(StateName::DocumentContainerNewAfter),
- State::Next(StateName::DocumentContainerNewBeforeNotBlockQuote),
+ Name::BlockQuoteStart,
+ State::Next(Name::DocumentContainerNewAfter),
+ State::Next(Name::DocumentContainerNewBeforeNotBlockQuote),
)
}
@@ -226,9 +226,9 @@ pub fn container_new_before_not_block_quote(tokenizer: &mut Tokenizer) -> State
};
tokenizer.attempt(
- StateName::ListStart,
- State::Next(StateName::DocumentContainerNewAfter),
- State::Next(StateName::DocumentContainerNewBeforeNotList),
+ Name::ListStart,
+ State::Next(Name::DocumentContainerNewAfter),
+ State::Next(Name::DocumentContainerNewBeforeNotList),
)
}
@@ -247,7 +247,7 @@ pub fn container_new_before_not_list(tokenizer: &mut Tokenizer) -> State {
.document_container_stack
.swap_remove(tokenizer.tokenize_state.document_continued);
- State::Retry(StateName::DocumentContainersAfter)
+ State::Retry(Name::DocumentContainersAfter)
}
/// After a new container.
@@ -281,7 +281,7 @@ pub fn container_new_after(tokenizer: &mut Tokenizer) -> State {
.push(container);
tokenizer.tokenize_state.document_continued += 1;
tokenizer.interrupt = false;
- State::Retry(StateName::DocumentContainerNewBefore)
+ State::Retry(Name::DocumentContainerNewBefore)
}
/// After containers, before flow.
@@ -301,7 +301,7 @@ pub fn containers_after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// Note: EOL is part of data.
- None => State::Retry(StateName::DocumentFlowEnd),
+ None => State::Retry(Name::DocumentFlowEnd),
Some(_) => {
let current = tokenizer.events.len();
let previous = tokenizer.tokenize_state.document_data_index;
@@ -317,7 +317,7 @@ pub fn containers_after(tokenizer: &mut Tokenizer) -> State {
content_type: ContentType::Flow,
}),
);
- State::Retry(StateName::DocumentFlowInside)
+ State::Retry(Name::DocumentFlowInside)
}
}
}
@@ -332,17 +332,17 @@ pub fn flow_inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => {
tokenizer.exit(Token::Data);
- State::Retry(StateName::DocumentFlowEnd)
+ State::Retry(Name::DocumentFlowEnd)
}
// Note: EOL is part of data.
Some(b'\n') => {
tokenizer.consume();
tokenizer.exit(Token::Data);
- State::Next(StateName::DocumentFlowEnd)
+ State::Next(Name::DocumentFlowEnd)
}
Some(_) => {
tokenizer.consume();
- State::Next(StateName::DocumentFlowInside)
+ State::Next(Name::DocumentFlowInside)
}
}
}
@@ -359,7 +359,7 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State {
let state = tokenizer
.tokenize_state
.document_child_state
- .unwrap_or(State::Next(StateName::FlowStart));
+ .unwrap_or(State::Next(Name::FlowStart));
tokenizer.tokenize_state.document_exits.push(None);
@@ -369,7 +369,7 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State {
state,
);
- let paragraph = matches!(state, State::Next(StateName::ParagraphInside))
+ let paragraph = matches!(state, State::Next(Name::ParagraphInside))
|| (!child.events.is_empty()
&& child.events
[skip::opt_back(&child.events, child.events.len() - 1, &[Token::LineEnding])]
@@ -401,7 +401,7 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.document_paragraph_before = paragraph;
// Containers would only be interrupting if we’ve continued.
tokenizer.interrupt = false;
- State::Retry(StateName::DocumentContainerExistingBefore)
+ State::Retry(Name::DocumentContainerExistingBefore)
}
}
}
@@ -421,7 +421,7 @@ fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) {
.tokenize_state
.document_child_state
.take()
- .unwrap_or(State::Next(StateName::FlowStart));
+ .unwrap_or(State::Next(Name::FlowStart));
child.flush(state, false);
}
diff --git a/src/content/flow.rs b/src/content/flow.rs
index 886b5f0..16a1cba 100644
--- a/src/content/flow.rs
+++ b/src/content/flow.rs
@@ -19,8 +19,9 @@
//! * [HTML (flow)][crate::construct::html_flow]
//! * [Thematic break][crate::construct::thematic_break]
+use crate::state::{Name, State};
use crate::token::Token;
-use crate::tokenizer::{State, StateName, Tokenizer};
+use crate::tokenizer::Tokenizer;
/// Before flow.
///
@@ -35,42 +36,42 @@ use crate::tokenizer::{State, StateName, Tokenizer};
pub fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'`' | b'~') => tokenizer.attempt(
- StateName::CodeFencedStart,
- State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeParagraph),
+ Name::CodeFencedStart,
+ State::Next(Name::FlowAfter),
+ State::Next(Name::FlowBeforeParagraph),
),
Some(b'<') => tokenizer.attempt(
- StateName::HtmlFlowStart,
- State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeParagraph),
+ Name::HtmlFlowStart,
+ State::Next(Name::FlowAfter),
+ State::Next(Name::FlowBeforeParagraph),
),
Some(b'#') => tokenizer.attempt(
- StateName::HeadingAtxStart,
- State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeParagraph),
+ Name::HeadingAtxStart,
+ State::Next(Name::FlowAfter),
+ State::Next(Name::FlowBeforeParagraph),
),
// Note: `-` is also used in thematic breaks, so it’s not included here.
Some(b'=') => tokenizer.attempt(
- StateName::HeadingSetextStart,
- State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeParagraph),
+ Name::HeadingSetextStart,
+ State::Next(Name::FlowAfter),
+ State::Next(Name::FlowBeforeParagraph),
),
Some(b'*' | b'_') => tokenizer.attempt(
- StateName::ThematicBreakStart,
- State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeParagraph),
+ Name::ThematicBreakStart,
+ State::Next(Name::FlowAfter),
+ State::Next(Name::FlowBeforeParagraph),
),
Some(b'[') => tokenizer.attempt(
- StateName::DefinitionStart,
- State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeParagraph),
+ Name::DefinitionStart,
+ State::Next(Name::FlowAfter),
+ State::Next(Name::FlowBeforeParagraph),
),
// Actual parsing: blank line? Indented code? Indented anything?
// Also includes `-` which can be a setext heading underline or a thematic break.
- None | Some(b'\t' | b'\n' | b' ' | b'-') => State::Retry(StateName::FlowBlankLineBefore),
+ None | Some(b'\t' | b'\n' | b' ' | b'-') => State::Retry(Name::FlowBlankLineBefore),
Some(_) => tokenizer.attempt(
- StateName::ParagraphStart,
- State::Next(StateName::FlowAfter),
+ Name::ParagraphStart,
+ State::Next(Name::FlowAfter),
State::Nok,
),
}
@@ -78,9 +79,9 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(
- StateName::BlankLineStart,
- State::Next(StateName::FlowBlankLineAfter),
- State::Next(StateName::FlowBeforeCodeIndented),
+ Name::BlankLineStart,
+ State::Next(Name::FlowBlankLineAfter),
+ State::Next(Name::FlowBeforeCodeIndented),
)
}
@@ -98,57 +99,57 @@ pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State {
/// ```
pub fn before_code_indented(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(
- StateName::CodeIndentedStart,
- State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeCodeFenced),
+ Name::CodeIndentedStart,
+ State::Next(Name::FlowAfter),
+ State::Next(Name::FlowBeforeCodeFenced),
)
}
pub fn before_code_fenced(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(
- StateName::CodeFencedStart,
- State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeHtml),
+ Name::CodeFencedStart,
+ State::Next(Name::FlowAfter),
+ State::Next(Name::FlowBeforeHtml),
)
}
pub fn before_html(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(
- StateName::HtmlFlowStart,
- State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeHeadingAtx),
+ Name::HtmlFlowStart,
+ State::Next(Name::FlowAfter),
+ State::Next(Name::FlowBeforeHeadingAtx),
)
}
pub fn before_heading_atx(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(
- StateName::HeadingAtxStart,
- State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeHeadingSetext),
+ Name::HeadingAtxStart,
+ State::Next(Name::FlowAfter),
+ State::Next(Name::FlowBeforeHeadingSetext),
)
}
pub fn before_heading_setext(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(
- StateName::HeadingSetextStart,
- State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeThematicBreak),
+ Name::HeadingSetextStart,
+ State::Next(Name::FlowAfter),
+ State::Next(Name::FlowBeforeThematicBreak),
)
}
pub fn before_thematic_break(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(
- StateName::ThematicBreakStart,
- State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeDefinition),
+ Name::ThematicBreakStart,
+ State::Next(Name::FlowAfter),
+ State::Next(Name::FlowBeforeDefinition),
)
}
pub fn before_definition(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(
- StateName::DefinitionStart,
- State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeParagraph),
+ Name::DefinitionStart,
+ State::Next(Name::FlowAfter),
+ State::Next(Name::FlowBeforeParagraph),
)
}
@@ -168,7 +169,7 @@ pub fn blank_line_after(tokenizer: &mut Tokenizer) -> State {
tokenizer.exit(Token::BlankLineEnding);
// Feel free to interrupt.
tokenizer.interrupt = false;
- State::Next(StateName::FlowStart)
+ State::Next(Name::FlowStart)
}
_ => unreachable!("expected eol/eof"),
}
@@ -190,7 +191,7 @@ pub fn after(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
- State::Next(StateName::FlowStart)
+ State::Next(Name::FlowStart)
}
_ => unreachable!("expected eol/eof"),
}
@@ -203,8 +204,8 @@ pub fn after(tokenizer: &mut Tokenizer) -> State {
/// ```
pub fn before_paragraph(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(
- StateName::ParagraphStart,
- State::Next(StateName::FlowAfter),
+ Name::ParagraphStart,
+ State::Next(Name::FlowAfter),
State::Nok,
)
}
diff --git a/src/content/string.rs b/src/content/string.rs
index 5dfceb0..927f582 100644
--- a/src/content/string.rs
+++ b/src/content/string.rs
@@ -13,7 +13,8 @@
//! [text]: crate::content::text
use crate::construct::partial_whitespace::resolve_whitespace;
-use crate::tokenizer::{State, StateName, Tokenizer};
+use crate::state::{Name, State};
+use crate::tokenizer::Tokenizer;
const MARKERS: [u8; 2] = [b'&', b'\\'];
@@ -21,7 +22,7 @@ const MARKERS: [u8; 2] = [b'&', b'\\'];
pub fn start(tokenizer: &mut Tokenizer) -> State {
tokenizer.register_resolver("whitespace".to_string(), Box::new(resolve));
tokenizer.tokenize_state.markers = &MARKERS;
- State::Retry(StateName::StringBefore)
+ State::Retry(Name::StringBefore)
}
/// Before string.
@@ -29,26 +30,22 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => State::Ok,
Some(b'&') => tokenizer.attempt(
- StateName::CharacterReferenceStart,
- State::Next(StateName::StringBefore),
- State::Next(StateName::StringBeforeData),
+ Name::CharacterReferenceStart,
+ State::Next(Name::StringBefore),
+ State::Next(Name::StringBeforeData),
),
Some(b'\\') => tokenizer.attempt(
- StateName::CharacterEscapeStart,
- State::Next(StateName::StringBefore),
- State::Next(StateName::StringBeforeData),
+ Name::CharacterEscapeStart,
+ State::Next(Name::StringBefore),
+ State::Next(Name::StringBeforeData),
),
- _ => State::Retry(StateName::StringBeforeData),
+ _ => State::Retry(Name::StringBeforeData),
}
}
/// At data.
pub fn before_data(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt(
- StateName::DataStart,
- State::Next(StateName::StringBefore),
- State::Nok,
- )
+ tokenizer.attempt(Name::DataStart, State::Next(Name::StringBefore), State::Nok)
}
/// Resolve whitespace.
diff --git a/src/content/text.rs b/src/content/text.rs
index 4e93779..1b3890e 100644
--- a/src/content/text.rs
+++ b/src/content/text.rs
@@ -21,7 +21,8 @@
//! > [whitespace][crate::construct::partial_whitespace].
use crate::construct::partial_whitespace::resolve_whitespace;
-use crate::tokenizer::{State, StateName, Tokenizer};
+use crate::state::{Name, State};
+use crate::tokenizer::Tokenizer;
const MARKERS: [u8; 9] = [
b'!', // `label_start_image`
@@ -39,7 +40,7 @@ const MARKERS: [u8; 9] = [
pub fn start(tokenizer: &mut Tokenizer) -> State {
tokenizer.register_resolver("whitespace".to_string(), Box::new(resolve));
tokenizer.tokenize_state.markers = &MARKERS;
- State::Retry(StateName::TextBefore)
+ State::Retry(Name::TextBefore)
}
/// Before text.
@@ -47,75 +48,71 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => State::Ok,
Some(b'!') => tokenizer.attempt(
- StateName::LabelStartImageStart,
- State::Next(StateName::TextBefore),
- State::Next(StateName::TextBeforeData),
+ Name::LabelStartImageStart,
+ State::Next(Name::TextBefore),
+ State::Next(Name::TextBeforeData),
),
Some(b'&') => tokenizer.attempt(
- StateName::CharacterReferenceStart,
- State::Next(StateName::TextBefore),
- State::Next(StateName::TextBeforeData),
+ Name::CharacterReferenceStart,
+ State::Next(Name::TextBefore),
+ State::Next(Name::TextBeforeData),
),
Some(b'*' | b'_') => tokenizer.attempt(
- StateName::AttentionStart,
- State::Next(StateName::TextBefore),
- State::Next(StateName::TextBeforeData),
+ Name::AttentionStart,
+ State::Next(Name::TextBefore),
+ State::Next(Name::TextBeforeData),
),
// `autolink`, `html_text` (order does not matter)
Some(b'<') => tokenizer.attempt(
- StateName::AutolinkStart,
- State::Next(StateName::TextBefore),
- State::Next(StateName::TextBeforeHtml),
+ Name::AutolinkStart,
+ State::Next(Name::TextBefore),
+ State::Next(Name::TextBeforeHtml),
),
Some(b'[') => tokenizer.attempt(
- StateName::LabelStartLinkStart,
- State::Next(StateName::TextBefore),
- State::Next(StateName::TextBeforeData),
+ Name::LabelStartLinkStart,
+ State::Next(Name::TextBefore),
+ State::Next(Name::TextBeforeData),
),
Some(b'\\') => tokenizer.attempt(
- StateName::CharacterEscapeStart,
- State::Next(StateName::TextBefore),
- State::Next(StateName::TextBeforeHardBreakEscape),
+ Name::CharacterEscapeStart,
+ State::Next(Name::TextBefore),
+ State::Next(Name::TextBeforeHardBreakEscape),
),
Some(b']') => tokenizer.attempt(
- StateName::LabelEndStart,
- State::Next(StateName::TextBefore),
- State::Next(StateName::TextBeforeData),
+ Name::LabelEndStart,
+ State::Next(Name::TextBefore),
+ State::Next(Name::TextBeforeData),
),
Some(b'`') => tokenizer.attempt(
- StateName::CodeTextStart,
- State::Next(StateName::TextBefore),
- State::Next(StateName::TextBeforeData),
+ Name::CodeTextStart,
+ State::Next(Name::TextBefore),
+ State::Next(Name::TextBeforeData),
),
- _ => State::Retry(StateName::TextBeforeData),
+ _ => State::Retry(Name::TextBeforeData),
}
}
/// At `<`, which wasn’t an autolink: before HTML?
pub fn before_html(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(
- StateName::HtmlTextStart,
- State::Next(StateName::TextBefore),
- State::Next(StateName::TextBeforeData),
+ Name::HtmlTextStart,
+ State::Next(Name::TextBefore),
+ State::Next(Name::TextBeforeData),
)
}
/// At `\`, which wasn’t a character escape: before a hard break?
pub fn before_hard_break_escape(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(
- StateName::HardBreakEscapeStart,
- State::Next(StateName::TextBefore),
- State::Next(StateName::TextBeforeData),
+ Name::HardBreakEscapeStart,
+ State::Next(Name::TextBefore),
+ State::Next(Name::TextBeforeData),
)
}
/// At data.
pub fn before_data(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt(
- StateName::DataStart,
- State::Next(StateName::TextBefore),
- State::Nok,
- )
+ tokenizer.attempt(Name::DataStart, State::Next(Name::TextBefore), State::Nok)
}
/// Resolve whitespace.