diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-08-15 11:40:40 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-08-15 11:40:40 +0200 |
commit | ee967aa634b5f8e9d30329d587538f1371a5da95 (patch) | |
tree | cdc1461c822e440b24428eb8d431881e216ab8bd /src/construct/flow.rs | |
parent | 13135666fac476f3cd6f059147f496533b304097 (diff) | |
download | markdown-rs-ee967aa634b5f8e9d30329d587538f1371a5da95.tar.gz markdown-rs-ee967aa634b5f8e9d30329d587538f1371a5da95.tar.bz2 markdown-rs-ee967aa634b5f8e9d30329d587538f1371a5da95.zip |
Refactor to move `content` to `construct`
Diffstat (limited to 'src/construct/flow.rs')
-rw-r--r-- | src/construct/flow.rs | 254 |
1 files changed, 254 insertions, 0 deletions
diff --git a/src/construct/flow.rs b/src/construct/flow.rs new file mode 100644 index 0000000..08c7891 --- /dev/null +++ b/src/construct/flow.rs @@ -0,0 +1,254 @@ +//! The flow content type. +//! +//! **Flow** represents the sections, such as headings and code, which are +//! parsed per line. +//! An example is HTML, which has a certain starting condition (such as +//! `<script>` on its own line), then continues for a while, until an end +//! condition is found (such as `</style>`). +//! If that line with an end condition is never found, that flow goes until +//! the end. +//! +//! The constructs found in flow are: +//! +//! * [Blank line][crate::construct::blank_line] +//! * [Code (fenced)][crate::construct::code_fenced] +//! * [Code (indented)][crate::construct::code_indented] +//! * [Definition][crate::construct::definition] +//! * [Heading (atx)][crate::construct::heading_atx] +//! * [Heading (setext)][crate::construct::heading_setext] +//! * [HTML (flow)][crate::construct::html_flow] +//! * [Thematic break][crate::construct::thematic_break] + +use crate::event::Name; +use crate::state::{Name as StateName, State}; +use crate::tokenizer::Tokenizer; + +/// Start of flow. +// +/// ```markdown +/// > | ## alpha +/// ^ +/// > | bravo +/// ^ +/// > | *** +/// ^ +/// ``` +pub fn start(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b'`' | b'~') => { + tokenizer.attempt( + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), + ); + State::Retry(StateName::CodeFencedStart) + } + Some(b'<') => { + tokenizer.attempt( + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), + ); + State::Retry(StateName::HtmlFlowStart) + } + Some(b'#') => { + tokenizer.attempt( + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), + ); + State::Retry(StateName::HeadingAtxStart) + } + // Note: `-` is also used in thematic breaks, so it’s not included here. + Some(b'=') => { + tokenizer.attempt( + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), + ); + State::Retry(StateName::HeadingSetextStart) + } + Some(b'*' | b'_') => { + tokenizer.attempt( + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), + ); + State::Retry(StateName::ThematicBreakStart) + } + Some(b'[') => { + tokenizer.attempt( + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), + ); + State::Retry(StateName::DefinitionStart) + } + // Actual parsing: blank line? Indented code? Indented anything? + // Also includes `-` which can be a setext heading underline or a thematic break. + None | Some(b'\t' | b'\n' | b' ' | b'-') => State::Retry(StateName::FlowBlankLineBefore), + // Must be a paragraph. + Some(_) => { + tokenizer.attempt(State::Next(StateName::FlowAfter), State::Nok); + State::Retry(StateName::ParagraphStart) + } + } +} + +/// At blank line. +/// +/// ```markdown +/// > | ␠␠␊ +/// ^ +/// ``` +pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State { + tokenizer.attempt( + State::Next(StateName::FlowBlankLineAfter), + State::Next(StateName::FlowBeforeCodeIndented), + ); + State::Retry(StateName::BlankLineStart) +} + +/// At code (indented). +/// +/// ```markdown +/// > | ␠␠␠␠a +/// ^ +/// ``` +pub fn before_code_indented(tokenizer: &mut Tokenizer) -> State { + tokenizer.attempt( + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeCodeFenced), + ); + State::Retry(StateName::CodeIndentedStart) +} + +/// At code (fenced). +/// +/// ````markdown +/// > | ``` +/// ^ +/// ```` +pub fn before_code_fenced(tokenizer: &mut Tokenizer) -> State { + tokenizer.attempt( + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeHtml), + ); + State::Retry(StateName::CodeFencedStart) +} + +/// At html (flow). +/// +/// ```markdown +/// > | <a> +/// ^ +/// ``` +pub fn before_html(tokenizer: &mut Tokenizer) -> State { + tokenizer.attempt( + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeHeadingAtx), + ); + State::Retry(StateName::HtmlFlowStart) +} + +/// At heading (atx). +/// +/// ```markdown +/// > | # a +/// ^ +/// ``` +pub fn before_heading_atx(tokenizer: &mut Tokenizer) -> State { + tokenizer.attempt( + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeHeadingSetext), + ); + State::Retry(StateName::HeadingAtxStart) +} + +/// At heading (setext). +/// +/// ```markdown +/// | a +/// > | = +/// ^ +/// ``` +pub fn before_heading_setext(tokenizer: &mut Tokenizer) -> State { + tokenizer.attempt( + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeThematicBreak), + ); + State::Retry(StateName::HeadingSetextStart) +} + +/// At thematic break. +/// +/// ```markdown +/// > | *** +/// ^ +/// ``` +pub fn before_thematic_break(tokenizer: &mut Tokenizer) -> State { + tokenizer.attempt( + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeDefinition), + ); + State::Retry(StateName::ThematicBreakStart) +} + +/// At definition. +/// +/// ```markdown +/// > | [a]: b +/// ^ +/// ``` +pub fn before_definition(tokenizer: &mut Tokenizer) -> State { + tokenizer.attempt( + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), + ); + State::Retry(StateName::DefinitionStart) +} + +/// At paragraph. +/// +/// ```markdown +/// > | a +/// ^ +/// ``` +pub fn before_paragraph(tokenizer: &mut Tokenizer) -> State { + tokenizer.attempt(State::Next(StateName::FlowAfter), State::Nok); + State::Retry(StateName::ParagraphStart) +} + +/// After blank line. +/// +/// ```markdown +/// > | ␠␠␊ +/// ^ +/// ``` +pub fn blank_line_after(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + None => State::Ok, + Some(b'\n') => { + tokenizer.enter(Name::BlankLineEnding); + tokenizer.consume(); + tokenizer.exit(Name::BlankLineEnding); + // Feel free to interrupt. + tokenizer.interrupt = false; + State::Next(StateName::FlowStart) + } + _ => unreachable!("expected eol/eof"), + } +} + +/// After flow. +/// +/// ```markdown +/// > | # a␊ +/// ^ +/// ``` +pub fn after(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + None => State::Ok, + Some(b'\n') => { + tokenizer.enter(Name::LineEnding); + tokenizer.consume(); + tokenizer.exit(Name::LineEnding); + State::Next(StateName::FlowStart) + } + _ => unreachable!("expected eol/eof"), + } +} |