Refactor to move `content` to `construct`

author: Titus Wormer <tituswormer@gmail.com> 2022-08-15 11:40:40 +0200
committer: Titus Wormer <tituswormer@gmail.com> 2022-08-15 11:40:40 +0200
commit: ee967aa634b5f8e9d30329d587538f1371a5da95 (patch)
tree: cdc1461c822e440b24428eb8d431881e216ab8bd /src/construct/flow.rs
parent: 13135666fac476f3cd6f059147f496533b304097 (diff)
download: markdown-rs-ee967aa634b5f8e9d30329d587538f1371a5da95.tar.gz
markdown-rs-ee967aa634b5f8e9d30329d587538f1371a5da95.tar.bz2
markdown-rs-ee967aa634b5f8e9d30329d587538f1371a5da95.zip
1 files changed, 254 insertions, 0 deletions
diff --git a/src/construct/flow.rs b/src/construct/flow.rs
new file mode 100644
index 0000000..08c7891
--- /dev/null
+++ b/src/construct/flow.rs
@@ -0,0 +1,254 @@
+//! The flow content type.
+//!
+//! **Flow** represents the sections, such as headings and code, which are
+//! parsed per line.
+//! An example is HTML, which has a certain starting condition (such as
+//! `<script>` on its own line), then continues for a while, until an end
+//! condition is found (such as `</script>`).
+//! If that line with an end condition is never found, that flow goes until
+//! the end.
+//!
+//! The constructs found in flow are:
+//!
+//! *   [Blank line][crate::construct::blank_line]
+//! *   [Code (fenced)][crate::construct::code_fenced]
+//! *   [Code (indented)][crate::construct::code_indented]
+//! *   [Definition][crate::construct::definition]
+//! *   [Heading (atx)][crate::construct::heading_atx]
+//! *   [Heading (setext)][crate::construct::heading_setext]
+//! *   [HTML (flow)][crate::construct::html_flow]
+//! *   [Thematic break][crate::construct::thematic_break]
+
+use crate::event::Name;
+use crate::state::{Name as StateName, State};
+use crate::tokenizer::Tokenizer;
+
+/// Start of flow: dispatch on the current byte to the construct to try first.
+///
+/// ```markdown
+/// > | ## alpha
+///     ^
+/// > |     bravo
+///     ^
+/// > | ***
+///     ^
+/// ```
+pub fn start(tokenizer: &mut Tokenizer) -> State {
+    match tokenizer.current {
+        Some(b'`' | b'~') => {
+            tokenizer.attempt(
+                State::Next(StateName::FlowAfter),
+                State::Next(StateName::FlowBeforeParagraph),
+            );
+            State::Retry(StateName::CodeFencedStart)
+        }
+        Some(b'<') => {
+            tokenizer.attempt(
+                State::Next(StateName::FlowAfter),
+                State::Next(StateName::FlowBeforeParagraph),
+            );
+            State::Retry(StateName::HtmlFlowStart)
+        }
+        Some(b'#') => {
+            tokenizer.attempt(
+                State::Next(StateName::FlowAfter),
+                State::Next(StateName::FlowBeforeParagraph),
+            );
+            State::Retry(StateName::HeadingAtxStart)
+        }
+        // Note: `-` can also start a thematic break, so it is not matched here but in the arm below.
+        Some(b'=') => {
+            tokenizer.attempt(
+                State::Next(StateName::FlowAfter),
+                State::Next(StateName::FlowBeforeParagraph),
+            );
+            State::Retry(StateName::HeadingSetextStart)
+        }
+        Some(b'*' | b'_') => {
+            tokenizer.attempt(
+                State::Next(StateName::FlowAfter),
+                State::Next(StateName::FlowBeforeParagraph),
+            );
+            State::Retry(StateName::ThematicBreakStart)
+        }
+        Some(b'[') => {
+            tokenizer.attempt(
+                State::Next(StateName::FlowAfter),
+                State::Next(StateName::FlowBeforeParagraph),
+            );
+            State::Retry(StateName::DefinitionStart)
+        }
+        // Actual parsing: blank line? Indented code? Indented anything?
+        // Also includes `-` which can be a setext heading underline or a thematic break.
+        None | Some(b'\t' | b'\n' | b' ' | b'-') => State::Retry(StateName::FlowBlankLineBefore),
+        // Must be a paragraph.
+        Some(_) => {
+            tokenizer.attempt(State::Next(StateName::FlowAfter), State::Nok);
+            State::Retry(StateName::ParagraphStart)
+        }
+    }
+}
+
+/// At blank line: attempt `BlankLineStart`, registering `FlowBlankLineAfter` and `FlowBeforeCodeIndented` as the follow-up states.
+///
+/// ```markdown
+/// > | ␠␠␊
+///     ^
+/// ```
+pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.attempt(
+        State::Next(StateName::FlowBlankLineAfter),
+        State::Next(StateName::FlowBeforeCodeIndented),
+    );
+    State::Retry(StateName::BlankLineStart)
+}
+
+/// At code (indented): attempt `CodeIndentedStart`, registering `FlowAfter` and `FlowBeforeCodeFenced` as the follow-up states.
+///
+/// ```markdown
+/// > | ␠␠␠␠a
+///     ^
+/// ```
+pub fn before_code_indented(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.attempt(
+        State::Next(StateName::FlowAfter),
+        State::Next(StateName::FlowBeforeCodeFenced),
+    );
+    State::Retry(StateName::CodeIndentedStart)
+}
+
+/// At code (fenced): attempt `CodeFencedStart`, registering `FlowAfter` and `FlowBeforeHtml` as the follow-up states.
+///
+/// ````markdown
+/// > | ```
+///     ^
+/// ````
+pub fn before_code_fenced(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.attempt(
+        State::Next(StateName::FlowAfter),
+        State::Next(StateName::FlowBeforeHtml),
+    );
+    State::Retry(StateName::CodeFencedStart)
+}
+
+/// At html (flow): attempt `HtmlFlowStart`, registering `FlowAfter` and `FlowBeforeHeadingAtx` as the follow-up states.
+///
+/// ```markdown
+/// > | <a>
+///     ^
+/// ```
+pub fn before_html(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.attempt(
+        State::Next(StateName::FlowAfter),
+        State::Next(StateName::FlowBeforeHeadingAtx),
+    );
+    State::Retry(StateName::HtmlFlowStart)
+}
+
+/// At heading (atx): attempt `HeadingAtxStart`, registering `FlowAfter` and `FlowBeforeHeadingSetext` as the follow-up states.
+///
+/// ```markdown
+/// > | # a
+///     ^
+/// ```
+pub fn before_heading_atx(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.attempt(
+        State::Next(StateName::FlowAfter),
+        State::Next(StateName::FlowBeforeHeadingSetext),
+    );
+    State::Retry(StateName::HeadingAtxStart)
+}
+
+/// At heading (setext): attempt `HeadingSetextStart`, registering `FlowAfter` and `FlowBeforeThematicBreak` as the follow-up states.
+///
+/// ```markdown
+///   | a
+/// > | =
+///     ^
+/// ```
+pub fn before_heading_setext(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.attempt(
+        State::Next(StateName::FlowAfter),
+        State::Next(StateName::FlowBeforeThematicBreak),
+    );
+    State::Retry(StateName::HeadingSetextStart)
+}
+
+/// At thematic break: attempt `ThematicBreakStart`, registering `FlowAfter` and `FlowBeforeDefinition` as the follow-up states.
+///
+/// ```markdown
+/// > | ***
+///     ^
+/// ```
+pub fn before_thematic_break(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.attempt(
+        State::Next(StateName::FlowAfter),
+        State::Next(StateName::FlowBeforeDefinition),
+    );
+    State::Retry(StateName::ThematicBreakStart)
+}
+
+/// At definition: attempt `DefinitionStart`, registering `FlowAfter` and `FlowBeforeParagraph` as the follow-up states.
+///
+/// ```markdown
+/// > | [a]: b
+///     ^
+/// ```
+pub fn before_definition(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.attempt(
+        State::Next(StateName::FlowAfter),
+        State::Next(StateName::FlowBeforeParagraph),
+    );
+    State::Retry(StateName::DefinitionStart)
+}
+
+/// At paragraph: attempt `ParagraphStart`, registering `FlowAfter` and `State::Nok` as the follow-up states.
+///
+/// ```markdown
+/// > | a
+///     ^
+/// ```
+pub fn before_paragraph(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.attempt(State::Next(StateName::FlowAfter), State::Nok);
+    State::Retry(StateName::ParagraphStart)
+}
+
+/// After blank line: at eof return `State::Ok`; at eol, consume it as a `BlankLineEnding` and go back to `FlowStart`.
+///
+/// ```markdown
+/// > | ␠␠␊
+///       ^
+/// ```
+pub fn blank_line_after(tokenizer: &mut Tokenizer) -> State {
+    match tokenizer.current {
+        None => State::Ok,
+        Some(b'\n') => {
+            tokenizer.enter(Name::BlankLineEnding);
+            tokenizer.consume();
+            tokenizer.exit(Name::BlankLineEnding);
+            // Feel free to interrupt.
+            tokenizer.interrupt = false;
+            State::Next(StateName::FlowStart)
+        }
+        _ => unreachable!("expected eol/eof"),
+    }
+}
+
+/// After flow: at eof return `State::Ok`; at eol, consume it as a `LineEnding` and go back to `FlowStart`.
+///
+/// ```markdown
+/// > | # a␊
+///        ^
+/// ```
+pub fn after(tokenizer: &mut Tokenizer) -> State {
+    match tokenizer.current {
+        None => State::Ok,
+        Some(b'\n') => {
+            tokenizer.enter(Name::LineEnding);
+            tokenizer.consume();
+            tokenizer.exit(Name::LineEnding);
+            State::Next(StateName::FlowStart)
+        }
+        _ => unreachable!("expected eol/eof"),
+    }
+}
author	Titus Wormer <tituswormer@gmail.com>	2022-08-15 11:40:40 +0200
committer	Titus Wormer <tituswormer@gmail.com>	2022-08-15 11:40:40 +0200
commit	ee967aa634b5f8e9d30329d587538f1371a5da95 (patch)
tree	cdc1461c822e440b24428eb8d431881e216ab8bd /src/construct/flow.rs
parent	13135666fac476f3cd6f059147f496533b304097 (diff)
download	markdown-rs-ee967aa634b5f8e9d30329d587538f1371a5da95.tar.gz markdown-rs-ee967aa634b5f8e9d30329d587538f1371a5da95.tar.bz2 markdown-rs-ee967aa634b5f8e9d30329d587538f1371a5da95.zip