diff options
| author | 2022-08-16 16:49:29 +0200 | |
|---|---|---|
| committer | 2022-08-16 16:49:53 +0200 | |
| commit | 6ee90b34c87354baf8e03d5469a92cf5dd17a82b (patch) | |
| tree | cfa64be772be6464e6f790dabccf8a77e7afe60e /src/construct | |
| parent | 93d0b7c6465f4ffe220b3ddada729746b11eb6ce (diff) | |
| download | markdown-rs-6ee90b34c87354baf8e03d5469a92cf5dd17a82b.tar.gz markdown-rs-6ee90b34c87354baf8e03d5469a92cf5dd17a82b.tar.bz2 markdown-rs-6ee90b34c87354baf8e03d5469a92cf5dd17a82b.zip | |
Add support for frontmatter
Diffstat (limited to '')
| -rw-r--r-- | src/construct/code_indented.rs | 2 | ||||
| -rw-r--r-- | src/construct/document.rs | 20 | ||||
| -rw-r--r-- | src/construct/flow.rs | 24 | ||||
| -rw-r--r-- | src/construct/frontmatter.rs | 293 | ||||
| -rw-r--r-- | src/construct/mod.rs | 2 | ||||
| -rw-r--r-- | src/construct/thematic_break.rs | 2 | 
6 files changed, 327 insertions, 16 deletions
| diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs index 89c5652..c5439f1 100644 --- a/src/construct/code_indented.rs +++ b/src/construct/code_indented.rs @@ -53,8 +53,8 @@  //! [html_code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element  //! [html_pre]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-pre-element -use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};  use crate::constant::TAB_SIZE; +use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};  use crate::event::Name;  use crate::state::{Name as StateName, State};  use crate::tokenizer::Tokenizer; diff --git a/src/construct/document.rs b/src/construct/document.rs index 0cda368..2cc170d 100644 --- a/src/construct/document.rs +++ b/src/construct/document.rs @@ -58,13 +58,29 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {      )));      tokenizer.attempt( -        State::Next(StateName::DocumentContainerExistingBefore), -        State::Next(StateName::DocumentContainerExistingBefore), +        State::Next(StateName::DocumentBeforeFrontmatter), +        State::Next(StateName::DocumentBeforeFrontmatter),      );      State::Retry(StateName::BomStart)  } +/// At optional frontmatter. +/// +/// ```markdown +/// > | --- +///     ^ +///   | title: Venus +///   | --- +/// ``` +pub fn before_frontmatter(tokenizer: &mut Tokenizer) -> State { +    tokenizer.attempt( +        State::Next(StateName::DocumentContainerNewBefore), +        State::Next(StateName::DocumentContainerNewBefore), +    ); +    State::Retry(StateName::FrontmatterStart) +} +  /// At optional existing containers.  //  /// ```markdown diff --git a/src/construct/flow.rs b/src/construct/flow.rs index 08c7891..f3c7685 100644 --- a/src/construct/flow.rs +++ b/src/construct/flow.rs @@ -35,28 +35,28 @@ use crate::tokenizer::Tokenizer;  /// ```  pub fn start(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current { -        Some(b'`' | b'~') => { +        Some(b'#') => {              tokenizer.attempt(                  State::Next(StateName::FlowAfter),                  State::Next(StateName::FlowBeforeParagraph),              ); -            State::Retry(StateName::CodeFencedStart) +            State::Retry(StateName::HeadingAtxStart)          } -        Some(b'<') => { +        Some(b'*' | b'_') => {              tokenizer.attempt(                  State::Next(StateName::FlowAfter),                  State::Next(StateName::FlowBeforeParagraph),              ); -            State::Retry(StateName::HtmlFlowStart) +            State::Retry(StateName::ThematicBreakStart)          } -        Some(b'#') => { +        Some(b'<') => {              tokenizer.attempt(                  State::Next(StateName::FlowAfter),                  State::Next(StateName::FlowBeforeParagraph),              ); -            State::Retry(StateName::HeadingAtxStart) +            State::Retry(StateName::HtmlFlowStart)          } -        // Note: `-` is also used in thematic breaks, so it’s not included here. +        // Note: `-` is also used in thematic breaks so it’s not included here.          Some(b'=') => {              tokenizer.attempt(                  State::Next(StateName::FlowAfter), @@ -64,22 +64,22 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {              );              State::Retry(StateName::HeadingSetextStart)          } -        Some(b'*' | b'_') => { +        Some(b'[') => {              tokenizer.attempt(                  State::Next(StateName::FlowAfter),                  State::Next(StateName::FlowBeforeParagraph),              ); -            State::Retry(StateName::ThematicBreakStart) +            State::Retry(StateName::DefinitionStart)          } -        Some(b'[') => { +        Some(b'`' | b'~') => {              tokenizer.attempt(                  State::Next(StateName::FlowAfter),                  State::Next(StateName::FlowBeforeParagraph),              ); -            State::Retry(StateName::DefinitionStart) +            State::Retry(StateName::CodeFencedStart)          }          // Actual parsing: blank line? Indented code? Indented anything? -        // Also includes `-` which can be a setext heading underline or a thematic break. +        // Also includes `-` which can be a setext heading underline or thematic break.          None | Some(b'\t' | b'\n' | b' ' | b'-') => State::Retry(StateName::FlowBlankLineBefore),          // Must be a paragraph.          Some(_) => { diff --git a/src/construct/frontmatter.rs b/src/construct/frontmatter.rs new file mode 100644 index 0000000..dc47bee --- /dev/null +++ b/src/construct/frontmatter.rs @@ -0,0 +1,293 @@ +//! Frontmatter occurs at the start of the document. +//! +//! ## Grammar +//! +//! Frontmatter forms with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>): +//! +//! ```bnf +//! frontmatter ::= fence_open *( eol *byte ) eol fence_close +//! fence_open ::= sequence *space_or_tab +//! ; Restriction: markers in `sequence` must match markers in opening sequence. +//! fence_close ::= sequence *space_or_tab +//! sequence ::= 3'+' | 3'-' +//! ``` +//! +//! Frontmatter can only occur once. +//! It cannot occur in a container. +//! It must have a closing fence. +//! Like flow constructs, it must be followed by an eol (line ending) or +//! eof (end of file). +//! +//! ## Extension +//! +//! > 👉 **Note**: frontmatter is not part of `CommonMark`, so frontmatter is +//! > not enabled by default. +//! > You need to enable it manually. +//! > See [`Constructs`][constructs] for more info. +//! +//! As there is no spec for frontmatter in markdown, this extension follows how +//! YAML frontmatter works on `github.com`. +//! It also parses TOML frontmatter, just like YAML except that it uses a `+`. +//! +//! ## Recommendation +//! +//! When authoring markdown with frontmatter, it’s recommended to use YAML +//! frontmatter if possible. +//! While YAML has some warts, it works in the most places, so using it +//! guarantees the highest chance of portability. +//! +//! In certain ecosystems, other flavors are widely used. +//! For example, in the Rust ecosystem, TOML is often used. +//! In such cases, using TOML is an okay choice. +//! +//! ## Tokens +//! +//! *   [`Frontmatter`][Name::Frontmatter] +//! *   [`FrontmatterFence`][Name::FrontmatterFence] +//! *   [`FrontmatterSequence`][Name::FrontmatterSequence] +//! *   [`FrontmatterChunk`][Name::FrontmatterChunk] +//! *   [`LineEnding`][Name::LineEnding] +//! *   [`SpaceOrTab`][Name::SpaceOrTab] +//! +//! ## References +//! +//! *   [`micromark-extension-frontmatter`](https://github.com/micromark/micromark-extension-frontmatter) +//! +//! [constructs]: crate::Constructs + +use crate::constant::FRONTMATTER_SEQUENCE_SIZE; +use crate::construct::partial_space_or_tab::space_or_tab; +use crate::event::Name; +use crate::state::{Name as StateName, State}; +use crate::tokenizer::Tokenizer; + +/// Start of frontmatter. +/// +/// ```markdown +/// > | --- +///     ^ +///   | title: "Venus" +///   | --- +/// ``` +pub fn start(tokenizer: &mut Tokenizer) -> State { +    // Indent not allowed. +    if tokenizer.parse_state.constructs.frontmatter +        && matches!(tokenizer.current, Some(b'+' | b'-')) +    { +        tokenizer.tokenize_state.marker = tokenizer.current.unwrap(); +        tokenizer.enter(Name::Frontmatter); +        tokenizer.enter(Name::FrontmatterFence); +        tokenizer.enter(Name::FrontmatterSequence); +        State::Retry(StateName::FrontmatterOpenSequence) +    } else { +        State::Nok +    } +} + +/// In open sequence. +/// +/// ```markdown +/// > | --- +///     ^ +///   | title: "Venus" +///   | --- +/// ``` +pub fn open_sequence(tokenizer: &mut Tokenizer) -> State { +    if tokenizer.current == Some(tokenizer.tokenize_state.marker) { +        tokenizer.tokenize_state.size += 1; +        tokenizer.consume(); +        State::Next(StateName::FrontmatterOpenSequence) +    } else if tokenizer.tokenize_state.size == FRONTMATTER_SEQUENCE_SIZE { +        tokenizer.tokenize_state.size = 0; +        tokenizer.exit(Name::FrontmatterSequence); + +        if matches!(tokenizer.current, Some(b'\t' | b' ')) { +            tokenizer.attempt(State::Next(StateName::FrontmatterOpenAfter), State::Nok); +            State::Retry(space_or_tab(tokenizer)) +        } else { +            State::Retry(StateName::FrontmatterOpenAfter) +        } +    } else { +        tokenizer.tokenize_state.marker = 0; +        tokenizer.tokenize_state.size = 0; +        State::Nok +    } +} + +/// After open sequence. +/// +/// ```markdown +/// > | --- +///        ^ +///   | title: "Venus" +///   | --- +/// ``` +pub fn open_after(tokenizer: &mut Tokenizer) -> State { +    if let Some(b'\n') = tokenizer.current { +        tokenizer.exit(Name::FrontmatterFence); +        tokenizer.enter(Name::LineEnding); +        tokenizer.consume(); +        tokenizer.exit(Name::LineEnding); +        tokenizer.attempt( +            State::Next(StateName::FrontmatterAfter), +            State::Next(StateName::FrontmatterContentStart), +        ); +        State::Next(StateName::FrontmatterCloseStart) +    } else { +        tokenizer.tokenize_state.marker = 0; +        State::Nok +    } +} + +/// Start of close sequence. +/// +/// ```markdown +///   | --- +///   | title: "Venus" +/// > | --- +///     ^ +/// ``` +pub fn close_start(tokenizer: &mut Tokenizer) -> State { +    if tokenizer.current == Some(tokenizer.tokenize_state.marker) { +        tokenizer.enter(Name::FrontmatterFence); +        tokenizer.enter(Name::FrontmatterSequence); +        State::Retry(StateName::FrontmatterCloseSequence) +    } else { +        State::Nok +    } +} + +/// In close sequence. +/// +/// ```markdown +///   | --- +///   | title: "Venus" +/// > | --- +///     ^ +/// ``` +pub fn close_sequence(tokenizer: &mut Tokenizer) -> State { +    if tokenizer.current == Some(tokenizer.tokenize_state.marker) { +        tokenizer.tokenize_state.size += 1; +        tokenizer.consume(); +        State::Next(StateName::FrontmatterCloseSequence) +    } else if tokenizer.tokenize_state.size == FRONTMATTER_SEQUENCE_SIZE { +        tokenizer.tokenize_state.size = 0; +        tokenizer.exit(Name::FrontmatterSequence); + +        if matches!(tokenizer.current, Some(b'\t' | b' ')) { +            tokenizer.attempt(State::Next(StateName::FrontmatterCloseAfter), State::Nok); +            State::Retry(space_or_tab(tokenizer)) +        } else { +            State::Retry(StateName::FrontmatterCloseAfter) +        } +    } else { +        tokenizer.tokenize_state.size = 0; +        State::Nok +    } +} + +/// After close sequence. +/// +/// ```markdown +///   | --- +///   | title: "Venus" +/// > | --- +///        ^ +/// ``` +pub fn close_after(tokenizer: &mut Tokenizer) -> State { +    match tokenizer.current { +        None | Some(b'\n') => { +            tokenizer.exit(Name::FrontmatterFence); +            State::Ok +        } +        _ => State::Nok, +    } +} + +/// Start of content chunk. +/// +/// ```markdown +///   | --- +/// > | title: "Venus" +///     ^ +///   | --- +/// ``` +pub fn content_start(tokenizer: &mut Tokenizer) -> State { +    match tokenizer.current { +        None | Some(b'\n') => State::Retry(StateName::FrontmatterContentEnd), +        Some(_) => { +            tokenizer.enter(Name::FrontmatterChunk); +            State::Retry(StateName::FrontmatterContentInside) +        } +    } +} + +/// In content chunk. +/// +/// ```markdown +///   | --- +/// > | title: "Venus" +///     ^ +///   | --- +/// ``` +pub fn content_inside(tokenizer: &mut Tokenizer) -> State { +    match tokenizer.current { +        None | Some(b'\n') => { +            tokenizer.exit(Name::FrontmatterChunk); +            State::Retry(StateName::FrontmatterContentEnd) +        } +        Some(_) => { +            tokenizer.consume(); +            State::Next(StateName::FrontmatterContentInside) +        } +    } +} + +/// End of content chunk. +/// +/// ```markdown +///   | --- +/// > | title: "Venus" +///                   ^ +///   | --- +/// ``` +pub fn content_end(tokenizer: &mut Tokenizer) -> State { +    match tokenizer.current { +        None => { +            tokenizer.tokenize_state.marker = 0; +            State::Nok +        } +        Some(b'\n') => { +            tokenizer.enter(Name::LineEnding); +            tokenizer.consume(); +            tokenizer.exit(Name::LineEnding); +            tokenizer.attempt( +                State::Next(StateName::FrontmatterAfter), +                State::Next(StateName::FrontmatterContentStart), +            ); +            State::Next(StateName::FrontmatterCloseStart) +        } +        Some(_) => unreachable!("expected eof/eol"), +    } +} + +/// After frontmatter. +/// +/// ```markdown +///   | --- +///   | title: "Venus" +/// > | --- +///        ^ +/// ``` +pub fn after(tokenizer: &mut Tokenizer) -> State { +    tokenizer.tokenize_state.marker = 0; + +    match tokenizer.current { +        None | Some(b'\n') => { +            tokenizer.exit(Name::Frontmatter); +            State::Ok +        } +        _ => State::Nok, +    } +} diff --git a/src/construct/mod.rs b/src/construct/mod.rs index 5630143..1c1c6f7 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -40,6 +40,7 @@  //! *   [code (indented)][code_indented]  //! *   [code (text)][code_text]  //! *   [definition][] +//! *   [frontmatter][]  //! *   [hard break (escape)][hard_break_escape]  //! *   [heading (atx)][heading_atx]  //! *   [heading (setext)][heading_setext] @@ -139,6 +140,7 @@ pub mod code_text;  pub mod definition;  pub mod document;  pub mod flow; +pub mod frontmatter;  pub mod hard_break_escape;  pub mod heading_atx;  pub mod heading_setext; diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs index 0a8ebe9..74fd961 100644 --- a/src/construct/thematic_break.rs +++ b/src/construct/thematic_break.rs @@ -56,8 +56,8 @@  //! [list-item]: crate::construct::list_item  //! [html]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-hr-element -use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};  use crate::constant::{TAB_SIZE, THEMATIC_BREAK_MARKER_COUNT_MIN}; +use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};  use crate::event::Name;  use crate::state::{Name as StateName, State};  use crate::tokenizer::Tokenizer; | 
