diff options
Diffstat (limited to '')
-rw-r--r-- | src/construct/code_indented.rs | 2 | ||||
-rw-r--r-- | src/construct/document.rs | 20 | ||||
-rw-r--r-- | src/construct/flow.rs | 24 | ||||
-rw-r--r-- | src/construct/frontmatter.rs | 293 | ||||
-rw-r--r-- | src/construct/mod.rs | 2 | ||||
-rw-r--r-- | src/construct/thematic_break.rs | 2 |
6 files changed, 327 insertions, 16 deletions
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs index 89c5652..c5439f1 100644 --- a/src/construct/code_indented.rs +++ b/src/construct/code_indented.rs @@ -53,8 +53,8 @@ //! [html_code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element //! [html_pre]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-pre-element -use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}; use crate::constant::TAB_SIZE; +use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}; use crate::event::Name; use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; diff --git a/src/construct/document.rs b/src/construct/document.rs index 0cda368..2cc170d 100644 --- a/src/construct/document.rs +++ b/src/construct/document.rs @@ -58,13 +58,29 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { ))); tokenizer.attempt( - State::Next(StateName::DocumentContainerExistingBefore), - State::Next(StateName::DocumentContainerExistingBefore), + State::Next(StateName::DocumentBeforeFrontmatter), + State::Next(StateName::DocumentBeforeFrontmatter), ); State::Retry(StateName::BomStart) } +/// At optional frontmatter. +/// +/// ```markdown +/// > | --- +/// ^ +/// | title: Venus +/// | --- +/// ``` +pub fn before_frontmatter(tokenizer: &mut Tokenizer) -> State { + tokenizer.attempt( + State::Next(StateName::DocumentContainerNewBefore), + State::Next(StateName::DocumentContainerNewBefore), + ); + State::Retry(StateName::FrontmatterStart) +} + /// At optional existing containers. // /// ```markdown diff --git a/src/construct/flow.rs b/src/construct/flow.rs index 08c7891..f3c7685 100644 --- a/src/construct/flow.rs +++ b/src/construct/flow.rs @@ -35,28 +35,28 @@ use crate::tokenizer::Tokenizer; /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - Some(b'`' | b'~') => { + Some(b'#') => { tokenizer.attempt( State::Next(StateName::FlowAfter), State::Next(StateName::FlowBeforeParagraph), ); - State::Retry(StateName::CodeFencedStart) + State::Retry(StateName::HeadingAtxStart) } - Some(b'<') => { + Some(b'*' | b'_') => { tokenizer.attempt( State::Next(StateName::FlowAfter), State::Next(StateName::FlowBeforeParagraph), ); - State::Retry(StateName::HtmlFlowStart) + State::Retry(StateName::ThematicBreakStart) } - Some(b'#') => { + Some(b'<') => { tokenizer.attempt( State::Next(StateName::FlowAfter), State::Next(StateName::FlowBeforeParagraph), ); - State::Retry(StateName::HeadingAtxStart) + State::Retry(StateName::HtmlFlowStart) } - // Note: `-` is also used in thematic breaks, so it’s not included here. + // Note: `-` is also used in thematic breaks so it’s not included here. Some(b'=') => { tokenizer.attempt( State::Next(StateName::FlowAfter), @@ -64,22 +64,22 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { ); State::Retry(StateName::HeadingSetextStart) } - Some(b'*' | b'_') => { + Some(b'[') => { tokenizer.attempt( State::Next(StateName::FlowAfter), State::Next(StateName::FlowBeforeParagraph), ); - State::Retry(StateName::ThematicBreakStart) + State::Retry(StateName::DefinitionStart) } - Some(b'[') => { + Some(b'`' | b'~') => { tokenizer.attempt( State::Next(StateName::FlowAfter), State::Next(StateName::FlowBeforeParagraph), ); - State::Retry(StateName::DefinitionStart) + State::Retry(StateName::CodeFencedStart) } // Actual parsing: blank line? Indented code? Indented anything? - // Also includes `-` which can be a setext heading underline or a thematic break. + // Also includes `-` which can be a setext heading underline or thematic break. None | Some(b'\t' | b'\n' | b' ' | b'-') => State::Retry(StateName::FlowBlankLineBefore), // Must be a paragraph. Some(_) => { diff --git a/src/construct/frontmatter.rs b/src/construct/frontmatter.rs new file mode 100644 index 0000000..dc47bee --- /dev/null +++ b/src/construct/frontmatter.rs @@ -0,0 +1,293 @@ +//! Frontmatter occurs at the start of the document. +//! +//! ## Grammar +//! +//! Frontmatter forms with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>): +//! +//! ```bnf +//! frontmatter ::= fence_open *( eol *byte ) eol fence_close +//! fence_open ::= sequence *space_or_tab +//! ; Restriction: markers in `sequence` must match markers in opening sequence. +//! fence_close ::= sequence *space_or_tab +//! sequence ::= 3'+' | 3'-' +//! ``` +//! +//! Frontmatter can only occur once. +//! It cannot occur in a container. +//! It must have a closing fence. +//! Like flow constructs, it must be followed by an eol (line ending) or +//! eof (end of file). +//! +//! ## Extension +//! +//! > 👉 **Note**: frontmatter is not part of `CommonMark`, so frontmatter is +//! > not enabled by default. +//! > You need to enable it manually. +//! > See [`Constructs`][constructs] for more info. +//! +//! As there is no spec for frontmatter in markdown, this extension follows how +//! YAML frontmatter works on `github.com`. +//! It also parses TOML frontmatter, just like YAML except that it uses a `+`. +//! +//! ## Recommendation +//! +//! When authoring markdown with frontmatter, it’s recommended to use YAML +//! frontmatter if possible. +//! While YAML has some warts, it works in the most places, so using it +//! guarantees the highest chance of portability. +//! +//! In certain ecosystems, other flavors are widely used. +//! For example, in the Rust ecosystem, TOML is often used. +//! In such cases, using TOML is an okay choice. +//! +//! ## Tokens +//! +//! * [`Frontmatter`][Name::Frontmatter] +//! * [`FrontmatterFence`][Name::FrontmatterFence] +//! * [`FrontmatterSequence`][Name::FrontmatterSequence] +//! * [`FrontmatterChunk`][Name::FrontmatterChunk] +//! * [`LineEnding`][Name::LineEnding] +//! * [`SpaceOrTab`][Name::SpaceOrTab] +//! +//! ## References +//! +//! * [`micromark-extension-frontmatter`](https://github.com/micromark/micromark-extension-frontmatter) +//! +//! [constructs]: crate::Constructs + +use crate::constant::FRONTMATTER_SEQUENCE_SIZE; +use crate::construct::partial_space_or_tab::space_or_tab; +use crate::event::Name; +use crate::state::{Name as StateName, State}; +use crate::tokenizer::Tokenizer; + +/// Start of frontmatter. +/// +/// ```markdown +/// > | --- +/// ^ +/// | title: "Venus" +/// | --- +/// ``` +pub fn start(tokenizer: &mut Tokenizer) -> State { + // Indent not allowed. + if tokenizer.parse_state.constructs.frontmatter + && matches!(tokenizer.current, Some(b'+' | b'-')) + { + tokenizer.tokenize_state.marker = tokenizer.current.unwrap(); + tokenizer.enter(Name::Frontmatter); + tokenizer.enter(Name::FrontmatterFence); + tokenizer.enter(Name::FrontmatterSequence); + State::Retry(StateName::FrontmatterOpenSequence) + } else { + State::Nok + } +} + +/// In open sequence. +/// +/// ```markdown +/// > | --- +/// ^ +/// | title: "Venus" +/// | --- +/// ``` +pub fn open_sequence(tokenizer: &mut Tokenizer) -> State { + if tokenizer.current == Some(tokenizer.tokenize_state.marker) { + tokenizer.tokenize_state.size += 1; + tokenizer.consume(); + State::Next(StateName::FrontmatterOpenSequence) + } else if tokenizer.tokenize_state.size == FRONTMATTER_SEQUENCE_SIZE { + tokenizer.tokenize_state.size = 0; + tokenizer.exit(Name::FrontmatterSequence); + + if matches!(tokenizer.current, Some(b'\t' | b' ')) { + tokenizer.attempt(State::Next(StateName::FrontmatterOpenAfter), State::Nok); + State::Retry(space_or_tab(tokenizer)) + } else { + State::Retry(StateName::FrontmatterOpenAfter) + } + } else { + tokenizer.tokenize_state.marker = 0; + tokenizer.tokenize_state.size = 0; + State::Nok + } +} + +/// After open sequence. +/// +/// ```markdown +/// > | --- +/// ^ +/// | title: "Venus" +/// | --- +/// ``` +pub fn open_after(tokenizer: &mut Tokenizer) -> State { + if let Some(b'\n') = tokenizer.current { + tokenizer.exit(Name::FrontmatterFence); + tokenizer.enter(Name::LineEnding); + tokenizer.consume(); + tokenizer.exit(Name::LineEnding); + tokenizer.attempt( + State::Next(StateName::FrontmatterAfter), + State::Next(StateName::FrontmatterContentStart), + ); + State::Next(StateName::FrontmatterCloseStart) + } else { + tokenizer.tokenize_state.marker = 0; + State::Nok + } +} + +/// Start of close sequence. +/// +/// ```markdown +/// | --- +/// | title: "Venus" +/// > | --- +/// ^ +/// ``` +pub fn close_start(tokenizer: &mut Tokenizer) -> State { + if tokenizer.current == Some(tokenizer.tokenize_state.marker) { + tokenizer.enter(Name::FrontmatterFence); + tokenizer.enter(Name::FrontmatterSequence); + State::Retry(StateName::FrontmatterCloseSequence) + } else { + State::Nok + } +} + +/// In close sequence. +/// +/// ```markdown +/// | --- +/// | title: "Venus" +/// > | --- +/// ^ +/// ``` +pub fn close_sequence(tokenizer: &mut Tokenizer) -> State { + if tokenizer.current == Some(tokenizer.tokenize_state.marker) { + tokenizer.tokenize_state.size += 1; + tokenizer.consume(); + State::Next(StateName::FrontmatterCloseSequence) + } else if tokenizer.tokenize_state.size == FRONTMATTER_SEQUENCE_SIZE { + tokenizer.tokenize_state.size = 0; + tokenizer.exit(Name::FrontmatterSequence); + + if matches!(tokenizer.current, Some(b'\t' | b' ')) { + tokenizer.attempt(State::Next(StateName::FrontmatterCloseAfter), State::Nok); + State::Retry(space_or_tab(tokenizer)) + } else { + State::Retry(StateName::FrontmatterCloseAfter) + } + } else { + tokenizer.tokenize_state.size = 0; + State::Nok + } +} + +/// After close sequence. +/// +/// ```markdown +/// | --- +/// | title: "Venus" +/// > | --- +/// ^ +/// ``` +pub fn close_after(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + None | Some(b'\n') => { + tokenizer.exit(Name::FrontmatterFence); + State::Ok + } + _ => State::Nok, + } +} + +/// Start of content chunk. +/// +/// ```markdown +/// | --- +/// > | title: "Venus" +/// ^ +/// | --- +/// ``` +pub fn content_start(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + None | Some(b'\n') => State::Retry(StateName::FrontmatterContentEnd), + Some(_) => { + tokenizer.enter(Name::FrontmatterChunk); + State::Retry(StateName::FrontmatterContentInside) + } + } +} + +/// In content chunk. +/// +/// ```markdown +/// | --- +/// > | title: "Venus" +/// ^ +/// | --- +/// ``` +pub fn content_inside(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + None | Some(b'\n') => { + tokenizer.exit(Name::FrontmatterChunk); + State::Retry(StateName::FrontmatterContentEnd) + } + Some(_) => { + tokenizer.consume(); + State::Next(StateName::FrontmatterContentInside) + } + } +} + +/// End of content chunk. +/// +/// ```markdown +/// | --- +/// > | title: "Venus" +/// ^ +/// | --- +/// ``` +pub fn content_end(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + None => { + tokenizer.tokenize_state.marker = 0; + State::Nok + } + Some(b'\n') => { + tokenizer.enter(Name::LineEnding); + tokenizer.consume(); + tokenizer.exit(Name::LineEnding); + tokenizer.attempt( + State::Next(StateName::FrontmatterAfter), + State::Next(StateName::FrontmatterContentStart), + ); + State::Next(StateName::FrontmatterCloseStart) + } + Some(_) => unreachable!("expected eof/eol"), + } +} + +/// After frontmatter. +/// +/// ```markdown +/// | --- +/// | title: "Venus" +/// > | --- +/// ^ +/// ``` +pub fn after(tokenizer: &mut Tokenizer) -> State { + tokenizer.tokenize_state.marker = 0; + + match tokenizer.current { + None | Some(b'\n') => { + tokenizer.exit(Name::Frontmatter); + State::Ok + } + _ => State::Nok, + } +} diff --git a/src/construct/mod.rs b/src/construct/mod.rs index 5630143..1c1c6f7 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -40,6 +40,7 @@ //! * [code (indented)][code_indented] //! * [code (text)][code_text] //! * [definition][] +//! * [frontmatter][] //! * [hard break (escape)][hard_break_escape] //! * [heading (atx)][heading_atx] //! * [heading (setext)][heading_setext] @@ -139,6 +140,7 @@ pub mod code_text; pub mod definition; pub mod document; pub mod flow; +pub mod frontmatter; pub mod hard_break_escape; pub mod heading_atx; pub mod heading_setext; diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs index 0a8ebe9..74fd961 100644 --- a/src/construct/thematic_break.rs +++ b/src/construct/thematic_break.rs @@ -56,8 +56,8 @@ //! [list-item]: crate::construct::list_item //! [html]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-hr-element -use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}; use crate::constant::{TAB_SIZE, THEMATIC_BREAK_MARKER_COUNT_MIN}; +use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}; use crate::event::Name; use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; |