aboutsummaryrefslogtreecommitdiffstats
path: root/src/construct/frontmatter.rs
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-08-16 16:49:29 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-08-16 16:49:53 +0200
commit6ee90b34c87354baf8e03d5469a92cf5dd17a82b (patch)
treecfa64be772be6464e6f790dabccf8a77e7afe60e /src/construct/frontmatter.rs
parent93d0b7c6465f4ffe220b3ddada729746b11eb6ce (diff)
downloadmarkdown-rs-6ee90b34c87354baf8e03d5469a92cf5dd17a82b.tar.gz
markdown-rs-6ee90b34c87354baf8e03d5469a92cf5dd17a82b.tar.bz2
markdown-rs-6ee90b34c87354baf8e03d5469a92cf5dd17a82b.zip
Add support for frontmatter
Diffstat (limited to 'src/construct/frontmatter.rs')
-rw-r--r--src/construct/frontmatter.rs293
1 files changed, 293 insertions, 0 deletions
diff --git a/src/construct/frontmatter.rs b/src/construct/frontmatter.rs
new file mode 100644
index 0000000..dc47bee
--- /dev/null
+++ b/src/construct/frontmatter.rs
@@ -0,0 +1,293 @@
+//! Frontmatter occurs at the start of the document.
+//!
+//! ## Grammar
+//!
+//! Frontmatter forms with the following BNF
+//! (<small>see [construct][crate::construct] for character groups</small>):
+//!
+//! ```bnf
+//! frontmatter ::= fence_open *( eol *byte ) eol fence_close
+//! fence_open ::= sequence *space_or_tab
+//! ; Restriction: markers in `sequence` must match markers in opening sequence.
+//! fence_close ::= sequence *space_or_tab
+//! sequence ::= 3'+' | 3'-'
+//! ```
+//!
+//! Frontmatter can only occur once.
+//! It cannot occur in a container.
+//! It must have a closing fence.
+//! Like flow constructs, it must be followed by an eol (line ending) or
+//! eof (end of file).
+//!
+//! ## Extension
+//!
+//! > 👉 **Note**: frontmatter is not part of `CommonMark`, so frontmatter is
+//! > not enabled by default.
+//! > You need to enable it manually.
+//! > See [`Constructs`][constructs] for more info.
+//!
+//! As there is no spec for frontmatter in markdown, this extension follows how
+//! YAML frontmatter works on `github.com`.
+//! It also parses TOML frontmatter, just like YAML except that it uses a `+`.
+//!
+//! ## Recommendation
+//!
+//! When authoring markdown with frontmatter, it’s recommended to use YAML
+//! frontmatter if possible.
+//! While YAML has some warts, it works in most places, so using it
+//! guarantees the highest chance of portability.
+//!
+//! In certain ecosystems, other flavors are widely used.
+//! For example, in the Rust ecosystem, TOML is often used.
+//! In such cases, using TOML is an okay choice.
+//!
+//! ## Tokens
+//!
+//! * [`Frontmatter`][Name::Frontmatter]
+//! * [`FrontmatterFence`][Name::FrontmatterFence]
+//! * [`FrontmatterSequence`][Name::FrontmatterSequence]
+//! * [`FrontmatterChunk`][Name::FrontmatterChunk]
+//! * [`LineEnding`][Name::LineEnding]
+//! * [`SpaceOrTab`][Name::SpaceOrTab]
+//!
+//! ## References
+//!
+//! * [`micromark-extension-frontmatter`](https://github.com/micromark/micromark-extension-frontmatter)
+//!
+//! [constructs]: crate::Constructs
+
+use crate::constant::FRONTMATTER_SEQUENCE_SIZE;
+use crate::construct::partial_space_or_tab::space_or_tab;
+use crate::event::Name;
+use crate::state::{Name as StateName, State};
+use crate::tokenizer::Tokenizer;
+
+/// Entry point of the frontmatter construct.
+///
+/// Succeeds only when the frontmatter construct is turned on and the current
+/// byte is `+` or `-`.
+/// That byte is remembered in `tokenize_state.marker`, the `Frontmatter`,
+/// `FrontmatterFence`, and `FrontmatterSequence` events are entered, and the
+/// same byte is retried in the open sequence state.
+///
+/// ```markdown
+/// > | ---
+///     ^
+///   | title: "Venus"
+///   | ---
+/// ```
+pub fn start(tokenizer: &mut Tokenizer) -> State {
+    // Indent is not allowed: the marker has to be the current byte.
+    match tokenizer.current {
+        Some(marker @ (b'+' | b'-')) if tokenizer.parse_state.constructs.frontmatter => {
+            tokenizer.tokenize_state.marker = marker;
+            tokenizer.enter(Name::Frontmatter);
+            tokenizer.enter(Name::FrontmatterFence);
+            tokenizer.enter(Name::FrontmatterSequence);
+            State::Retry(StateName::FrontmatterOpenSequence)
+        }
+        _ => State::Nok,
+    }
+}
+
+/// Inside the marker run of the opening fence.
+///
+/// Each byte equal to the stored marker is consumed and counted in
+/// `tokenize_state.size`.
+/// On the first other byte (or eof) the run must be exactly
+/// `FRONTMATTER_SEQUENCE_SIZE` long; otherwise the marker and counter are
+/// reset and the construct fails.
+/// Trailing tabs and spaces are handed to `space_or_tab` before moving on to
+/// the state after the opening fence.
+///
+/// ```markdown
+/// > | ---
+///     ^
+///   | title: "Venus"
+///   | ---
+/// ```
+pub fn open_sequence(tokenizer: &mut Tokenizer) -> State {
+    // Another marker: count it and stay in this state.
+    if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
+        tokenizer.tokenize_state.size += 1;
+        tokenizer.consume();
+        return State::Next(StateName::FrontmatterOpenSequence);
+    }
+
+    // The run has ended; the counter is cleared whether or not it matched.
+    let complete = tokenizer.tokenize_state.size == FRONTMATTER_SEQUENCE_SIZE;
+    tokenizer.tokenize_state.size = 0;
+
+    if !complete {
+        tokenizer.tokenize_state.marker = 0;
+        return State::Nok;
+    }
+
+    tokenizer.exit(Name::FrontmatterSequence);
+
+    match tokenizer.current {
+        Some(b'\t' | b' ') => {
+            tokenizer.attempt(State::Next(StateName::FrontmatterOpenAfter), State::Nok);
+            State::Retry(space_or_tab(tokenizer))
+        }
+        _ => State::Retry(StateName::FrontmatterOpenAfter),
+    }
+}
+
+/// After the opening fence (markers and optional whitespace).
+///
+/// Only a line ending may follow.
+/// When it does, the fence is exited, the line ending is emitted, and the
+/// closing fence is tried at the start of the next line.
+/// The two states passed to `attempt` are `FrontmatterAfter` and
+/// `FrontmatterContentStart` (presumably the success and failure
+/// continuations, in that order; confirm against `Tokenizer::attempt`).
+/// Anything else, including eof, resets the marker and fails.
+///
+/// ```markdown
+/// > | ---
+///        ^
+///   | title: "Venus"
+///   | ---
+/// ```
+pub fn open_after(tokenizer: &mut Tokenizer) -> State {
+    if tokenizer.current != Some(b'\n') {
+        tokenizer.tokenize_state.marker = 0;
+        return State::Nok;
+    }
+
+    tokenizer.exit(Name::FrontmatterFence);
+    tokenizer.enter(Name::LineEnding);
+    tokenizer.consume();
+    tokenizer.exit(Name::LineEnding);
+    tokenizer.attempt(
+        State::Next(StateName::FrontmatterAfter),
+        State::Next(StateName::FrontmatterContentStart),
+    );
+    State::Next(StateName::FrontmatterCloseStart)
+}
+
+/// At the start of a line that may hold the closing fence.
+///
+/// The line is a candidate only if its first byte equals the stored marker;
+/// in that case a fence and a sequence are entered and the byte is retried
+/// in the close sequence state.
+/// Otherwise this state fails without touching any tokenize state.
+///
+/// ```markdown
+///   | ---
+///   | title: "Venus"
+/// > | ---
+///     ^
+/// ```
+pub fn close_start(tokenizer: &mut Tokenizer) -> State {
+    if tokenizer.current != Some(tokenizer.tokenize_state.marker) {
+        return State::Nok;
+    }
+
+    tokenizer.enter(Name::FrontmatterFence);
+    tokenizer.enter(Name::FrontmatterSequence);
+    State::Retry(StateName::FrontmatterCloseSequence)
+}
+
+/// Inside the marker run of the closing fence.
+///
+/// Each byte equal to the stored marker is consumed and counted in
+/// `tokenize_state.size`.
+/// On the first other byte (or eof) the run must be exactly
+/// `FRONTMATTER_SEQUENCE_SIZE` long, otherwise this state fails.
+/// Unlike the open sequence, the marker is kept on failure; only the counter
+/// is cleared.
+/// Trailing tabs and spaces are handed to `space_or_tab` before moving on to
+/// the state after the closing fence.
+///
+/// ```markdown
+///   | ---
+///   | title: "Venus"
+/// > | ---
+///     ^
+/// ```
+pub fn close_sequence(tokenizer: &mut Tokenizer) -> State {
+    // Another marker: count it and stay in this state.
+    if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
+        tokenizer.tokenize_state.size += 1;
+        tokenizer.consume();
+        return State::Next(StateName::FrontmatterCloseSequence);
+    }
+
+    // The run has ended; the counter is cleared whether or not it matched.
+    let complete = tokenizer.tokenize_state.size == FRONTMATTER_SEQUENCE_SIZE;
+    tokenizer.tokenize_state.size = 0;
+
+    if !complete {
+        return State::Nok;
+    }
+
+    tokenizer.exit(Name::FrontmatterSequence);
+
+    match tokenizer.current {
+        Some(b'\t' | b' ') => {
+            tokenizer.attempt(State::Next(StateName::FrontmatterCloseAfter), State::Nok);
+            State::Retry(space_or_tab(tokenizer))
+        }
+        _ => State::Retry(StateName::FrontmatterCloseAfter),
+    }
+}
+
+/// After the closing fence (markers and optional whitespace).
+///
+/// The fence is exited and accepted only when it is followed by a line
+/// ending or eof; any other byte fails.
+///
+/// ```markdown
+///   | ---
+///   | title: "Venus"
+/// > | ---
+///        ^
+/// ```
+pub fn close_after(tokenizer: &mut Tokenizer) -> State {
+    if matches!(tokenizer.current, None | Some(b'\n')) {
+        tokenizer.exit(Name::FrontmatterFence);
+        State::Ok
+    } else {
+        State::Nok
+    }
+}
+
+/// At the start of a content line.
+///
+/// At a line ending or eof there is nothing to put in a chunk, so the same
+/// position is retried in the content end state.
+/// Any other byte opens a `FrontmatterChunk` and is retried in the content
+/// inside state.
+///
+/// ```markdown
+///   | ---
+/// > | title: "Venus"
+///     ^
+///   | ---
+/// ```
+pub fn content_start(tokenizer: &mut Tokenizer) -> State {
+    if matches!(tokenizer.current, None | Some(b'\n')) {
+        State::Retry(StateName::FrontmatterContentEnd)
+    } else {
+        tokenizer.enter(Name::FrontmatterChunk);
+        State::Retry(StateName::FrontmatterContentInside)
+    }
+}
+
+/// Inside a content chunk.
+///
+/// Bytes are consumed one at a time until a line ending or eof, at which
+/// point the `FrontmatterChunk` is exited and the same position is retried
+/// in the content end state.
+///
+/// ```markdown
+///   | ---
+/// > | title: "Venus"
+///     ^
+///   | ---
+/// ```
+pub fn content_inside(tokenizer: &mut Tokenizer) -> State {
+    if matches!(tokenizer.current, None | Some(b'\n')) {
+        tokenizer.exit(Name::FrontmatterChunk);
+        State::Retry(StateName::FrontmatterContentEnd)
+    } else {
+        tokenizer.consume();
+        State::Next(StateName::FrontmatterContentInside)
+    }
+}
+
+/// At the end of a content line.
+///
+/// A line ending is emitted, after which the closing fence is tried at the
+/// start of the next line.
+/// The two states passed to `attempt` are `FrontmatterAfter` and
+/// `FrontmatterContentStart` (presumably the success and failure
+/// continuations, in that order; confirm against `Tokenizer::attempt`).
+/// Reaching eof here means no closing fence was found, so the marker is
+/// reset and the construct fails.
+/// The states in this file only retry here at a line ending or eof, hence
+/// the `unreachable!` for other bytes.
+///
+/// ```markdown
+///   | ---
+/// > | title: "Venus"
+///                    ^
+///   | ---
+/// ```
+pub fn content_end(tokenizer: &mut Tokenizer) -> State {
+    match tokenizer.current {
+        Some(b'\n') => {
+            tokenizer.enter(Name::LineEnding);
+            tokenizer.consume();
+            tokenizer.exit(Name::LineEnding);
+            tokenizer.attempt(
+                State::Next(StateName::FrontmatterAfter),
+                State::Next(StateName::FrontmatterContentStart),
+            );
+            State::Next(StateName::FrontmatterCloseStart)
+        }
+        None => {
+            tokenizer.tokenize_state.marker = 0;
+            State::Nok
+        }
+        Some(_) => unreachable!("expected eof/eol"),
+    }
+}
+
+/// After the whole frontmatter, closing fence included.
+///
+/// The stored marker is always cleared here.
+/// The `Frontmatter` event is exited and the construct accepted when a line
+/// ending or eof follows; any other byte fails.
+///
+/// ```markdown
+///   | ---
+///   | title: "Venus"
+/// > | ---
+///        ^
+/// ```
+pub fn after(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.tokenize_state.marker = 0;
+
+    if matches!(tokenizer.current, None | Some(b'\n')) {
+        tokenizer.exit(Name::Frontmatter);
+        State::Ok
+    } else {
+        State::Nok
+    }
+}