aboutsummaryrefslogtreecommitdiffstats
path: root/src/construct
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-08-16 16:49:29 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-08-16 16:49:53 +0200
commit6ee90b34c87354baf8e03d5469a92cf5dd17a82b (patch)
treecfa64be772be6464e6f790dabccf8a77e7afe60e /src/construct
parent93d0b7c6465f4ffe220b3ddada729746b11eb6ce (diff)
downloadmarkdown-rs-6ee90b34c87354baf8e03d5469a92cf5dd17a82b.tar.gz
markdown-rs-6ee90b34c87354baf8e03d5469a92cf5dd17a82b.tar.bz2
markdown-rs-6ee90b34c87354baf8e03d5469a92cf5dd17a82b.zip
Add support for frontmatter
Diffstat (limited to 'src/construct')
-rw-r--r--src/construct/code_indented.rs2
-rw-r--r--src/construct/document.rs20
-rw-r--r--src/construct/flow.rs24
-rw-r--r--src/construct/frontmatter.rs293
-rw-r--r--src/construct/mod.rs2
-rw-r--r--src/construct/thematic_break.rs2
6 files changed, 327 insertions, 16 deletions
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs
index 89c5652..c5439f1 100644
--- a/src/construct/code_indented.rs
+++ b/src/construct/code_indented.rs
@@ -53,8 +53,8 @@
//! [html_code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
//! [html_pre]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-pre-element
-use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
use crate::constant::TAB_SIZE;
+use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
use crate::event::Name;
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
diff --git a/src/construct/document.rs b/src/construct/document.rs
index 0cda368..2cc170d 100644
--- a/src/construct/document.rs
+++ b/src/construct/document.rs
@@ -58,13 +58,29 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
)));
tokenizer.attempt(
- State::Next(StateName::DocumentContainerExistingBefore),
- State::Next(StateName::DocumentContainerExistingBefore),
+ State::Next(StateName::DocumentBeforeFrontmatter),
+ State::Next(StateName::DocumentBeforeFrontmatter),
);
State::Retry(StateName::BomStart)
}
+/// At optional frontmatter: tries the frontmatter construct and continues at `DocumentContainerNewBefore` either way.
+///
+/// ```markdown
+/// > | ---
+/// ^
+/// | title: Venus
+/// | ---
+/// ```
+pub fn before_frontmatter(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.attempt(
+ State::Next(StateName::DocumentContainerNewBefore),
+ State::Next(StateName::DocumentContainerNewBefore),
+ );
+ State::Retry(StateName::FrontmatterStart)
+}
+
/// At optional existing containers.
//
/// ```markdown
diff --git a/src/construct/flow.rs b/src/construct/flow.rs
index 08c7891..f3c7685 100644
--- a/src/construct/flow.rs
+++ b/src/construct/flow.rs
@@ -35,28 +35,28 @@ use crate::tokenizer::Tokenizer;
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some(b'`' | b'~') => {
+ Some(b'#') => {
tokenizer.attempt(
State::Next(StateName::FlowAfter),
State::Next(StateName::FlowBeforeParagraph),
);
- State::Retry(StateName::CodeFencedStart)
+ State::Retry(StateName::HeadingAtxStart)
}
- Some(b'<') => {
+ Some(b'*' | b'_') => {
tokenizer.attempt(
State::Next(StateName::FlowAfter),
State::Next(StateName::FlowBeforeParagraph),
);
- State::Retry(StateName::HtmlFlowStart)
+ State::Retry(StateName::ThematicBreakStart)
}
- Some(b'#') => {
+ Some(b'<') => {
tokenizer.attempt(
State::Next(StateName::FlowAfter),
State::Next(StateName::FlowBeforeParagraph),
);
- State::Retry(StateName::HeadingAtxStart)
+ State::Retry(StateName::HtmlFlowStart)
}
- // Note: `-` is also used in thematic breaks, so it’s not included here.
+ // Note: `-` is also used in thematic breaks so it’s not included here.
Some(b'=') => {
tokenizer.attempt(
State::Next(StateName::FlowAfter),
@@ -64,22 +64,22 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
);
State::Retry(StateName::HeadingSetextStart)
}
- Some(b'*' | b'_') => {
+ Some(b'[') => {
tokenizer.attempt(
State::Next(StateName::FlowAfter),
State::Next(StateName::FlowBeforeParagraph),
);
- State::Retry(StateName::ThematicBreakStart)
+ State::Retry(StateName::DefinitionStart)
}
- Some(b'[') => {
+ Some(b'`' | b'~') => {
tokenizer.attempt(
State::Next(StateName::FlowAfter),
State::Next(StateName::FlowBeforeParagraph),
);
- State::Retry(StateName::DefinitionStart)
+ State::Retry(StateName::CodeFencedStart)
}
// Actual parsing: blank line? Indented code? Indented anything?
- // Also includes `-` which can be a setext heading underline or a thematic break.
+ // Also includes `-` which can be a setext heading underline or thematic break.
None | Some(b'\t' | b'\n' | b' ' | b'-') => State::Retry(StateName::FlowBlankLineBefore),
// Must be a paragraph.
Some(_) => {
diff --git a/src/construct/frontmatter.rs b/src/construct/frontmatter.rs
new file mode 100644
index 0000000..dc47bee
--- /dev/null
+++ b/src/construct/frontmatter.rs
@@ -0,0 +1,293 @@
+//! Frontmatter occurs at the start of the document.
+//!
+//! ## Grammar
+//!
+//! Frontmatter forms with the following BNF
+//! (<small>see [construct][crate::construct] for character groups</small>):
+//!
+//! ```bnf
+//! frontmatter ::= fence_open *( eol *byte ) eol fence_close
+//! fence_open ::= sequence *space_or_tab
+//! ; Restriction: markers in `sequence` must match markers in opening sequence.
+//! fence_close ::= sequence *space_or_tab
+//! sequence ::= 3'+' | 3'-'
+//! ```
+//!
+//! Frontmatter can only occur once.
+//! It cannot occur in a container.
+//! It must have a closing fence.
+//! Like flow constructs, it must be followed by an eol (line ending) or
+//! eof (end of file).
+//!
+//! ## Extension
+//!
+//! > 👉 **Note**: frontmatter is not part of `CommonMark`, so frontmatter is
+//! > not enabled by default.
+//! > You need to enable it manually.
+//! > See [`Constructs`][constructs] for more info.
+//!
+//! As there is no spec for frontmatter in markdown, this extension follows how
+//! YAML frontmatter works on `github.com`.
+//! It also parses TOML frontmatter, just like YAML except that it uses a `+`.
+//!
+//! ## Recommendation
+//!
+//! When authoring markdown with frontmatter, it’s recommended to use YAML
+//! frontmatter if possible.
+//! While YAML has some warts, it works in the most places, so using it
+//! gives the highest chance of portability.
+//!
+//! In certain ecosystems, other flavors are widely used.
+//! For example, in the Rust ecosystem, TOML is often used.
+//! In such cases, using TOML is an okay choice.
+//!
+//! ## Tokens
+//!
+//! * [`Frontmatter`][Name::Frontmatter]
+//! * [`FrontmatterFence`][Name::FrontmatterFence]
+//! * [`FrontmatterSequence`][Name::FrontmatterSequence]
+//! * [`FrontmatterChunk`][Name::FrontmatterChunk]
+//! * [`LineEnding`][Name::LineEnding]
+//! * [`SpaceOrTab`][Name::SpaceOrTab]
+//!
+//! ## References
+//!
+//! * [`micromark-extension-frontmatter`](https://github.com/micromark/micromark-extension-frontmatter)
+//!
+//! [constructs]: crate::Constructs
+
+use crate::constant::FRONTMATTER_SEQUENCE_SIZE;
+use crate::construct::partial_space_or_tab::space_or_tab;
+use crate::event::Name;
+use crate::state::{Name as StateName, State};
+use crate::tokenizer::Tokenizer;
+
+/// Start of frontmatter: needs the construct enabled and a `+` or `-`, which is stored as the marker.
+///
+/// ```markdown
+/// > | ---
+/// ^
+/// | title: "Venus"
+/// | ---
+/// ```
+pub fn start(tokenizer: &mut Tokenizer) -> State {
+ // Indent not allowed.
+ if tokenizer.parse_state.constructs.frontmatter
+ && matches!(tokenizer.current, Some(b'+' | b'-'))
+ {
+ tokenizer.tokenize_state.marker = tokenizer.current.unwrap();
+ tokenizer.enter(Name::Frontmatter);
+ tokenizer.enter(Name::FrontmatterFence);
+ tokenizer.enter(Name::FrontmatterSequence);
+ State::Retry(StateName::FrontmatterOpenSequence)
+ } else {
+ State::Nok
+ }
+}
+
+/// In open sequence: counts markers, requires exactly `FRONTMATTER_SEQUENCE_SIZE`, then allows spaces or tabs.
+///
+/// ```markdown
+/// > | ---
+/// ^
+/// | title: "Venus"
+/// | ---
+/// ```
+pub fn open_sequence(tokenizer: &mut Tokenizer) -> State {
+ if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
+ tokenizer.tokenize_state.size += 1;
+ tokenizer.consume();
+ State::Next(StateName::FrontmatterOpenSequence)
+ } else if tokenizer.tokenize_state.size == FRONTMATTER_SEQUENCE_SIZE {
+ tokenizer.tokenize_state.size = 0;
+ tokenizer.exit(Name::FrontmatterSequence);
+
+ if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+ tokenizer.attempt(State::Next(StateName::FrontmatterOpenAfter), State::Nok);
+ State::Retry(space_or_tab(tokenizer))
+ } else {
+ State::Retry(StateName::FrontmatterOpenAfter)
+ }
+ } else {
+ tokenizer.tokenize_state.marker = 0;
+ tokenizer.tokenize_state.size = 0;
+ State::Nok
+ }
+}
+
+/// After open sequence: requires a line ending, then tries a closing fence with content as the alternative.
+///
+/// ```markdown
+/// > | ---
+/// ^
+/// | title: "Venus"
+/// | ---
+/// ```
+pub fn open_after(tokenizer: &mut Tokenizer) -> State {
+ if let Some(b'\n') = tokenizer.current {
+ tokenizer.exit(Name::FrontmatterFence);
+ tokenizer.enter(Name::LineEnding);
+ tokenizer.consume();
+ tokenizer.exit(Name::LineEnding);
+ tokenizer.attempt(
+ State::Next(StateName::FrontmatterAfter),
+ State::Next(StateName::FrontmatterContentStart),
+ );
+ State::Next(StateName::FrontmatterCloseStart)
+ } else {
+ tokenizer.tokenize_state.marker = 0;
+ State::Nok
+ }
+}
+
+/// Start of close sequence: only opens a fence if the current byte equals the stored marker.
+///
+/// ```markdown
+/// | ---
+/// | title: "Venus"
+/// > | ---
+/// ^
+/// ```
+pub fn close_start(tokenizer: &mut Tokenizer) -> State {
+ if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
+ tokenizer.enter(Name::FrontmatterFence);
+ tokenizer.enter(Name::FrontmatterSequence);
+ State::Retry(StateName::FrontmatterCloseSequence)
+ } else {
+ State::Nok
+ }
+}
+
+/// In close sequence: counts markers, requires exactly `FRONTMATTER_SEQUENCE_SIZE`, then allows spaces or tabs.
+///
+/// ```markdown
+/// | ---
+/// | title: "Venus"
+/// > | ---
+/// ^
+/// ```
+pub fn close_sequence(tokenizer: &mut Tokenizer) -> State {
+ if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
+ tokenizer.tokenize_state.size += 1;
+ tokenizer.consume();
+ State::Next(StateName::FrontmatterCloseSequence)
+ } else if tokenizer.tokenize_state.size == FRONTMATTER_SEQUENCE_SIZE {
+ tokenizer.tokenize_state.size = 0;
+ tokenizer.exit(Name::FrontmatterSequence);
+
+ if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+ tokenizer.attempt(State::Next(StateName::FrontmatterCloseAfter), State::Nok);
+ State::Retry(space_or_tab(tokenizer))
+ } else {
+ State::Retry(StateName::FrontmatterCloseAfter)
+ }
+ } else {
+ tokenizer.tokenize_state.size = 0;
+ State::Nok
+ }
+}
+
+/// After close sequence: the closing fence only counts if a line ending or the end of the file follows.
+///
+/// ```markdown
+/// | ---
+/// | title: "Venus"
+/// > | ---
+/// ^
+/// ```
+pub fn close_after(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ None | Some(b'\n') => {
+ tokenizer.exit(Name::FrontmatterFence);
+ State::Ok
+ }
+ _ => State::Nok,
+ }
+}
+
+/// Start of content chunk: an empty line or eof gets no chunk, any other byte opens a `FrontmatterChunk`.
+///
+/// ```markdown
+/// | ---
+/// > | title: "Venus"
+/// ^
+/// | ---
+/// ```
+pub fn content_start(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ None | Some(b'\n') => State::Retry(StateName::FrontmatterContentEnd),
+ Some(_) => {
+ tokenizer.enter(Name::FrontmatterChunk);
+ State::Retry(StateName::FrontmatterContentInside)
+ }
+ }
+}
+
+/// In content chunk: consumes bytes up to, but not including, the next line ending or eof.
+///
+/// ```markdown
+/// | ---
+/// > | title: "Venus"
+/// ^
+/// | ---
+/// ```
+pub fn content_inside(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ None | Some(b'\n') => {
+ tokenizer.exit(Name::FrontmatterChunk);
+ State::Retry(StateName::FrontmatterContentEnd)
+ }
+ Some(_) => {
+ tokenizer.consume();
+ State::Next(StateName::FrontmatterContentInside)
+ }
+ }
+}
+
+/// End of content chunk: eof without a closing fence fails; a line ending is consumed and a closing fence is tried.
+///
+/// ```markdown
+/// | ---
+/// > | title: "Venus"
+/// ^
+/// | ---
+/// ```
+pub fn content_end(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ None => {
+ tokenizer.tokenize_state.marker = 0;
+ State::Nok
+ }
+ Some(b'\n') => {
+ tokenizer.enter(Name::LineEnding);
+ tokenizer.consume();
+ tokenizer.exit(Name::LineEnding);
+ tokenizer.attempt(
+ State::Next(StateName::FrontmatterAfter),
+ State::Next(StateName::FrontmatterContentStart),
+ );
+ State::Next(StateName::FrontmatterCloseStart)
+ }
+ Some(_) => unreachable!("expected eof/eol"),
+ }
+}
+
+/// After frontmatter: clears the stored marker and closes `Frontmatter` when at a line ending or eof.
+///
+/// ```markdown
+/// | ---
+/// | title: "Venus"
+/// > | ---
+/// ^
+/// ```
+pub fn after(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.tokenize_state.marker = 0;
+
+ match tokenizer.current {
+ None | Some(b'\n') => {
+ tokenizer.exit(Name::Frontmatter);
+ State::Ok
+ }
+ _ => State::Nok,
+ }
+}
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index 5630143..1c1c6f7 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -40,6 +40,7 @@
//! * [code (indented)][code_indented]
//! * [code (text)][code_text]
//! * [definition][]
+//! * [frontmatter][]
//! * [hard break (escape)][hard_break_escape]
//! * [heading (atx)][heading_atx]
//! * [heading (setext)][heading_setext]
@@ -139,6 +140,7 @@ pub mod code_text;
pub mod definition;
pub mod document;
pub mod flow;
+pub mod frontmatter;
pub mod hard_break_escape;
pub mod heading_atx;
pub mod heading_setext;
diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs
index 0a8ebe9..74fd961 100644
--- a/src/construct/thematic_break.rs
+++ b/src/construct/thematic_break.rs
@@ -56,8 +56,8 @@
//! [list-item]: crate::construct::list_item
//! [html]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-hr-element
-use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
use crate::constant::{TAB_SIZE, THEMATIC_BREAK_MARKER_COUNT_MIN};
+use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
use crate::event::Name;
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;