From 8774b207b7251730eaa7fbfe4f144122a472dda0 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Mon, 22 Aug 2022 16:16:59 +0200 Subject: Add support for GFM task list item --- src/construct/gfm_task_list_item_check.rs | 157 ++++++++++++++++++++++++++++++ src/construct/mod.rs | 1 + src/construct/text.rs | 10 +- 3 files changed, 167 insertions(+), 1 deletion(-) create mode 100644 src/construct/gfm_task_list_item_check.rs (limited to 'src/construct') diff --git a/src/construct/gfm_task_list_item_check.rs b/src/construct/gfm_task_list_item_check.rs new file mode 100644 index 0000000..62ff8aa --- /dev/null +++ b/src/construct/gfm_task_list_item_check.rs @@ -0,0 +1,157 @@ +//! GFM: Task list item check occurs in the [text][] content type. +//! +//! ## Grammar +//! +//! Checks form with the following BNF +//! (see [construct][crate::construct] for character groups): +//! +//! ```bnf +//! gfm_task_list_item_check ::= '[' (0x09 | ' ' | 'X' | 'x') ']' +//! ``` +//! +//! The check is only allowed at the start of the first paragraph, optionally +//! following zero or more definitions or a blank line, in a list item. +//! The check must be followed by whitespace, which is in turn followed by +//! non-whitespace. +//! +//! ## HTML +//! +//! Checks relate to the `<input>` element, in the checkbox state +//! (`type=checkbox`), in HTML. +//! See [*§ 4.10.5.1.15 Checkbox state (`type=checkbox`)*][html-input-checkbox] +//! in the HTML spec for more info. +//! +//! ## Recommendation +//! +//! It is recommended to use lowercase `x` (instead of uppercase `X`), because +//! in markdown, it is more common to use lowercase in places where casing does +//! not matter. +//! It is also recommended to use a space (instead of a tab), as there is no +//! benefit of using tabs in this case. +//! +//! ## Tokens +//! +//! * [`GfmTaskListItemCheck`][Name::GfmTaskListItemCheck] +//! * [`GfmTaskListItemMarker`][Name::GfmTaskListItemMarker] +//! 
* [`GfmTaskListItemValueChecked`][Name::GfmTaskListItemValueChecked] +//! * [`GfmTaskListItemValueUnchecked`][Name::GfmTaskListItemValueUnchecked] +//! +//! ## References +//! +//! * [`micromark-extension-gfm-task-list-item`](https://github.com/micromark/micromark-extension-gfm-task-list-item) +//! * [*§ 5.3 Task list items (extension)* in `GFM`](https://github.github.com/gfm/#task-list-items-extension-) +//! +//! [text]: crate::construct::text +//! [html-input-checkbox]: https://html.spec.whatwg.org/multipage/input.html#checkbox-state-(type=checkbox) + +use crate::construct::partial_space_or_tab::space_or_tab; +use crate::event::Name; +use crate::state::{Name as StateName, State}; +use crate::tokenizer::Tokenizer; + +/// At start of task list item check. +/// +/// ```markdown +/// > | * [x] y. +/// ^ +/// ``` +pub fn start(tokenizer: &mut Tokenizer) -> State { + if tokenizer.parse_state.options.constructs.gfm_task_list_item + && tokenizer + .tokenize_state + .document_at_first_paragraph_of_list_item + && tokenizer.current == Some(b'[') + && tokenizer.previous == None + { + tokenizer.enter(Name::GfmTaskListItemCheck); + tokenizer.enter(Name::GfmTaskListItemMarker); + tokenizer.consume(); + tokenizer.exit(Name::GfmTaskListItemMarker); + State::Next(StateName::GfmTaskListItemCheckInside) + } else { + State::Nok + } +} + +/// In task list item check. +/// +/// ```markdown +/// > | * [x] y. 
+/// ^ +/// ``` +pub fn inside(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b'\t' | b' ') => { + tokenizer.enter(Name::GfmTaskListItemValueUnchecked); + tokenizer.consume(); + tokenizer.exit(Name::GfmTaskListItemValueUnchecked); + State::Next(StateName::GfmTaskListItemCheckClose) + } + Some(b'X' | b'x') => { + tokenizer.enter(Name::GfmTaskListItemValueChecked); + tokenizer.consume(); + tokenizer.exit(Name::GfmTaskListItemValueChecked); + State::Next(StateName::GfmTaskListItemCheckClose) + } + _ => State::Nok, + } +} + +/// At close of task list item check. +/// +/// ```markdown +/// > | * [x] y. +/// ^ +/// ``` +pub fn close(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b']') => { + tokenizer.enter(Name::GfmTaskListItemMarker); + tokenizer.consume(); + tokenizer.exit(Name::GfmTaskListItemMarker); + tokenizer.exit(Name::GfmTaskListItemCheck); + State::Next(StateName::GfmTaskListItemCheckAfter) + } + _ => State::Nok, + } +} + +/// After task list item check. +/// +/// ```markdown +/// > | * [x] y. +/// ^ +/// ``` +pub fn after(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + // EOL in paragraph means there must be something else after it. + Some(b'\n') => State::Ok, + // Space or tab? + // Check what comes after. + Some(b'\t' | b' ') => { + tokenizer.check(State::Ok, State::Nok); + tokenizer.attempt( + State::Next(StateName::GfmTaskListItemCheckAfterSpaceOrTab), + State::Nok, + ); + State::Retry(space_or_tab(tokenizer)) + } + // EOF, or non-whitespace, both wrong. + _ => State::Nok, + } +} + +/// After whitespace, after task list item check. +/// +/// ```markdown +/// > | * [x] y. +/// ^ +/// ``` +pub fn after_space_or_tab(tokenizer: &mut Tokenizer) -> State { + // End of paragraph, after whitespace, after check, is not okay. 
+ if tokenizer.current == None { + State::Nok + } else { + State::Ok + } +} diff --git a/src/construct/mod.rs b/src/construct/mod.rs index ba1a0b3..7ac3899 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -146,6 +146,7 @@ pub mod document; pub mod flow; pub mod frontmatter; pub mod gfm_autolink_literal; +pub mod gfm_task_list_item_check; pub mod hard_break_escape; pub mod heading_atx; pub mod heading_setext; diff --git a/src/construct/text.rs b/src/construct/text.rs index 9d40585..65f55d4 100644 --- a/src/construct/text.rs +++ b/src/construct/text.rs @@ -42,13 +42,21 @@ const MARKERS: [u8; 10] = [ /// Start of text. /// +/// There is a slightly weird case where task list items have their check at +/// the start of the first paragraph. +/// So we start by checking for that. +/// /// ```markdown /// > | abc /// ^ /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.markers = &MARKERS; - State::Retry(StateName::TextBefore) + tokenizer.attempt( + State::Next(StateName::TextBefore), + State::Next(StateName::TextBefore), + ); + State::Retry(StateName::GfmTaskListItemCheckStart) } /// Before text. -- cgit