diff options
Diffstat (limited to '')
| -rw-r--r-- | src/construct/gfm_task_list_item_check.rs | 157 | ||||
| -rw-r--r-- | src/construct/mod.rs | 1 | ||||
| -rw-r--r-- | src/construct/text.rs | 10 | 
3 files changed, 167 insertions, 1 deletions
diff --git a/src/construct/gfm_task_list_item_check.rs b/src/construct/gfm_task_list_item_check.rs new file mode 100644 index 0000000..62ff8aa --- /dev/null +++ b/src/construct/gfm_task_list_item_check.rs @@ -0,0 +1,157 @@ +//! GFM: Task list item check occurs in the [text][] content type. +//! +//! ## Grammar +//! +//! Checks form with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>): +//! +//! ```bnf +//! gfm_task_list_item_check ::= '[' (0x09 | ' ' | 'X' | 'x') ']' +//! ``` +//! +//! The check is only allowed at the start of the first paragraph, optionally +//! following zero or more definitions or a blank line, in a list item. +//! The check must be followed by whitespace, which is in turn followed by +//! non-whitespace. +//! +//! ## HTML +//! +//! Checks relate to the `<input>` element, in the checkbox state +//! (`type=checkbox`), in HTML. +//! See [*§ 4.10.5.1.15 Checkbox state (`type=checkbox`)*][html-input-checkbox] +//! in the HTML spec for more info. +//! +//! ## Recommendation +//! +//! It is recommended to use lowercase `x` (instead of uppercase `X`), because +//! in markdown, it is more common to use lowercase in places where casing does +//! not matter. +//! It is also recommended to use a space (instead of a tab), as there is no +//! benefit of using tabs in this case. +//! +//! ## Tokens +//! +//! *   [`GfmTaskListItemCheck`][Name::GfmTaskListItemCheck] +//! *   [`GfmTaskListItemMarker`][Name::GfmTaskListItemMarker] +//! *   [`GfmTaskListItemValueChecked`][Name::GfmTaskListItemValueChecked] +//! *   [`GfmTaskListItemValueUnchecked`][Name::GfmTaskListItemValueUnchecked] +//! +//! ## References +//! +//! *   [`micromark-extension-gfm-task-list-item`](https://github.com/micromark/micromark-extension-gfm-task-list-item) +//! *   [*§ 5.3 Task list items (extension)* in `GFM`](https://github.github.com/gfm/#task-list-items-extension-) +//! +//! [text]: crate::construct::text +//! [html-input-checkbox]: https://html.spec.whatwg.org/multipage/input.html#checkbox-state-(type=checkbox) + +use crate::construct::partial_space_or_tab::space_or_tab; +use crate::event::Name; +use crate::state::{Name as StateName, State}; +use crate::tokenizer::Tokenizer; + +/// At start of task list item check. +/// +/// ```markdown +/// > | * [x] y. +///       ^ +/// ``` +pub fn start(tokenizer: &mut Tokenizer) -> State { +    if tokenizer.parse_state.options.constructs.gfm_task_list_item +        && tokenizer +            .tokenize_state +            .document_at_first_paragraph_of_list_item +        && tokenizer.current == Some(b'[') +        && tokenizer.previous == None +    { +        tokenizer.enter(Name::GfmTaskListItemCheck); +        tokenizer.enter(Name::GfmTaskListItemMarker); +        tokenizer.consume(); +        tokenizer.exit(Name::GfmTaskListItemMarker); +        State::Next(StateName::GfmTaskListItemCheckInside) +    } else { +        State::Nok +    } +} + +/// In task list item check. +/// +/// ```markdown +/// > | * [x] y. +///        ^ +/// ``` +pub fn inside(tokenizer: &mut Tokenizer) -> State { +    match tokenizer.current { +        Some(b'\t' | b' ') => { +            tokenizer.enter(Name::GfmTaskListItemValueUnchecked); +            tokenizer.consume(); +            tokenizer.exit(Name::GfmTaskListItemValueUnchecked); +            State::Next(StateName::GfmTaskListItemCheckClose) +        } +        Some(b'X' | b'x') => { +            tokenizer.enter(Name::GfmTaskListItemValueChecked); +            tokenizer.consume(); +            tokenizer.exit(Name::GfmTaskListItemValueChecked); +            State::Next(StateName::GfmTaskListItemCheckClose) +        } +        _ => State::Nok, +    } +} + +/// At close of task list item check. +/// +/// ```markdown +/// > | * [x] y. +///         ^ +/// ``` +pub fn close(tokenizer: &mut Tokenizer) -> State { +    match tokenizer.current { +        Some(b']') => { +            tokenizer.enter(Name::GfmTaskListItemMarker); +            tokenizer.consume(); +            tokenizer.exit(Name::GfmTaskListItemMarker); +            tokenizer.exit(Name::GfmTaskListItemCheck); +            State::Next(StateName::GfmTaskListItemCheckAfter) +        } +        _ => State::Nok, +    } +} + +/// After task list item check. +/// +/// ```markdown +/// > | * [x] y. +///          ^ +/// ``` +pub fn after(tokenizer: &mut Tokenizer) -> State { +    match tokenizer.current { +        // EOL in paragraph means there must be something else after it. +        Some(b'\n') => State::Ok, +        // Space or tab? +        // Check what comes after. +        Some(b'\t' | b' ') => { +            tokenizer.check(State::Ok, State::Nok); +            tokenizer.attempt( +                State::Next(StateName::GfmTaskListItemCheckAfterSpaceOrTab), +                State::Nok, +            ); +            State::Retry(space_or_tab(tokenizer)) +        } +        // EOF, or non-whitespace, both wrong. +        _ => State::Nok, +    } +} + +/// After whitespace, after task list item check. +/// +/// ```markdown +/// > | * [x] y. +///           ^ +/// ``` +pub fn after_space_or_tab(tokenizer: &mut Tokenizer) -> State { +    // End of paragraph, after whitespace, after check, is not okay. +    if tokenizer.current == None { +        State::Nok +    } else { +        State::Ok +    } +} diff --git a/src/construct/mod.rs b/src/construct/mod.rs index ba1a0b3..7ac3899 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -146,6 +146,7 @@ pub mod document;  pub mod flow;  pub mod frontmatter;  pub mod gfm_autolink_literal; +pub mod gfm_task_list_item_check;  pub mod hard_break_escape;  pub mod heading_atx;  pub mod heading_setext; diff --git a/src/construct/text.rs b/src/construct/text.rs index 9d40585..65f55d4 100644 --- a/src/construct/text.rs +++ b/src/construct/text.rs @@ -42,13 +42,21 @@ const MARKERS: [u8; 10] = [  /// Start of text.  /// +/// There is a slightly weird case where task list items have their check at +/// the start of the first paragraph. +/// So we start by checking for that. +///  /// ```markdown  /// > | abc  ///     ^  /// ```  pub fn start(tokenizer: &mut Tokenizer) -> State {      tokenizer.tokenize_state.markers = &MARKERS; -    State::Retry(StateName::TextBefore) +    tokenizer.attempt( +        State::Next(StateName::TextBefore), +        State::Next(StateName::TextBefore), +    ); +    State::Retry(StateName::GfmTaskListItemCheckStart)  }  /// Before text.  | 
