From 8774b207b7251730eaa7fbfe4f144122a472dda0 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Mon, 22 Aug 2022 16:16:59 +0200 Subject: Add support for GFM task list item --- src/compiler.rs | 26 ++++- src/construct/gfm_task_list_item_check.rs | 157 ++++++++++++++++++++++++++++++ src/construct/mod.rs | 1 + src/construct/text.rs | 10 +- src/event.rs | 79 ++++++++++++++- src/lib.rs | 9 ++ src/state.rs | 14 +++ src/subtokenize.rs | 32 +++++- src/tokenizer.rs | 10 +- src/util/skip.rs | 12 ++- 10 files changed, 338 insertions(+), 12 deletions(-) create mode 100644 src/construct/gfm_task_list_item_check.rs (limited to 'src') diff --git a/src/compiler.rs b/src/compiler.rs index abf35c8..f1003fd 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -327,6 +327,7 @@ fn enter(context: &mut CompileContext) { Name::Emphasis => on_enter_emphasis(context), Name::Frontmatter => on_enter_frontmatter(context), Name::GfmStrikethrough => on_enter_gfm_strikethrough(context), + Name::GfmTaskListItemCheck => on_enter_gfm_task_list_item_check(context), Name::HtmlFlow => on_enter_html_flow(context), Name::HtmlText => on_enter_html_text(context), Name::Image => on_enter_image(context), @@ -370,10 +371,12 @@ fn exit(context: &mut CompileContext) { Name::DefinitionTitleString => on_exit_definition_title_string(context), Name::Emphasis => on_exit_emphasis(context), Name::Frontmatter => on_exit_frontmatter(context), - Name::GfmStrikethrough => on_exit_gfm_strikethrough(context), Name::GfmAutolinkLiteralProtocol => on_exit_gfm_autolink_literal_protocol(context), Name::GfmAutolinkLiteralWww => on_exit_gfm_autolink_literal_www(context), Name::GfmAutolinkLiteralEmail => on_exit_gfm_autolink_literal_email(context), + Name::GfmStrikethrough => on_exit_gfm_strikethrough(context), + Name::GfmTaskListItemCheck => on_exit_gfm_task_list_item_check(context), + Name::GfmTaskListItemValueChecked => on_exit_gfm_task_list_item_value_checked(context), Name::HardBreakEscape | Name::HardBreakTrailing => on_exit_break(context), Name::HeadingAtx => on_exit_heading_atx(context), Name::HeadingAtxSequence => on_exit_heading_atx_sequence(context), @@ -476,6 +479,13 @@ fn on_enter_gfm_strikethrough(context: &mut CompileContext) { } } +/// Handle [`Enter`][Kind::Enter]:[`GfmTaskListItemCheck`][Name::GfmTaskListItemCheck]. +fn on_enter_gfm_task_list_item_check(context: &mut CompileContext) { + if !context.image_alt_inside { + context.push(""); + } +} + +/// Handle [`Exit`][Kind::Exit]:[`GfmTaskListItemValueChecked`][Name::GfmTaskListItemValueChecked]. +fn on_exit_gfm_task_list_item_value_checked(context: &mut CompileContext) { + if !context.image_alt_inside { + context.push("checked=\"\" "); + } +} + /// Handle [`Exit`][Kind::Exit]:[`HeadingAtx`][Name::HeadingAtx]. fn on_exit_heading_atx(context: &mut CompileContext) { let rank = context diff --git a/src/construct/gfm_task_list_item_check.rs b/src/construct/gfm_task_list_item_check.rs new file mode 100644 index 0000000..62ff8aa --- /dev/null +++ b/src/construct/gfm_task_list_item_check.rs @@ -0,0 +1,157 @@ +//! GFM: Task list item check occurs in the [text][] content type. +//! +//! ## Grammar +//! +//! Checks form with the following BNF +//! (see [construct][crate::construct] for character groups): +//! +//! ```bnf +//! gfm_task_list_item_check ::= '[' (0x09 | ' ' | 'X' | 'x') ']' +//! ``` +//! +//! The check is only allowed at the start of the first paragraph, optionally +//! following zero or more definitions or a blank line, in a list item. +//! The check must be followed by whitespace, which is in turn followed by +//! non-whitespace. +//! +//! ## HTML +//! +//! Checks relate to the `` element, in the checkbox state +//! (`type=checkbox`), in HTML. +//! See [*§ 4.10.5.1.15 Checkbox state (`type=checkbox`)*][html-input-checkbox] +//! in the HTML spec for more info. +//! +//! ## Recommendation +//! +//! It is recommended to use lowercase `x` (instead of uppercase `X`), because +//! in markdown, it is more common to use lowercase in places where casing does +//! not matter. +//! It is also recommended to use a space (instead of a tab), as there is no +//! benefit of using tabs in this case. +//! +//! ## Tokens +//! +//! * [`GfmTaskListItemCheck`][Name::GfmTaskListItemCheck] +//! * [`GfmTaskListItemMarker`][Name::GfmTaskListItemMarker] +//! * [`GfmTaskListItemValueChecked`][Name::GfmTaskListItemValueChecked] +//! * [`GfmTaskListItemValueUnchecked`][Name::GfmTaskListItemValueUnchecked] +//! +//! ## References +//! +//! * [`micromark-extension-gfm-task-list-item`](https://github.com/micromark/micromark-extension-gfm-task-list-item) +//! * [*§ 5.3 Task list items (extension)* in `GFM`](https://github.github.com/gfm/#task-list-items-extension-) +//! +//! [text]: crate::construct::text +//! [html-input-checkbox]: https://html.spec.whatwg.org/multipage/input.html#checkbox-state-(type=checkbox) + +use crate::construct::partial_space_or_tab::space_or_tab; +use crate::event::Name; +use crate::state::{Name as StateName, State}; +use crate::tokenizer::Tokenizer; + +/// At start of task list item check. +/// +/// ```markdown +/// > | * [x] y. +/// ^ +/// ``` +pub fn start(tokenizer: &mut Tokenizer) -> State { + if tokenizer.parse_state.options.constructs.gfm_task_list_item + && tokenizer + .tokenize_state + .document_at_first_paragraph_of_list_item + && tokenizer.current == Some(b'[') + && tokenizer.previous == None + { + tokenizer.enter(Name::GfmTaskListItemCheck); + tokenizer.enter(Name::GfmTaskListItemMarker); + tokenizer.consume(); + tokenizer.exit(Name::GfmTaskListItemMarker); + State::Next(StateName::GfmTaskListItemCheckInside) + } else { + State::Nok + } +} + +/// In task list item check. +/// +/// ```markdown +/// > | * [x] y. +/// ^ +/// ``` +pub fn inside(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b'\t' | b' ') => { + tokenizer.enter(Name::GfmTaskListItemValueUnchecked); + tokenizer.consume(); + tokenizer.exit(Name::GfmTaskListItemValueUnchecked); + State::Next(StateName::GfmTaskListItemCheckClose) + } + Some(b'X' | b'x') => { + tokenizer.enter(Name::GfmTaskListItemValueChecked); + tokenizer.consume(); + tokenizer.exit(Name::GfmTaskListItemValueChecked); + State::Next(StateName::GfmTaskListItemCheckClose) + } + _ => State::Nok, + } +} + +/// At close of task list item check. +/// +/// ```markdown +/// > | * [x] y. +/// ^ +/// ``` +pub fn close(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b']') => { + tokenizer.enter(Name::GfmTaskListItemMarker); + tokenizer.consume(); + tokenizer.exit(Name::GfmTaskListItemMarker); + tokenizer.exit(Name::GfmTaskListItemCheck); + State::Next(StateName::GfmTaskListItemCheckAfter) + } + _ => State::Nok, + } +} + +/// After task list item check. +/// +/// ```markdown +/// > | * [x] y. +/// ^ +/// ``` +pub fn after(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + // EOL in paragraph means there must be something else after it. + Some(b'\n') => State::Ok, + // Space or tab? + // Check what comes after. + Some(b'\t' | b' ') => { + tokenizer.check(State::Ok, State::Nok); + tokenizer.attempt( + State::Next(StateName::GfmTaskListItemCheckAfterSpaceOrTab), + State::Nok, + ); + State::Retry(space_or_tab(tokenizer)) + } + // EOF, or non-whitespace, both wrong. + _ => State::Nok, + } +} + +/// After whitespace, after task list item check. +/// +/// ```markdown +/// > | * [x] y. +/// ^ +/// ``` +pub fn after_space_or_tab(tokenizer: &mut Tokenizer) -> State { + // End of paragraph, after whitespace, after check, is not okay. + if tokenizer.current == None { + State::Nok + } else { + State::Ok + } +} diff --git a/src/construct/mod.rs b/src/construct/mod.rs index ba1a0b3..7ac3899 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -146,6 +146,7 @@ pub mod document; pub mod flow; pub mod frontmatter; pub mod gfm_autolink_literal; +pub mod gfm_task_list_item_check; pub mod hard_break_escape; pub mod heading_atx; pub mod heading_setext; diff --git a/src/construct/text.rs b/src/construct/text.rs index 9d40585..65f55d4 100644 --- a/src/construct/text.rs +++ b/src/construct/text.rs @@ -42,13 +42,21 @@ const MARKERS: [u8; 10] = [ /// Start of text. /// +/// There is a slightly weird case where task list items have their check at +/// the start of the first paragraph. +/// So we start by checking for that. +/// /// ```markdown /// > | abc /// ^ /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.markers = &MARKERS; - State::Retry(StateName::TextBefore) + tokenizer.attempt( + State::Next(StateName::TextBefore), + State::Next(StateName::TextBefore), + ); + State::Retry(StateName::GfmTaskListItemCheckStart) } /// Before text. diff --git a/src/event.rs b/src/event.rs index 3c690e1..f20c599 100644 --- a/src/event.rs +++ b/src/event.rs @@ -1074,6 +1074,80 @@ pub enum Name { /// ^ /// ``` GfmStrikethroughText, + /// GFM: Task list item check. + /// + /// ## Info + /// + /// * **Context**: + /// [text content][crate::construct::text] + /// * **Content model**: + /// [`GfmTaskListItemMarker`][Name::GfmTaskListItemMarker], + /// [`GfmTaskListItemValueChecked`][Name::GfmTaskListItemValueChecked], + /// [`GfmTaskListItemValueUnchecked`][Name::GfmTaskListItemValueUnchecked] + /// * **Construct**: + /// [`gfm_task_list_item_check`][crate::construct::gfm_task_list_item_check] + /// + /// ## Example + /// + /// ```markdown + /// > | * [x] y. + /// ^^^ + /// ``` + GfmTaskListItemCheck, + /// GFM: Task list item check marker. + /// + /// ## Info + /// + /// * **Context**: + /// [`GfmTaskListItemCheck`][Name::GfmTaskListItemCheck] + /// * **Content model**: + /// void + /// * **Construct**: + /// [`gfm_task_list_item_check`][crate::construct::gfm_task_list_item_check] + /// + /// ## Example + /// + /// ```markdown + /// > | * [x] y. + /// ^ ^ + /// ``` + GfmTaskListItemMarker, + /// GFM: Task list item value: checked. + /// + /// ## Info + /// + /// * **Context**: + /// [`GfmTaskListItemCheck`][Name::GfmTaskListItemCheck] + /// * **Content model**: + /// void + /// * **Construct**: + /// [`gfm_task_list_item_check`][crate::construct::gfm_task_list_item_check] + /// + /// ## Example + /// + /// ```markdown + /// > | * [x] y. + /// ^ + /// ``` + GfmTaskListItemValueChecked, + /// GFM: Task list item value: unchecked. + /// + /// ## Info + /// + /// * **Context**: + /// [`GfmTaskListItemCheck`][Name::GfmTaskListItemCheck] + /// * **Content model**: + /// void + /// * **Construct**: + /// [`gfm_task_list_item_check`][crate::construct::gfm_task_list_item_check] + /// + /// ## Example + /// + /// ```markdown + /// > | * [ ] z. + /// ^ + /// ``` + GfmTaskListItemValueUnchecked, /// Whole hard break (escape). /// /// ## Info @@ -2031,7 +2105,7 @@ pub enum Name { } /// List of void events, used to make sure everything is working well. -pub const VOID_EVENTS: [Name; 47] = [ +pub const VOID_EVENTS: [Name; 50] = [ Name::AttentionSequence, Name::AutolinkEmail, Name::AutolinkMarker, @@ -2061,6 +2135,9 @@ pub const VOID_EVENTS: [Name; 47] = [ Name::GfmAutolinkLiteralProtocol, Name::GfmAutolinkLiteralWww, Name::GfmStrikethroughSequence, + Name::GfmTaskListItemMarker, + Name::GfmTaskListItemValueChecked, + Name::GfmTaskListItemValueUnchecked, Name::FrontmatterSequence, Name::HardBreakEscape, Name::HardBreakTrailing, diff --git a/src/lib.rs b/src/lib.rs index 893255a..5b7836c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -178,6 +178,13 @@ pub struct Constructs { /// ^^^ /// ``` pub gfm_strikethrough: bool, + /// GFM: task list item. + /// + /// ```markdown + /// > | * [x] y. + /// ^^^ + /// ``` + pub gfm_task_list_item: bool, /// Hard break (escape). /// /// ```markdown @@ -277,6 +284,7 @@ impl Default for Constructs { frontmatter: false, gfm_autolink_literal: false, gfm_strikethrough: false, + gfm_task_list_item: false, hard_break_escape: true, hard_break_trailing: true, heading_atx: true, @@ -301,6 +309,7 @@ impl Constructs { Self { gfm_autolink_literal: true, gfm_strikethrough: true, + gfm_task_list_item: true, ..Self::default() } } diff --git a/src/state.rs b/src/state.rs index da935d1..65ffbeb 100644 --- a/src/state.rs +++ b/src/state.rs @@ -145,6 +145,12 @@ pub enum Name { FrontmatterCloseSequence, FrontmatterCloseAfter, + GfmTaskListItemCheckStart, + GfmTaskListItemCheckInside, + GfmTaskListItemCheckClose, + GfmTaskListItemCheckAfter, + GfmTaskListItemCheckAfterSpaceOrTab, + HardBreakEscapeStart, HardBreakEscapeAfter, @@ -444,6 +450,14 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { Name::FrontmatterCloseSequence => construct::frontmatter::close_sequence, Name::FrontmatterCloseAfter => construct::frontmatter::close_after, + Name::GfmTaskListItemCheckStart => construct::gfm_task_list_item_check::start, + Name::GfmTaskListItemCheckInside => construct::gfm_task_list_item_check::inside, + Name::GfmTaskListItemCheckClose => construct::gfm_task_list_item_check::close, + Name::GfmTaskListItemCheckAfter => construct::gfm_task_list_item_check::after, + Name::GfmTaskListItemCheckAfterSpaceOrTab => { + construct::gfm_task_list_item_check::after_space_or_tab + } + Name::HardBreakEscapeStart => construct::hard_break_escape::start, Name::HardBreakEscapeAfter => construct::hard_break_escape::after, diff --git a/src/subtokenize.rs b/src/subtokenize.rs index 5932f11..7fcc481 100644 --- a/src/subtokenize.rs +++ b/src/subtokenize.rs @@ -17,11 +17,11 @@ //! whole document needs to be parsed up to the level of definitions, before //! any level that can include references can be parsed. -use crate::event::{Content, Event, Kind, VOID_EVENTS}; +use crate::event::{Content, Event, Kind, Name, VOID_EVENTS}; use crate::parser::ParseState; use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; -use crate::util::edit_map::EditMap; +use crate::util::{edit_map::EditMap, skip}; use alloc::{vec, vec::Vec}; /// Link two [`Event`][]s. @@ -94,6 +94,34 @@ pub fn subtokenize(events: &mut Vec, parse_state: &ParseState) -> bool { StateName::TextStart }); + // Check if this is the first paragraph, after zero or more + // definitions (or a blank line), in a list item. + // Used for GFM task list items. + if tokenizer.parse_state.options.constructs.gfm_task_list_item + && index > 2 + && events[index - 1].kind == Kind::Enter + && events[index - 1].name == Name::Paragraph + { + let before = skip::opt_back( + events, + index - 2, + &[ + Name::BlankLineEnding, + Name::Definition, + Name::LineEnding, + Name::SpaceOrTab, + ], + ); + + if events[before].kind == Kind::Exit + && events[before].name == Name::ListItemPrefix + { + tokenizer + .tokenize_state + .document_at_first_paragraph_of_list_item = true; + } + } + // Loop through links to pass them in order to the subtokenizer. while let Some(index) = link_index { let enter = &events[index]; diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 0bd1f31..731b829 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -144,6 +144,10 @@ pub struct TokenizeState<'a> { pub document_exits: Vec>>, /// Whether the previous flow was a paragraph. pub document_paragraph_before: bool, + /// Whether this is the first paragraph (potentially after definitions) in + /// a list item. + /// Used for GFM task list items. + pub document_at_first_paragraph_of_list_item: bool, // Couple of very frequent settings for parsing whitespace. pub space_or_tab_eol_content: Option, @@ -282,6 +286,7 @@ impl<'a> Tokenizer<'a> { document_data_index: None, document_child_state: None, document_child: None, + document_at_first_paragraph_of_list_item: false, definitions: vec![], end: 0, label_starts: vec![], @@ -509,11 +514,6 @@ impl<'a> Tokenizer<'a> { /// Stack an attempt, moving to `ok` on [`State::Ok`][] and `nok` on /// [`State::Nok`][], reverting in both cases. pub fn check(&mut self, ok: State, nok: State) { - debug_assert_ne!( - nok, - State::Nok, - "checking w/ `State::Nok` should likely be an attempt" - ); // Always capture (and restore) when checking. // No need to capture (and restore) when `nok` is `State::Nok`, because the // parent attempt will do it. diff --git a/src/util/skip.rs b/src/util/skip.rs index a7de408..df63498 100644 --- a/src/util/skip.rs +++ b/src/util/skip.rs @@ -59,12 +59,20 @@ fn skip_opt_impl(events: &[Event], mut index: usize, names: &[Name], forward: bo balance - 1 }; + let next = if forward { + index + 1 + } else if index > 0 { + index - 1 + } else { + index + }; + if events[index].name == *current && balance == 0 { - index = if forward { index + 1 } else { index - 1 }; + index = next; break; } - index = if forward { index + 1 } else { index - 1 }; + index = next; } } -- cgit