aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author: Titus Wormer <tituswormer@gmail.com>, 2022-08-22 16:16:59 +0200
committer: Titus Wormer <tituswormer@gmail.com>, 2022-08-22 16:16:59 +0200
commit: 8774b207b7251730eaa7fbfe4f144122a472dda0 (patch)
tree: ce1b8f92a08ff70da265ae8e4484dba2335280a9 /src
parent: 351c69644bdbdf52c95e322904273657892920b5 (diff)
download: markdown-rs-8774b207b7251730eaa7fbfe4f144122a472dda0.tar.gz
markdown-rs-8774b207b7251730eaa7fbfe4f144122a472dda0.tar.bz2
markdown-rs-8774b207b7251730eaa7fbfe4f144122a472dda0.zip
Add support for GFM task list item
Diffstat (limited to '')
-rw-r--r-- src/compiler.rs 26
-rw-r--r-- src/construct/gfm_task_list_item_check.rs 157
-rw-r--r-- src/construct/mod.rs 1
-rw-r--r-- src/construct/text.rs 10
-rw-r--r-- src/event.rs 79
-rw-r--r-- src/lib.rs 9
-rw-r--r-- src/state.rs 14
-rw-r--r-- src/subtokenize.rs 32
-rw-r--r-- src/tokenizer.rs 10
-rw-r--r-- src/util/skip.rs 12
10 files changed, 338 insertions, 12 deletions
diff --git a/src/compiler.rs b/src/compiler.rs
index abf35c8..f1003fd 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -327,6 +327,7 @@ fn enter(context: &mut CompileContext) {
Name::Emphasis => on_enter_emphasis(context),
Name::Frontmatter => on_enter_frontmatter(context),
Name::GfmStrikethrough => on_enter_gfm_strikethrough(context),
+ Name::GfmTaskListItemCheck => on_enter_gfm_task_list_item_check(context),
Name::HtmlFlow => on_enter_html_flow(context),
Name::HtmlText => on_enter_html_text(context),
Name::Image => on_enter_image(context),
@@ -370,10 +371,12 @@ fn exit(context: &mut CompileContext) {
Name::DefinitionTitleString => on_exit_definition_title_string(context),
Name::Emphasis => on_exit_emphasis(context),
Name::Frontmatter => on_exit_frontmatter(context),
- Name::GfmStrikethrough => on_exit_gfm_strikethrough(context),
Name::GfmAutolinkLiteralProtocol => on_exit_gfm_autolink_literal_protocol(context),
Name::GfmAutolinkLiteralWww => on_exit_gfm_autolink_literal_www(context),
Name::GfmAutolinkLiteralEmail => on_exit_gfm_autolink_literal_email(context),
+ Name::GfmStrikethrough => on_exit_gfm_strikethrough(context),
+ Name::GfmTaskListItemCheck => on_exit_gfm_task_list_item_check(context),
+ Name::GfmTaskListItemValueChecked => on_exit_gfm_task_list_item_value_checked(context),
Name::HardBreakEscape | Name::HardBreakTrailing => on_exit_break(context),
Name::HeadingAtx => on_exit_heading_atx(context),
Name::HeadingAtxSequence => on_exit_heading_atx_sequence(context),
@@ -476,6 +479,13 @@ fn on_enter_gfm_strikethrough(context: &mut CompileContext) {
}
}
+/// Handle [`Enter`][Kind::Enter]:[`GfmTaskListItemCheck`][Name::GfmTaskListItemCheck]: push the start of an `<input>` tag (left unclosed) unless `image_alt_inside` is set.
+fn on_enter_gfm_task_list_item_check(context: &mut CompileContext) {
+ if !context.image_alt_inside {
+ context.push("<input type=\"checkbox\" disabled=\"\" ");
+ }
+}
+
/// Handle [`Enter`][Kind::Enter]:[`HtmlFlow`][Name::HtmlFlow].
fn on_enter_html_flow(context: &mut CompileContext) {
context.line_ending_if_needed();
@@ -958,6 +968,20 @@ fn on_exit_gfm_strikethrough(context: &mut CompileContext) {
}
}
+/// Handle [`Exit`][Kind::Exit]:[`GfmTaskListItemCheck`][Name::GfmTaskListItemCheck]: push the closing `/>` unless `image_alt_inside` is set.
+fn on_exit_gfm_task_list_item_check(context: &mut CompileContext) {
+ if !context.image_alt_inside {
+ context.push("/>");
+ }
+}
+
+/// Handle [`Exit`][Kind::Exit]:[`GfmTaskListItemValueChecked`][Name::GfmTaskListItemValueChecked]: push the `checked=""` attribute unless `image_alt_inside` is set.
+fn on_exit_gfm_task_list_item_value_checked(context: &mut CompileContext) {
+ if !context.image_alt_inside {
+ context.push("checked=\"\" ");
+ }
+}
+
/// Handle [`Exit`][Kind::Exit]:[`HeadingAtx`][Name::HeadingAtx].
fn on_exit_heading_atx(context: &mut CompileContext) {
let rank = context
diff --git a/src/construct/gfm_task_list_item_check.rs b/src/construct/gfm_task_list_item_check.rs
new file mode 100644
index 0000000..62ff8aa
--- /dev/null
+++ b/src/construct/gfm_task_list_item_check.rs
@@ -0,0 +1,157 @@
+//! GFM: Task list item check occurs in the [text][] content type.
+//!
+//! ## Grammar
+//!
+//! Checks form with the following BNF
+//! (<small>see [construct][crate::construct] for character groups</small>):
+//!
+//! ```bnf
+//! gfm_task_list_item_check ::= '[' (0x09 | ' ' | 'X' | 'x') ']'
+//! ```
+//!
+//! The check is only allowed at the start of the first paragraph, optionally
+//! following zero or more definitions or a blank line, in a list item.
+//! The check must be followed by whitespace, which is in turn followed by
+//! non-whitespace.
+//!
+//! ## HTML
+//!
+//! Checks relate to the `<input>` element, in the checkbox state
+//! (`type=checkbox`), in HTML.
+//! See [*§ 4.10.5.1.15 Checkbox state (`type=checkbox`)*][html-input-checkbox]
+//! in the HTML spec for more info.
+//!
+//! ## Recommendation
+//!
+//! It is recommended to use lowercase `x` (instead of uppercase `X`), because
+//! in markdown, it is more common to use lowercase in places where casing does
+//! not matter.
+//! It is also recommended to use a space (instead of a tab), as there is no
+//! benefit of using tabs in this case.
+//!
+//! ## Tokens
+//!
+//! * [`GfmTaskListItemCheck`][Name::GfmTaskListItemCheck]
+//! * [`GfmTaskListItemMarker`][Name::GfmTaskListItemMarker]
+//! * [`GfmTaskListItemValueChecked`][Name::GfmTaskListItemValueChecked]
+//! * [`GfmTaskListItemValueUnchecked`][Name::GfmTaskListItemValueUnchecked]
+//!
+//! ## References
+//!
+//! * [`micromark-extension-gfm-task-list-item`](https://github.com/micromark/micromark-extension-gfm-task-list-item)
+//! * [*§ 5.3 Task list items (extension)* in `GFM`](https://github.github.com/gfm/#task-list-items-extension-)
+//!
+//! [text]: crate::construct::text
+//! [html-input-checkbox]: https://html.spec.whatwg.org/multipage/input.html#checkbox-state-(type=checkbox)
+
+use crate::construct::partial_space_or_tab::space_or_tab;
+use crate::event::Name;
+use crate::state::{Name as StateName, State};
+use crate::tokenizer::Tokenizer;
+
+/// At start of task list item check (needs the construct enabled, the first-paragraph-of-list-item flag set, and `[` with no byte before it).
+///
+/// ```markdown
+/// > | * [x] y.
+///       ^
+/// ```
+pub fn start(tokenizer: &mut Tokenizer) -> State {
+    if tokenizer.parse_state.options.constructs.gfm_task_list_item
+        && tokenizer
+            .tokenize_state
+            .document_at_first_paragraph_of_list_item
+        && tokenizer.current == Some(b'[')
+        && tokenizer.previous.is_none()
+    {
+        tokenizer.enter(Name::GfmTaskListItemCheck);
+        tokenizer.enter(Name::GfmTaskListItemMarker);
+        tokenizer.consume();
+        tokenizer.exit(Name::GfmTaskListItemMarker);
+        State::Next(StateName::GfmTaskListItemCheckInside)
+    } else {
+        State::Nok
+    }
+}
+
+/// In task list item check: a tab or space is the unchecked value, `X` or `x` the checked value.
+///
+/// ```markdown
+/// > | * [x] y.
+///        ^
+/// ```
+pub fn inside(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ Some(b'\t' | b' ') => {
+ tokenizer.enter(Name::GfmTaskListItemValueUnchecked);
+ tokenizer.consume();
+ tokenizer.exit(Name::GfmTaskListItemValueUnchecked);
+ State::Next(StateName::GfmTaskListItemCheckClose)
+ }
+ Some(b'X' | b'x') => {
+ tokenizer.enter(Name::GfmTaskListItemValueChecked);
+ tokenizer.consume();
+ tokenizer.exit(Name::GfmTaskListItemValueChecked);
+ State::Next(StateName::GfmTaskListItemCheckClose)
+ }
+ _ => State::Nok,
+ }
+}
+
+/// At close of task list item check: only `]` is accepted, which also ends the whole check.
+///
+/// ```markdown
+/// > | * [x] y.
+///         ^
+/// ```
+pub fn close(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ Some(b']') => {
+ tokenizer.enter(Name::GfmTaskListItemMarker);
+ tokenizer.consume();
+ tokenizer.exit(Name::GfmTaskListItemMarker);
+ tokenizer.exit(Name::GfmTaskListItemCheck);
+ State::Next(StateName::GfmTaskListItemCheckAfter)
+ }
+ _ => State::Nok,
+ }
+}
+
+/// After task list item check: decide whether what follows the `]` allows the check.
+///
+/// ```markdown
+/// > | * [x] y.
+///          ^
+/// ```
+pub fn after(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ // EOL in paragraph means there must be something else after it.
+ Some(b'\n') => State::Ok,
+ // Space or tab: look ahead with `check` (which reverts in both cases),
+ // and succeed only if the whitespace is not followed by the end of the paragraph.
+ Some(b'\t' | b' ') => {
+ tokenizer.check(State::Ok, State::Nok);
+ tokenizer.attempt(
+ State::Next(StateName::GfmTaskListItemCheckAfterSpaceOrTab),
+ State::Nok,
+ );
+ State::Retry(space_or_tab(tokenizer))
+ }
+ // EOF, or non-whitespace, both wrong.
+ _ => State::Nok,
+ }
+}
+
+/// After whitespace, after task list item check: okay unless there is no current byte left.
+///
+/// ```markdown
+/// > | * [x] y.
+///           ^
+/// ```
+pub fn after_space_or_tab(tokenizer: &mut Tokenizer) -> State {
+    // End of paragraph, after whitespace, after check, is not okay.
+    if tokenizer.current.is_none() {
+        State::Nok
+    } else {
+        State::Ok
+    }
+}
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index ba1a0b3..7ac3899 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -146,6 +146,7 @@ pub mod document;
pub mod flow;
pub mod frontmatter;
pub mod gfm_autolink_literal;
+pub mod gfm_task_list_item_check;
pub mod hard_break_escape;
pub mod heading_atx;
pub mod heading_setext;
diff --git a/src/construct/text.rs b/src/construct/text.rs
index 9d40585..65f55d4 100644
--- a/src/construct/text.rs
+++ b/src/construct/text.rs
@@ -42,13 +42,21 @@ const MARKERS: [u8; 10] = [
/// Start of text.
///
+/// There is a slightly weird case where task list items have their check at
+/// the start of the first paragraph.
+/// So we start by checking for that, and continue before text either way.
+///
/// ```markdown
/// > | abc
///     ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.markers = &MARKERS;
- State::Retry(StateName::TextBefore)
+ tokenizer.attempt(
+ State::Next(StateName::TextBefore),
+ State::Next(StateName::TextBefore),
+ );
+ State::Retry(StateName::GfmTaskListItemCheckStart)
}
/// Before text.
diff --git a/src/event.rs b/src/event.rs
index 3c690e1..f20c599 100644
--- a/src/event.rs
+++ b/src/event.rs
@@ -1074,6 +1074,80 @@ pub enum Name {
/// ^
/// ```
GfmStrikethroughText,
+ /// GFM: Task list item check.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [text content][crate::construct::text]
+ /// * **Content model**:
+ /// [`GfmTaskListItemMarker`][Name::GfmTaskListItemMarker],
+ /// [`GfmTaskListItemValueChecked`][Name::GfmTaskListItemValueChecked],
+ /// [`GfmTaskListItemValueUnchecked`][Name::GfmTaskListItemValueUnchecked]
+ /// * **Construct**:
+ /// [`gfm_task_list_item_check`][crate::construct::gfm_task_list_item_check]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// > | * [x] y.
+ ///       ^^^
+ /// ```
+ GfmTaskListItemCheck,
+ /// GFM: Task list item check marker.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`GfmTaskListItemCheck`][Name::GfmTaskListItemCheck]
+ /// * **Content model**:
+ /// void
+ /// * **Construct**:
+ /// [`gfm_task_list_item_check`][crate::construct::gfm_task_list_item_check]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// > | * [x] y.
+ ///       ^ ^
+ /// ```
+ GfmTaskListItemMarker,
+ /// GFM: Task list item value: checked.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`GfmTaskListItemCheck`][Name::GfmTaskListItemCheck]
+ /// * **Content model**:
+ /// void
+ /// * **Construct**:
+ /// [`gfm_task_list_item_check`][crate::construct::gfm_task_list_item_check]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// > | * [x] y.
+ ///        ^
+ /// ```
+ GfmTaskListItemValueChecked,
+ /// GFM: Task list item value: unchecked.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`GfmTaskListItemCheck`][Name::GfmTaskListItemCheck]
+ /// * **Content model**:
+ /// void
+ /// * **Construct**:
+ /// [`gfm_task_list_item_check`][crate::construct::gfm_task_list_item_check]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// > | * [ ] z.
+ ///        ^
+ /// ```
+ GfmTaskListItemValueUnchecked,
/// Whole hard break (escape).
///
/// ## Info
@@ -2031,7 +2105,7 @@ pub enum Name {
}
/// List of void events, used to make sure everything is working well.
-pub const VOID_EVENTS: [Name; 47] = [
+pub const VOID_EVENTS: [Name; 50] = [
Name::AttentionSequence,
Name::AutolinkEmail,
Name::AutolinkMarker,
@@ -2061,6 +2135,9 @@ pub const VOID_EVENTS: [Name; 47] = [
Name::GfmAutolinkLiteralProtocol,
Name::GfmAutolinkLiteralWww,
Name::GfmStrikethroughSequence,
+ Name::GfmTaskListItemMarker,
+ Name::GfmTaskListItemValueChecked,
+ Name::GfmTaskListItemValueUnchecked,
Name::FrontmatterSequence,
Name::HardBreakEscape,
Name::HardBreakTrailing,
diff --git a/src/lib.rs b/src/lib.rs
index 893255a..5b7836c 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -178,6 +178,13 @@ pub struct Constructs {
/// ^^^
/// ```
pub gfm_strikethrough: bool,
+ /// GFM: task list item.
+ ///
+ /// ```markdown
+ /// > | * [x] y.
+ ///       ^^^
+ /// ```
+ pub gfm_task_list_item: bool,
/// Hard break (escape).
///
/// ```markdown
@@ -277,6 +284,7 @@ impl Default for Constructs {
frontmatter: false,
gfm_autolink_literal: false,
gfm_strikethrough: false,
+ gfm_task_list_item: false,
hard_break_escape: true,
hard_break_trailing: true,
heading_atx: true,
@@ -301,6 +309,7 @@ impl Constructs {
Self {
gfm_autolink_literal: true,
gfm_strikethrough: true,
+ gfm_task_list_item: true,
..Self::default()
}
}
diff --git a/src/state.rs b/src/state.rs
index da935d1..65ffbeb 100644
--- a/src/state.rs
+++ b/src/state.rs
@@ -145,6 +145,12 @@ pub enum Name {
FrontmatterCloseSequence,
FrontmatterCloseAfter,
+ GfmTaskListItemCheckStart,
+ GfmTaskListItemCheckInside,
+ GfmTaskListItemCheckClose,
+ GfmTaskListItemCheckAfter,
+ GfmTaskListItemCheckAfterSpaceOrTab,
+
HardBreakEscapeStart,
HardBreakEscapeAfter,
@@ -444,6 +450,14 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
Name::FrontmatterCloseSequence => construct::frontmatter::close_sequence,
Name::FrontmatterCloseAfter => construct::frontmatter::close_after,
+ Name::GfmTaskListItemCheckStart => construct::gfm_task_list_item_check::start,
+ Name::GfmTaskListItemCheckInside => construct::gfm_task_list_item_check::inside,
+ Name::GfmTaskListItemCheckClose => construct::gfm_task_list_item_check::close,
+ Name::GfmTaskListItemCheckAfter => construct::gfm_task_list_item_check::after,
+ Name::GfmTaskListItemCheckAfterSpaceOrTab => {
+ construct::gfm_task_list_item_check::after_space_or_tab
+ }
+
Name::HardBreakEscapeStart => construct::hard_break_escape::start,
Name::HardBreakEscapeAfter => construct::hard_break_escape::after,
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index 5932f11..7fcc481 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -17,11 +17,11 @@
//! whole document needs to be parsed up to the level of definitions, before
//! any level that can include references can be parsed.
-use crate::event::{Content, Event, Kind, VOID_EVENTS};
+use crate::event::{Content, Event, Kind, Name, VOID_EVENTS};
use crate::parser::ParseState;
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
-use crate::util::edit_map::EditMap;
+use crate::util::{edit_map::EditMap, skip};
use alloc::{vec, vec::Vec};
/// Link two [`Event`][]s.
@@ -94,6 +94,34 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool {
StateName::TextStart
});
+ // Check if this is the first paragraph, after zero or more
+ // definitions (or a blank line), in a list item.
+ // Used for GFM task list items.
+ if tokenizer.parse_state.options.constructs.gfm_task_list_item
+ && index > 2
+ && events[index - 1].kind == Kind::Enter
+ && events[index - 1].name == Name::Paragraph
+ {
+ let before = skip::opt_back(
+ events,
+ index - 2,
+ &[
+ Name::BlankLineEnding,
+ Name::Definition,
+ Name::LineEnding,
+ Name::SpaceOrTab,
+ ],
+ );
+
+ if events[before].kind == Kind::Exit
+ && events[before].name == Name::ListItemPrefix
+ {
+ tokenizer
+ .tokenize_state
+ .document_at_first_paragraph_of_list_item = true;
+ }
+ }
+
// Loop through links to pass them in order to the subtokenizer.
while let Some(index) = link_index {
let enter = &events[index];
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 0bd1f31..731b829 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -144,6 +144,10 @@ pub struct TokenizeState<'a> {
pub document_exits: Vec<Option<Vec<Event>>>,
/// Whether the previous flow was a paragraph.
pub document_paragraph_before: bool,
+ /// Whether this is the first paragraph (potentially after definitions) in
+ /// a list item.
+ /// Used for GFM task list items.
+ pub document_at_first_paragraph_of_list_item: bool,
// Couple of very frequent settings for parsing whitespace.
pub space_or_tab_eol_content: Option<Content>,
@@ -282,6 +286,7 @@ impl<'a> Tokenizer<'a> {
document_data_index: None,
document_child_state: None,
document_child: None,
+ document_at_first_paragraph_of_list_item: false,
definitions: vec![],
end: 0,
label_starts: vec![],
@@ -509,11 +514,6 @@ impl<'a> Tokenizer<'a> {
/// Stack an attempt, moving to `ok` on [`State::Ok`][] and `nok` on
/// [`State::Nok`][], reverting in both cases.
pub fn check(&mut self, ok: State, nok: State) {
- debug_assert_ne!(
- nok,
- State::Nok,
- "checking w/ `State::Nok` should likely be an attempt"
- );
// Always capture (and restore) when checking.
// No need to capture (and restore) when `nok` is `State::Nok`, because the
// parent attempt will do it.
diff --git a/src/util/skip.rs b/src/util/skip.rs
index a7de408..df63498 100644
--- a/src/util/skip.rs
+++ b/src/util/skip.rs
@@ -59,12 +59,20 @@ fn skip_opt_impl(events: &[Event], mut index: usize, names: &[Name], forward: bo
balance - 1
};
+ let next = if forward {
+ index + 1
+ } else if index > 0 {
+ index - 1
+ } else {
+ index
+ };
+
if events[index].name == *current && balance == 0 {
- index = if forward { index + 1 } else { index - 1 };
+ index = next;
break;
}
- index = if forward { index + 1 } else { index - 1 };
+ index = next;
}
}