From 4f776347163a514abadc7ded95e66a459be03bc9 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Sat, 29 Oct 2022 14:03:59 +0200 Subject: Fix GFM task list checkboxes followed by eol Closes GH-24. --- src/to_mdast.rs | 92 +++++++++++++++++++++++---------------------- tests/fuzz.rs | 8 ++++ tests/gfm_task_list_item.rs | 60 ++++++++++++++++++++++++++++- 3 files changed, 114 insertions(+), 46 deletions(-) diff --git a/src/to_mdast.rs b/src/to_mdast.rs index f00a4d6..4d2ca76 100644 --- a/src/to_mdast.rs +++ b/src/to_mdast.rs @@ -90,7 +90,6 @@ struct CompileContext<'a> { // compile markdown. character_reference_marker: u8, gfm_table_inside: bool, - gfm_task_list_item_check_after: bool, hard_break_after: bool, heading_setext_text_after: bool, jsx_tag_stack: Vec, @@ -128,7 +127,6 @@ impl<'a> CompileContext<'a> { bytes, character_reference_marker: 0, gfm_table_inside: false, - gfm_task_list_item_check_after: false, hard_break_after: false, heading_setext_text_after: false, jsx_tag_stack: vec![], @@ -347,7 +345,6 @@ fn exit(context: &mut CompileContext) -> Result<(), String> { | Name::GfmTableRow | Name::GfmTableCell | Name::HeadingAtx - | Name::ListItem | Name::ListOrdered | Name::ListUnordered | Name::Paragraph @@ -358,6 +355,7 @@ fn exit(context: &mut CompileContext) -> Result<(), String> { Name::CharacterEscapeValue | Name::CodeFlowChunk | Name::CodeTextData + | Name::Data | Name::FrontmatterChunk | Name::HtmlFlowData | Name::HtmlTextData @@ -385,7 +383,6 @@ fn exit(context: &mut CompileContext) -> Result<(), String> { Name::CodeFenced | Name::MathFlow => on_exit_raw_flow(context)?, Name::CodeIndented => on_exit_code_indented(context)?, Name::CodeText | Name::MathText => on_exit_raw_text(context)?, - Name::Data => on_exit_data_actual(context)?, Name::DefinitionDestinationString => on_exit_definition_destination_string(context), Name::DefinitionLabelString | Name::GfmFootnoteDefinitionLabelString => { on_exit_definition_id(context); @@ -399,7 +396,6 @@ fn exit(context: &mut CompileContext) -> Result<(), String> { | Name::GfmAutolinkLiteralXmpp => on_exit_gfm_autolink_literal(context)?, Name::GfmFootnoteCall | Name::Image | Name::Link => on_exit_media(context)?, Name::GfmTable => on_exit_gfm_table(context)?, - Name::GfmTaskListItemCheck => on_exit_gfm_task_list_item_check(context), Name::GfmTaskListItemValueUnchecked | Name::GfmTaskListItemValueChecked => { on_exit_gfm_task_list_item_value(context); } @@ -411,6 +407,7 @@ fn exit(context: &mut CompileContext) -> Result<(), String> { Name::HtmlFlow | Name::HtmlText => on_exit_html(context)?, Name::LabelText => on_exit_label_text(context), Name::LineEnding => on_exit_line_ending(context)?, + Name::ListItem => on_exit_list_item(context)?, Name::ListItemValue => on_exit_list_item_value(context), Name::MdxEsm | Name::MdxFlowExpression | Name::MdxTextExpression => { on_exit_mdx_esm_or_expression(context)?; @@ -1089,29 +1086,6 @@ fn on_exit_data(context: &mut CompileContext) -> Result<(), String> { Ok(()) } -/// Handle [`Exit`][Kind::Exit]:[`Data`][Name::Data] itself. -fn on_exit_data_actual(context: &mut CompileContext) -> Result<(), String> { - on_exit_data(context)?; - - // This field is set when a check exits. - // When that’s the case, there’s always a `data` event right after it. - // That data event is the first child (after the check) of the paragraph. - // We update the text positional info (from the already fixed paragraph), - // and remove the first byte, which is always a space or tab. - if context.gfm_task_list_item_check_after { - let parent = context.tail_mut(); - let start = parent.position().unwrap().start.clone(); - let node = parent.children_mut().unwrap().last_mut().unwrap(); - node.position_mut().unwrap().start = start; - if let Node::Text(node) = node { - node.value.remove(0); - } - context.gfm_task_list_item_check_after = false; - } - - Ok(()) -} - /// Handle [`Exit`][Kind::Exit]:[`DefinitionDestinationString`][Name::DefinitionDestinationString]. fn on_exit_definition_destination_string(context: &mut CompileContext) { let value = context.resume().to_string(); @@ -1210,23 +1184,6 @@ fn on_exit_gfm_table(context: &mut CompileContext) -> Result<(), String> { Ok(()) } -/// Handle [`Exit`][Kind::Exit]:[`GfmTaskListItemCheck`][Name::GfmTaskListItemCheck]. -fn on_exit_gfm_task_list_item_check(context: &mut CompileContext) { - // This field is set when a check exits. - // When that’s the case, there’s always a `data` event right after it. - // That data event is the first child (after the check) of the paragraph. - // We update the paragraph positional info to start after the check. - let mut start = point_from_event(&context.events[context.index]); - debug_assert!( - matches!(context.bytes[start.offset], b'\t' | b' '), - "expected tab or space after check" - ); - start.column += 1; - start.offset += 1; - context.tail_mut().position_mut().unwrap().start = start; - context.gfm_task_list_item_check_after = true; -} - /// Handle [`Exit`][Kind::Exit]:{[`GfmTaskListItemValueChecked`][Name::GfmTaskListItemValueChecked],[`GfmTaskListItemValueUnchecked`][Name::GfmTaskListItemValueUnchecked]}. fn on_exit_gfm_task_list_item_value(context: &mut CompileContext) { let checked = context.events[context.index].name == Name::GfmTaskListItemValueChecked; @@ -1416,6 +1373,51 @@ fn on_exit_media(context: &mut CompileContext) -> Result<(), String> { Ok(()) } +/// Handle [`Exit`][Kind::Exit]:[`ListItem`][Name::ListItem]. +fn on_exit_list_item(context: &mut CompileContext) -> Result<(), String> { + if let Node::ListItem(item) = context.tail_mut() { + if item.checked.is_some() { + if let Some(Node::Paragraph(paragraph)) = item.children.first_mut() { + if let Some(Node::Text(text)) = paragraph.children.first_mut() { + let mut point = text.position.as_ref().unwrap().start.clone(); + let bytes = text.value.as_bytes(); + let mut start = 0; + + // Move past eol. + if matches!(bytes[0], b'\t' | b' ') { + point.offset += 1; + point.column += 1; + start += 1; + } else if matches!(bytes[0], b'\r' | b'\n') { + point.line += 1; + point.column = 1; + point.offset += 1; + start += 1; + // Move past the LF of CRLF. + if bytes.len() > 1 && bytes[0] == b'\r' && bytes[1] == b'\n' { + point.offset += 1; + start += 1; + } + } + + // The whole text is whitespace: update the text. + if start == bytes.len() { + paragraph.children.remove(0); + } else { + text.value = str::from_utf8(&bytes[start..]).unwrap().into(); + text.position.as_mut().unwrap().start = point.clone(); + } + paragraph.position.as_mut().unwrap().start = point; + } + } + } + } + + on_exit(context)?; + + Ok(()) +} + /// Handle [`Exit`][Kind::Exit]:[`ListItemValue`][Name::ListItemValue]. fn on_exit_list_item_value(context: &mut CompileContext) { let start = Slice::from_position( diff --git a/tests/fuzz.rs b/tests/fuzz.rs index d1a87b7..297e6a9 100644 --- a/tests/fuzz.rs +++ b/tests/fuzz.rs @@ -109,5 +109,13 @@ fn fuzz() -> Result<(), String> { "10: attention in different links (GH-21)" ); + assert!( + matches!( + to_mdast("* [ ]\na", &Default::default()), + Ok(mdast::Node::Root(_)) + ), + "11: gfm task list items followed by eols (GH-24)" + ); + Ok(()) } diff --git a/tests/gfm_task_list_item.rs b/tests/gfm_task_list_item.rs index 638206f..8b3f066 100644 --- a/tests/gfm_task_list_item.rs +++ b/tests/gfm_task_list_item.rs @@ -1,5 +1,5 @@ use markdown::{ - mdast::{List, ListItem, Node, Paragraph, Root, Text}, + mdast::{Emphasis, List, ListItem, Node, Paragraph, Root, Text}, to_html, to_html_with_options, to_mdast, unist::Position, Options, ParseOptions, @@ -291,5 +291,63 @@ Text. "should support task list items as `checked` fields on `ListItem`s in mdast" ); + assert_eq!( + to_mdast( + "* [x]\r\n a\n* [ ] b\n* [x]\t \r*c*", + &ParseOptions::gfm() + )?, + Node::Root(Root { + children: vec![Node::List(List { + ordered: false, + spread: false, + start: None, + children: vec![ + Node::ListItem(ListItem { + checked: Some(true), + spread: false, + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: "a".into(), + position: Some(Position::new(2, 1, 7, 2, 4, 10)) + }),], + position: Some(Position::new(2, 1, 7, 2, 4, 10)) + })], + position: Some(Position::new(1, 1, 0, 2, 4, 10)) + }), + Node::ListItem(ListItem { + checked: Some(false), + spread: false, + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: " b".into(), + position: Some(Position::new(3, 7, 17, 3, 10, 20)) + }),], + position: Some(Position::new(3, 7, 17, 3, 10, 20)) + })], + position: Some(Position::new(3, 1, 11, 3, 10, 20)) + }), + Node::ListItem(ListItem { + checked: Some(true), + spread: false, + children: vec![Node::Paragraph(Paragraph { + children: vec![Node::Emphasis(Emphasis { + children: vec![Node::Text(Text { + value: "c".into(), + position: Some(Position::new(5, 2, 30, 5, 3, 31)) + }),], + position: Some(Position::new(5, 1, 29, 5, 4, 32)) + })], + position: Some(Position::new(5, 1, 29, 5, 4, 32)) + })], + position: Some(Position::new(4, 1, 21, 5, 4, 32)) + }), + ], + position: Some(Position::new(1, 1, 0, 5, 4, 32)) + })], + position: Some(Position::new(1, 1, 0, 5, 4, 32)) + }), + "should handle lots of whitespace after checkbox, and non-text" + ); + Ok(()) } -- cgit