diff options
-rw-r--r-- | src/compiler.rs | 235 | ||||
-rw-r--r-- | src/construct/document.rs | 16 | ||||
-rw-r--r-- | src/construct/flow.rs | 41 | ||||
-rw-r--r-- | src/construct/gfm_autolink_literal.rs | 1 | ||||
-rw-r--r-- | src/construct/gfm_footnote_definition.rs | 2 | ||||
-rw-r--r-- | src/construct/gfm_table.rs | 1042 | ||||
-rw-r--r-- | src/construct/heading_setext.rs | 1 | ||||
-rw-r--r-- | src/construct/label_end.rs | 1 | ||||
-rw-r--r-- | src/construct/mod.rs | 2 | ||||
-rw-r--r-- | src/event.rs | 264 | ||||
-rw-r--r-- | src/lib.rs | 13 | ||||
-rw-r--r-- | src/resolve.rs | 6 | ||||
-rw-r--r-- | src/state.rs | 51 | ||||
-rw-r--r-- | src/tokenizer.rs | 9 | ||||
-rw-r--r-- | tests/gfm_table.rs | 1782 | ||||
-rw-r--r-- | tests/heading_setext.rs | 12 |
16 files changed, 3442 insertions, 36 deletions
diff --git a/src/compiler.rs b/src/compiler.rs index 9057505..5626f8a 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -68,6 +68,43 @@ struct Definition { title: Option<String>, } +/// GFM table: column alignment. +#[derive(Debug, PartialEq, Eq, Copy, Clone)] +enum GfmTableAlign { + /// No alignment. + /// + /// ```markdown + /// | | aaa | + /// > | | --- | + /// ^^^ + /// ``` + None, + /// Left alignment. + /// + /// ```markdown + /// | | aaa | + /// > | | :-- | + /// ^^^ + /// ``` + Left, + /// Center alignment. + /// + /// ```markdown + /// | | aaa | + /// > | | :-: | + /// ^^^ + /// ``` + Center, + /// Right alignment. + /// + /// ```markdown + /// | | aaa | + /// > | | --: | + /// ^^^ + /// ``` + Right, +} + /// Context used to compile markdown. #[allow(clippy::struct_excessive_bools)] #[derive(Debug)] @@ -107,6 +144,12 @@ struct CompileContext<'a> { pub gfm_footnote_definitions: Vec<(String, String)>, pub gfm_footnote_definition_calls: Vec<(String, usize)>, pub gfm_footnote_definition_stack: Vec<(usize, usize)>, + /// Whether we are in a GFM table head. + pub gfm_table_in_head: bool, + /// Current GFM table alignment. + pub gfm_table_align: Option<Vec<GfmTableAlign>>, + /// Current GFM table column. + pub gfm_table_column: usize, // Fields used to influance the current compilation. /// Ignore the next line ending. 
pub slurp_one_line_ending: bool, @@ -153,6 +196,9 @@ impl<'a> CompileContext<'a> { gfm_footnote_definitions: vec![], gfm_footnote_definition_calls: vec![], gfm_footnote_definition_stack: vec![], + gfm_table_in_head: false, + gfm_table_align: None, + gfm_table_column: 0, tight_stack: vec![], slurp_one_line_ending: false, image_alt_inside: false, @@ -350,6 +396,11 @@ fn enter(context: &mut CompileContext) { Name::GfmFootnoteDefinition => on_enter_gfm_footnote_definition(context), Name::GfmFootnoteCall => on_enter_gfm_footnote_call(context), Name::GfmStrikethrough => on_enter_gfm_strikethrough(context), + Name::GfmTable => on_enter_gfm_table(context), + Name::GfmTableBody => on_enter_gfm_table_body(context), + Name::GfmTableCell => on_enter_gfm_table_cell(context), + Name::GfmTableHead => on_enter_gfm_table_head(context), + Name::GfmTableRow => on_enter_gfm_table_row(context), Name::GfmTaskListItemCheck => on_enter_gfm_task_list_item_check(context), Name::HtmlFlow => on_enter_html_flow(context), Name::HtmlText => on_enter_html_text(context), @@ -407,6 +458,11 @@ fn exit(context: &mut CompileContext) { Name::GfmFootnoteDefinitionPrefix => on_exit_gfm_footnote_definition_prefix(context), Name::GfmFootnoteDefinition => on_exit_gfm_footnote_definition(context), Name::GfmStrikethrough => on_exit_gfm_strikethrough(context), + Name::GfmTable => on_exit_gfm_table(context), + Name::GfmTableBody => on_exit_gfm_table_body(context), + Name::GfmTableCell => on_exit_gfm_table_cell(context), + Name::GfmTableHead => on_exit_gfm_table_head(context), + Name::GfmTableRow => on_exit_gfm_table_row(context), Name::GfmTaskListItemCheck => on_exit_gfm_task_list_item_check(context), Name::GfmTaskListItemValueChecked => on_exit_gfm_task_list_item_value_checked(context), Name::HardBreakEscape | Name::HardBreakTrailing => on_exit_break(context), @@ -536,6 +592,105 @@ fn on_enter_gfm_strikethrough(context: &mut CompileContext) { } } +/// Handle [`Enter`][Kind::Enter]:[`GfmTable`][Name::GfmTable]. 
+fn on_enter_gfm_table(context: &mut CompileContext) { + // Find the alignment. + let mut index = context.index; + let mut in_delimiter_row = false; + let mut align = vec![]; + + while index < context.events.len() { + let event = &context.events[index]; + + if in_delimiter_row { + if event.kind == Kind::Enter { + // Start of alignment value: set a new column. + if event.name == Name::GfmTableDelimiterCellValue { + align.push( + if context.events[index + 1].name == Name::GfmTableDelimiterMarker { + GfmTableAlign::Left + } else { + GfmTableAlign::None + }, + ); + } + } else { + // End of alignment value: change the column. + if event.name == Name::GfmTableDelimiterCellValue { + if context.events[index - 1].name == Name::GfmTableDelimiterMarker { + let align_index = align.len() - 1; + align[align_index] = if align[align_index] == GfmTableAlign::Left { + GfmTableAlign::Center + } else { + GfmTableAlign::Right + } + } + } + // Done! + else if event.name == Name::GfmTableDelimiterRow { + break; + } + } + } else if event.kind == Kind::Enter && event.name == Name::GfmTableDelimiterRow { + in_delimiter_row = true; + } + + index += 1; + } + + // Generate. + context.gfm_table_align = Some(align); + context.line_ending_if_needed(); + context.push("<table>"); +} + +/// Handle [`Enter`][Kind::Enter]:[`GfmTableBody`][Name::GfmTableBody]. +fn on_enter_gfm_table_body(context: &mut CompileContext) { + context.push("<tbody>"); +} + +/// Handle [`Enter`][Kind::Enter]:[`GfmTableCell`][Name::GfmTableCell]. +fn on_enter_gfm_table_cell(context: &mut CompileContext) { + let column = context.gfm_table_column; + let align = context.gfm_table_align.as_ref().unwrap(); + + if column >= align.len() { + // Capture cell to ignore it. 
+ context.buffer(); + } else { + let value = align[column]; + context.line_ending_if_needed(); + + if context.gfm_table_in_head { + context.push("<th"); + } else { + context.push("<td"); + } + + match value { + GfmTableAlign::Left => context.push(" align=\"left\""), + GfmTableAlign::Right => context.push(" align=\"right\""), + GfmTableAlign::Center => context.push(" align=\"center\""), + GfmTableAlign::None => {} + } + + context.push(">"); + } +} + +/// Handle [`Enter`][Kind::Enter]:[`GfmTableHead`][Name::GfmTableHead]. +fn on_enter_gfm_table_head(context: &mut CompileContext) { + context.line_ending_if_needed(); + context.push("<thead>"); + context.gfm_table_in_head = true; +} + +/// Handle [`Enter`][Kind::Enter]:[`GfmTableRow`][Name::GfmTableRow]. +fn on_enter_gfm_table_row(context: &mut CompileContext) { + context.line_ending_if_needed(); + context.push("<tr>"); +} + /// Handle [`Enter`][Kind::Enter]:[`GfmTaskListItemCheck`][Name::GfmTaskListItemCheck]. fn on_enter_gfm_task_list_item_check(context: &mut CompileContext) { if !context.image_alt_inside { @@ -892,7 +1047,24 @@ fn on_exit_raw_flow(context: &mut CompileContext) { /// Handle [`Exit`][Kind::Exit]:{[`CodeText`][Name::CodeText],[`MathText`][Name::MathText]}. fn on_exit_raw_text(context: &mut CompileContext) { let result = context.resume(); - let mut bytes = result.as_bytes(); + let mut bytes = result.as_bytes().to_vec(); + + // If we are in a GFM table, we need to decode escaped pipes. + // This is a rather weird GFM feature. 
+ if context.gfm_table_align.is_some() { + let mut index = 0; + let mut len = bytes.len(); + + while index < len { + if index + 1 < len && bytes[index] == b'\\' && bytes[index + 1] == b'|' { + bytes.remove(index); + len -= 1; + } + + index += 1; + } + } + let mut trim = false; let mut index = 0; let mut end = bytes.len(); @@ -910,11 +1082,12 @@ fn on_exit_raw_text(context: &mut CompileContext) { } if trim { - bytes = &bytes[1..end]; + bytes.remove(0); + bytes.pop(); } context.raw_text_inside = false; - context.push(str::from_utf8(bytes).unwrap()); + context.push(str::from_utf8(&bytes).unwrap()); if !context.image_alt_inside { context.push("</code>"); @@ -1113,6 +1286,62 @@ fn on_exit_gfm_strikethrough(context: &mut CompileContext) { } } +/// Handle [`Exit`][Kind::Exit]:[`GfmTable`][Name::GfmTable]. +fn on_exit_gfm_table(context: &mut CompileContext) { + context.gfm_table_align = None; + context.line_ending_if_needed(); + context.push("</table>"); +} + +/// Handle [`Exit`][Kind::Exit]:[`GfmTableBody`][Name::GfmTableBody]. +fn on_exit_gfm_table_body(context: &mut CompileContext) { + context.line_ending_if_needed(); + context.push("</tbody>"); +} + +/// Handle [`Exit`][Kind::Exit]:[`GfmTableCell`][Name::GfmTableCell]. +fn on_exit_gfm_table_cell(context: &mut CompileContext) { + let align = context.gfm_table_align.as_ref().unwrap(); + + if context.gfm_table_column < align.len() { + if context.gfm_table_in_head { + context.push("</th>"); + } else { + context.push("</td>"); + } + } else { + // Stop capturing. + context.resume(); + } + + context.gfm_table_column += 1; +} + +/// Handle [`Exit`][Kind::Exit]:[`GfmTableHead`][Name::GfmTableHead]. +fn on_exit_gfm_table_head(context: &mut CompileContext) { + context.gfm_table_in_head = false; + context.line_ending_if_needed(); + context.push("</thead>"); +} + +/// Handle [`Exit`][Kind::Exit]:[`GfmTableRow`][Name::GfmTableRow]. 
+fn on_exit_gfm_table_row(context: &mut CompileContext) { + let mut column = context.gfm_table_column; + let len = context.gfm_table_align.as_ref().unwrap().len(); + + // Add “phantom” cells, for body rows that are shorter than the delimiter + // row (which is equal to the head row). + while column < len { + on_enter_gfm_table_cell(context); + on_exit_gfm_table_cell(context); + column += 1; + } + + context.gfm_table_column = 0; + context.line_ending_if_needed(); + context.push("</tr>"); +} + /// Handle [`Exit`][Kind::Exit]:[`GfmTaskListItemCheck`][Name::GfmTaskListItemCheck]. fn on_exit_gfm_task_list_item_check(context: &mut CompileContext) { if !context.image_alt_inside { diff --git a/src/construct/document.rs b/src/construct/document.rs index 9c76e46..e31e58d 100644 --- a/src/construct/document.rs +++ b/src/construct/document.rs @@ -269,6 +269,14 @@ pub fn container_new_after(tokenizer: &mut Tokenizer) -> State { exit_containers(tokenizer, &Phase::Prefix); } + // We are “piercing” into the flow with a new container. + tokenizer + .tokenize_state + .document_child + .as_mut() + .unwrap() + .pierce = true; + tokenizer .tokenize_state .document_container_stack @@ -398,12 +406,11 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State { let mut stack_index = child.stack.len(); // Use two algo’s: one for when we’re suspended or in multiline things - // like definitions, another (b) for when we fed the line ending and closed - // a) + // like definitions, another for when we fed the line ending and closed. while !document_lazy_continuation_current && stack_index > 0 { stack_index -= 1; let name = &child.stack[stack_index]; - if name == &Name::Paragraph || name == &Name::Definition { + if name == &Name::Paragraph || name == &Name::Definition || name == &Name::GfmTableHead { document_lazy_continuation_current = true; } } @@ -418,6 +425,9 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State { } } + // Reset “piercing”. 
+ child.pierce = false; + if child.lazy && tokenizer.tokenize_state.document_lazy_accepting_before && document_lazy_continuation_current diff --git a/src/construct/flow.rs b/src/construct/flow.rs index 3f1cd77..3f7bc9c 100644 --- a/src/construct/flow.rs +++ b/src/construct/flow.rs @@ -65,29 +65,10 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { ); State::Retry(StateName::HtmlFlowStart) } - // Note: `-` is also used in thematic breaks so it’s not included here. - Some(b'=') => { - tokenizer.attempt( - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), - ); - State::Retry(StateName::HeadingSetextStart) - } - Some(b'[') => { - tokenizer.attempt( - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), - ); - State::Retry(StateName::DefinitionStart) - } // Actual parsing: blank line? Indented code? Indented anything? - // Also includes `-` which can be a setext heading underline or thematic break. - None | Some(b'\t' | b'\n' | b' ' | b'-') => State::Retry(StateName::FlowBlankLineBefore), - // Must be a paragraph. - Some(_) => { - tokenizer.attempt(State::Next(StateName::FlowAfter), State::Nok); - State::Retry(StateName::ParagraphStart) - } + // Tables, setext heading underlines, definitions, and paragraphs are + // particularly weird. + _ => State::Retry(StateName::FlowBlankLineBefore), } } @@ -185,11 +166,25 @@ pub fn before_heading_setext(tokenizer: &mut Tokenizer) -> State { pub fn before_thematic_break(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeDefinition), + State::Next(StateName::FlowBeforeGfmTable), ); State::Retry(StateName::ThematicBreakStart) } +/// At GFM table. 
+/// +/// ```markdown +/// > | | a | +/// ^ +/// ``` +pub fn before_gfm_table(tokenizer: &mut Tokenizer) -> State { + tokenizer.attempt( + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeDefinition), + ); + State::Retry(StateName::GfmTableStart) +} + /// At definition. /// /// ```markdown diff --git a/src/construct/gfm_autolink_literal.rs b/src/construct/gfm_autolink_literal.rs index 7fdeb01..704c536 100644 --- a/src/construct/gfm_autolink_literal.rs +++ b/src/construct/gfm_autolink_literal.rs @@ -5,7 +5,6 @@ use crate::tokenizer::Tokenizer; use crate::util::classify_character::{classify, Kind as CharacterKind}; use crate::util::slice::{Position, Slice}; use alloc::vec::Vec; -extern crate std; use core::str; // To do: doc al functions. diff --git a/src/construct/gfm_footnote_definition.rs b/src/construct/gfm_footnote_definition.rs index 3715044..cbe816f 100644 --- a/src/construct/gfm_footnote_definition.rs +++ b/src/construct/gfm_footnote_definition.rs @@ -141,7 +141,7 @@ //! //! ## References //! -//! * [`micromark-extension-gfm-task-list-item`](https://github.com/micromark/micromark-extension-gfm-footnote) +//! * [`micromark-extension-gfm-footnote`](https://github.com/micromark/micromark-extension-gfm-footnote) //! //! > 👉 **Note**: Footnotes are not specified in GFM yet. //! > See [`github/cmark-gfm#270`](https://github.com/github/cmark-gfm/issues/270) diff --git a/src/construct/gfm_table.rs b/src/construct/gfm_table.rs new file mode 100644 index 0000000..d7c2b69 --- /dev/null +++ b/src/construct/gfm_table.rs @@ -0,0 +1,1042 @@ +//! GFM: table occurs in the [flow][] content type. +//! +//! ## Grammar +//! +//! Tables form with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>): +//! +//! ```bnf +//! gfm_table ::= gfm_table_head 0*(eol gfm_table_body_row) +//! +//! ; Restriction: both rows must have the same number of cells. +//! gfm_table_head ::= gfm_table_row eol gfm_table_delimiter_row +//! 
+//! gfm_table_row ::= ['|'] gfm_table_cell 0*('|' gfm_table_cell) ['|'] *space_or_tab +//! gfm_table_cell ::= *space_or_tab gfm_table_text *space_or_tab +//! gfm_table_text ::= 0*(line - '\\' - '|' | '\\' ['\\' | '|']) +//! +//! gfm_table_delimiter_row ::= ['|'] gfm_table_delimiter_cell 0*('|' gfm_table_delimiter_cell) ['|'] *space_or_tab +//! gfm_table_delimiter_cell ::= *space_or_tab gfm_table_delimiter_value *space_or_tab +//! gfm_table_delimiter_value ::= [':'] 1*'-' [':'] +//! ``` +//! +//! As this construct occurs in flow, like all flow constructs, it must be +//! followed by an eol (line ending) or eof (end of file). +//! +//! The above grammar shows that basically anything can be a cell or a row. +//! The main thing that makes something a row, is that it occurs directly before +//! or after a delimiter row, or after another row. +//! +//! It is not required for a table to have a body: it can end right after the +//! delimiter row. +//! +//! Each column can be marked with an alignment. +//! The alignment marker is a colon (`:`) used before and/or after delimiter row +//! filler. +//! To illustrate: +//! +//! ```markdown +//! | none | left | right | center | +//! | ---- | :--- | ----: | :----: | +//! ``` +//! +//! The number of cells in the delimiter row, is the number of columns of the +//! table. +//! Only the head row is required to have the same number of cells. +//! Body rows are not required to have a certain number of cells. +//! For body rows that have fewer cells than the number of columns of the table, +//! empty cells are injected. +//! When a row has more cells than the number of columns of the table, the +//! superfluous cells are dropped. +//! To illustrate: +//! +//! ```markdown +//! | a | b | +//! | - | - | +//! | c | +//! | d | e | f | +//! ``` +//! +//! Yields: +//! +//! ```html +//! <table> +//! <thead> +//! <tr> +//! <th>a</th> +//! <th>b</th> +//! </tr> +//! </thead> +//! <tbody> +//! <tr> +//! <td>c</td> +//! <td></td> +//! </tr> +//! 
<tr> +//! <td>d</td> +//! <td>e</td> +//! </tr> +//! </tbody> +//! </table> +//! ``` +//! +//! Each cell’s text is interpreted as the [text][] content type. +//! That means that it can include constructs such as [attention][attention]. +//! +//! The grammar for cells prohibits the use of `|` in them. +//! To use pipes in cells, encode them as a character reference or character +//! escape: `&vert;` (or `&VerticalLine;`, `&verbar;`, `&#124;`, `&#x7c;`) or +//! `\|`. +//! +//! Escapes will typically work, but they are not supported in +//! [code (text)][raw_text] (and the math (text) extension). +//! To work around this, GitHub came up with a rather weird “trick”. +//! When inside a table cell *and* inside code, escaped pipes *are* decoded. +//! To illustrate: +//! +//! ```markdown +//! | Name | Character | +//! | - | - | +//! | Left curly brace | `{` | +//! | Pipe | `\|` | +//! | Right curly brace | `}` | +//! ``` +//! +//! Yields: +//! +//! ```html +//! <table> +//! <thead> +//! <tr> +//! <th>Name</th> +//! <th>Character</th> +//! </tr> +//! </thead> +//! <tbody> +//! <tr> +//! <td>Left curly brace</td> +//! <td><code>{</code></td> +//! </tr> +//! <tr> +//! <td>Pipe</td> +//! <td><code>|</code></td> +//! </tr> +//! <tr> +//! <td>Right curly brace</td> +//! <td><code>}</code></td> +//! </tr> +//! </tbody> +//! </table> +//! ``` +//! +//! > 👉 **Note**: no other character can be escaped like this. +//! > Escaping pipes in code does not work when not inside a table, either. +//! +//! ## HTML +//! +//! GFM tables relate to several HTML elements: `<table>`, `<tbody>`, `<td>`, +//! `<th>`, `<thead>`, and `<tr>`. +//! See +//! [*§ 4.9.1 The `table` element*][html_table], +//! [*§ 4.9.5 The `tbody` element*][html_tbody], +//! [*§ 4.9.9 The `td` element*][html_td], +//! [*§ 4.9.10 The `th` element*][html_th], +//! [*§ 4.9.6 The `thead` element*][html_thead], and +//! [*§ 4.9.8 The `tr` element*][html_tr] +//! in the HTML spec for more info. +//! 
If the alignment of a column is left, right, or center, a deprecated +//! `align` attribute is added to each `<th>` and `<td>` element belonging to +//! that column. +//! That attribute is interpreted by browsers as if a CSS `text-align` property +//! was included, with its value set to that same keyword. +//! +//! ## Recommendation +//! +//! When authoring markdown with GFM tables, it’s recommended to *always* put +//! pipes around cells. +//! Without them, it can be hard to infer whether the table will work, how many +//! columns there are, and which column you are currently editing. +//! +//! It is recommended to not use many columns, as it results in very long lines, +//! making it hard to infer which column you are currently editing. +//! +//! For larger tables, particularly when cells vary in size, it is recommended +//! *not* to manually “pad” cell text. +//! While it can look better, it results in a lot of time spent realigning +//! everything when a new, longer cell is added or the longest cell removed, as +//! every row then must be changed. +//! Other than costing time, it also causes large diffs in Git. +//! +//! To illustrate, when authoring large tables, it is discouraged to pad cells +//! like this: +//! +//! ```markdown +//! | Alpha bravo charlie |              delta | +//! | ------------------- | -----------------: | +//! | Echo                | Foxtrot golf hotel | +//! ``` +//! +//! Instead, use single spaces (and single filler dashes): +//! +//! ```markdown +//! | Alpha bravo charlie | delta | +//! | - | -: | +//! | Echo | Foxtrot golf hotel | +//! ``` +//! +//! ## Bugs +//! +//! GitHub’s own algorithm to parse tables contains a bug. +//! This bug is not present in this project. +//! The issue relating to tables is: +//! +//! * [GFM tables: escaped escapes are incorrectly treated as escapes](https://github.com/github/cmark-gfm/issues/277) +//! +//! ## Tokens +//! +//! * [`GfmTable`][Name::GfmTable] +//! * [`GfmTableBody`][Name::GfmTableBody] +//! 
* [`GfmTableCell`][Name::GfmTableCell] +//! * [`GfmTableCellDivider`][Name::GfmTableCellDivider] +//! * [`GfmTableCellText`][Name::GfmTableCellText] +//! * [`GfmTableDelimiterCell`][Name::GfmTableDelimiterCell] +//! * [`GfmTableDelimiterCellValue`][Name::GfmTableDelimiterCellValue] +//! * [`GfmTableDelimiterFiller`][Name::GfmTableDelimiterFiller] +//! * [`GfmTableDelimiterMarker`][Name::GfmTableDelimiterMarker] +//! * [`GfmTableDelimiterRow`][Name::GfmTableDelimiterRow] +//! * [`GfmTableHead`][Name::GfmTableHead] +//! * [`GfmTableRow`][Name::GfmTableRow] +//! * [`LineEnding`][Name::LineEnding] +//! +//! ## References +//! +//! * [`micromark-extension-gfm-table`](https://github.com/micromark/micromark-extension-gfm-table) +//! * [*§ 4.10 Tables (extension)* in `GFM`](https://github.github.com/gfm/#tables-extension-) +//! +//! [flow]: crate::construct::flow +//! [text]: crate::construct::text +//! [attention]: crate::construct::attention +//! [raw_text]: crate::construct::raw_text +//! [html_table]: https://html.spec.whatwg.org/multipage/tables.html#the-table-element +//! [html_tbody]: https://html.spec.whatwg.org/multipage/tables.html#the-tbody-element +//! [html_td]: https://html.spec.whatwg.org/multipage/tables.html#the-td-element +//! [html_th]: https://html.spec.whatwg.org/multipage/tables.html#the-th-element +//! [html_thead]: https://html.spec.whatwg.org/multipage/tables.html#the-thead-element +//! [html_tr]: https://html.spec.whatwg.org/multipage/tables.html#the-tr-element + +use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}; +use crate::event::{Content, Event, Kind, Link, Name}; +use crate::resolve::Name as ResolveName; +use crate::state::{Name as StateName, State}; +use crate::tokenizer::Tokenizer; +use crate::util::{constant::TAB_SIZE, skip::opt_back as skip_opt_back}; +use alloc::vec; + +/// Start of a GFM table. +/// +/// If there is a valid table row or table head before, then we try to parse +/// another row. 
+/// Otherwise, we try to parse a head. +/// +/// ```markdown +/// > | | a | +/// ^ +/// | | - | +/// > | | b | +/// ^ +/// ``` +pub fn start(tokenizer: &mut Tokenizer) -> State { + if tokenizer.parse_state.options.constructs.gfm_table { + if !tokenizer.pierce + && !tokenizer.events.is_empty() + && matches!( + tokenizer.events[skip_opt_back( + &tokenizer.events, + tokenizer.events.len() - 1, + &[Name::LineEnding, Name::SpaceOrTab], + )] + .name, + Name::GfmTableHead | Name::GfmTableRow + ) + { + State::Retry(StateName::GfmTableBodyRowStart) + } else { + State::Retry(StateName::GfmTableHeadRowBefore) + } + } else { + State::Nok + } +} + +/// Before table head row. +/// +/// ```markdown +/// > | | a | +/// ^ +/// | | - | +/// | | b | +/// ``` +pub fn head_row_before(tokenizer: &mut Tokenizer) -> State { + tokenizer.enter(Name::GfmTableHead); + tokenizer.enter(Name::GfmTableRow); + if matches!(tokenizer.current, Some(b'\t' | b' ')) { + tokenizer.attempt(State::Next(StateName::GfmTableHeadRowStart), State::Nok); + State::Retry(space_or_tab_min_max( + tokenizer, + 0, + if tokenizer.parse_state.options.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }, + )) + } else { + State::Retry(StateName::GfmTableHeadRowStart) + } +} + +/// Before table head row, after whitespace. +/// +/// ```markdown +/// > | | a | +/// ^ +/// | | - | +/// | | b | +/// ``` +pub fn head_row_start(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + // 4+ spaces. + Some(b'\t' | b' ') => State::Nok, + Some(b'|') => State::Retry(StateName::GfmTableHeadRowBreak), + _ => { + tokenizer.tokenize_state.seen = true; + State::Retry(StateName::GfmTableHeadRowBreak) + } + } +} + +/// At break in table head row. 
+/// +/// ```markdown +/// > | | a | +/// ^ +/// ^ +/// ^ +/// | | - | +/// | | b | +/// ``` +pub fn head_row_break(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + None => { + tokenizer.tokenize_state.seen = false; + State::Nok + } + Some(b'\n') => { + // Feel free to interrupt: + tokenizer.interrupt = true; + tokenizer.exit(Name::GfmTableRow); + tokenizer.enter(Name::LineEnding); + tokenizer.consume(); + tokenizer.exit(Name::LineEnding); + State::Next(StateName::GfmTableHeadDelimiterStart) + } + Some(b'\t' | b' ') => { + tokenizer.attempt(State::Next(StateName::GfmTableHeadRowBreak), State::Nok); + State::Retry(space_or_tab(tokenizer)) + } + _ => { + // Whether a delimiter was seen. + if tokenizer.tokenize_state.seen { + tokenizer.tokenize_state.seen = false; + // Header cell count. + tokenizer.tokenize_state.size += 1; + } + + if tokenizer.current == Some(b'|') { + tokenizer.enter(Name::GfmTableCellDivider); + tokenizer.consume(); + tokenizer.exit(Name::GfmTableCellDivider); + // Whether a delimiter was seen. + tokenizer.tokenize_state.seen = true; + State::Next(StateName::GfmTableHeadRowBreak) + } else { + // Anything else is cell data. + tokenizer.enter(Name::Data); + State::Retry(StateName::GfmTableHeadRowData) + } + } + } +} + +/// In table head row data. +/// +/// ```markdown +/// > | | a | +/// ^ +/// | | - | +/// | | b | +/// ``` +pub fn head_row_data(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + None | Some(b'\t' | b'\n' | b' ' | b'|') => { + tokenizer.exit(Name::Data); + State::Retry(StateName::GfmTableHeadRowBreak) + } + _ => { + let name = if tokenizer.current == Some(b'\\') { + StateName::GfmTableHeadRowEscape + } else { + StateName::GfmTableHeadRowData + }; + tokenizer.consume(); + State::Next(name) + } + } +} + +/// In table head row escape. 
+/// +/// ```markdown +/// > | | a\-b | +/// ^ +/// | | ---- | +/// | | c | +/// ``` +pub fn head_row_escape(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b'\\' | b'|') => { + tokenizer.consume(); + State::Next(StateName::GfmTableHeadRowData) + } + _ => State::Retry(StateName::GfmTableHeadRowData), + } +} + +/// Before delimiter row. +/// +/// ```markdown +/// | | a | +/// > | | - | +/// ^ +/// | | b | +/// ``` +pub fn head_delimiter_start(tokenizer: &mut Tokenizer) -> State { + // Reset `interrupt`. + tokenizer.interrupt = false; + + if tokenizer.lazy || tokenizer.pierce { + State::Nok + } else { + tokenizer.enter(Name::GfmTableDelimiterRow); + // Track if we’ve seen a `:` or `|`. + tokenizer.tokenize_state.seen = false; + + match tokenizer.current { + Some(b'\t' | b' ') => { + tokenizer.attempt( + State::Next(StateName::GfmTableHeadDelimiterBefore), + State::Next(StateName::GfmTableHeadDelimiterNok), + ); + + State::Retry(space_or_tab_min_max( + tokenizer, + 0, + if tokenizer.parse_state.options.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }, + )) + } + _ => State::Retry(StateName::GfmTableHeadDelimiterBefore), + } + } +} + +/// Before delimiter row, after optional whitespace. +/// +/// Reused when a `|` is found later, to parse another cell. +/// +/// ```markdown +/// | | a | +/// > | | - | +/// ^ +/// | | b | +/// ``` +pub fn head_delimiter_before(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b'-' | b':') => State::Retry(StateName::GfmTableHeadDelimiterValueBefore), + Some(b'|') => { + tokenizer.tokenize_state.seen = true; + // If we start with a pipe, we open a cell marker. + tokenizer.enter(Name::GfmTableCellDivider); + tokenizer.consume(); + tokenizer.exit(Name::GfmTableCellDivider); + State::Next(StateName::GfmTableHeadDelimiterCellBefore) + } + // More whitespace / empty row not allowed at start. 
+ _ => State::Retry(StateName::GfmTableHeadDelimiterNok), + } +} + +/// After `|`, before delimiter cell. +/// +/// ```markdown +/// | | a | +/// > | | - | +/// ^ +/// ``` +pub fn head_delimiter_cell_before(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b'\t' | b' ') => { + tokenizer.attempt( + State::Next(StateName::GfmTableHeadDelimiterValueBefore), + State::Nok, + ); + State::Retry(space_or_tab(tokenizer)) + } + _ => State::Retry(StateName::GfmTableHeadDelimiterValueBefore), + } +} + +/// Before delimiter cell value. +/// +/// ```markdown +/// | | a | +/// > | | - | +/// ^ +/// ``` +pub fn head_delimiter_value_before(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + None | Some(b'\n') => State::Retry(StateName::GfmTableHeadDelimiterCellAfter), + Some(b':') => { + // Align: left. + tokenizer.tokenize_state.size_b += 1; + tokenizer.tokenize_state.seen = true; + tokenizer.enter(Name::GfmTableDelimiterMarker); + tokenizer.consume(); + tokenizer.exit(Name::GfmTableDelimiterMarker); + State::Next(StateName::GfmTableHeadDelimiterLeftAlignmentAfter) + } + Some(b'-') => { + // Align: none. + tokenizer.tokenize_state.size_b += 1; + State::Retry(StateName::GfmTableHeadDelimiterLeftAlignmentAfter) + } + _ => State::Retry(StateName::GfmTableHeadDelimiterNok), + } +} + +/// After delimiter cell left alignment marker. +/// +/// ```markdown +/// | | a | +/// > | | :- | +/// ^ +/// ``` +pub fn head_delimiter_left_alignment_after(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b'-') => { + tokenizer.enter(Name::GfmTableDelimiterFiller); + State::Retry(StateName::GfmTableHeadDelimiterFiller) + } + // Anything else is not ok after the left-align colon. + _ => State::Retry(StateName::GfmTableHeadDelimiterNok), + } +} + +/// In delimiter cell filler. 
+/// +/// ```markdown +/// | | a | +/// > | | - | +/// ^ +/// ``` +pub fn head_delimiter_filler(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b'-') => { + tokenizer.consume(); + State::Next(StateName::GfmTableHeadDelimiterFiller) + } + Some(b':') => { + // Align is `center` if it was `left`, `right` otherwise. + tokenizer.tokenize_state.seen = true; + tokenizer.exit(Name::GfmTableDelimiterFiller); + tokenizer.enter(Name::GfmTableDelimiterMarker); + tokenizer.consume(); + tokenizer.exit(Name::GfmTableDelimiterMarker); + State::Next(StateName::GfmTableHeadDelimiterRightAlignmentAfter) + } + _ => { + tokenizer.exit(Name::GfmTableDelimiterFiller); + State::Retry(StateName::GfmTableHeadDelimiterRightAlignmentAfter) + } + } +} + +/// After delimiter cell right alignment marker. +/// +/// ```markdown +/// | | a | +/// > | | -: | +/// ^ +/// ``` +pub fn head_delimiter_right_alignment_after(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b'\t' | b' ') => { + tokenizer.attempt( + State::Next(StateName::GfmTableHeadDelimiterCellAfter), + State::Nok, + ); + State::Retry(space_or_tab(tokenizer)) + } + _ => State::Retry(StateName::GfmTableHeadDelimiterCellAfter), + } +} + +/// After delimiter cell. +/// +/// ```markdown +/// | | a | +/// > | | -: | +/// ^ +/// ``` +pub fn head_delimiter_cell_after(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + None | Some(b'\n') => { + // Exit when: + // * there was no `:` or `|` at all (it’s a thematic break or setext + // underline instead) + // * the header cell count is not the delimiter cell count + if !tokenizer.tokenize_state.seen + || tokenizer.tokenize_state.size != tokenizer.tokenize_state.size_b + { + State::Retry(StateName::GfmTableHeadDelimiterNok) + } else { + // Reset. 
+ tokenizer.tokenize_state.seen = false; + tokenizer.tokenize_state.size = 0; + tokenizer.tokenize_state.size_b = 0; + tokenizer.exit(Name::GfmTableDelimiterRow); + tokenizer.exit(Name::GfmTableHead); + tokenizer.register_resolver(ResolveName::GfmTable); + State::Ok + } + } + Some(b'|') => State::Retry(StateName::GfmTableHeadDelimiterBefore), + _ => State::Retry(StateName::GfmTableHeadDelimiterNok), + } +} + +/// In delimiter row, at a disallowed byte. +/// +/// ```markdown +/// | | a | +/// > | | x | +/// ^ +/// ``` +pub fn head_delimiter_nok(tokenizer: &mut Tokenizer) -> State { + // Reset. + tokenizer.tokenize_state.seen = false; + tokenizer.tokenize_state.size = 0; + tokenizer.tokenize_state.size_b = 0; + State::Nok +} + +/// Before table body row. +/// +/// ```markdown +/// | | a | +/// | | - | +/// > | | b | +/// ^ +/// ``` +pub fn body_row_start(tokenizer: &mut Tokenizer) -> State { + if tokenizer.lazy { + State::Nok + } else { + tokenizer.enter(Name::GfmTableRow); + + match tokenizer.current { + Some(b'\t' | b' ') => { + tokenizer.attempt(State::Next(StateName::GfmTableBodyRowBefore), State::Nok); + + State::Retry(space_or_tab_min_max( + tokenizer, + 0, + if tokenizer.parse_state.options.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }, + )) + } + _ => State::Retry(StateName::GfmTableBodyRowBefore), + } + } +} + +/// Before table body row, after optional whitespace. +/// +/// ```markdown +/// | | a | +/// | | - | +/// > | | b | +/// ^ +/// ``` +pub fn body_row_before(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b'\t' | b' ') => State::Nok, + _ => State::Retry(StateName::GfmTableBodyRowBreak), + } +} + +/// At break in table body row. 
+/// +/// ```markdown +/// | | a | +/// | | - | +/// > | | b | +/// ^ +/// ^ +/// ^ +/// ``` +pub fn body_row_break(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + None | Some(b'\n') => { + tokenizer.exit(Name::GfmTableRow); + State::Ok + } + Some(b'\t' | b' ') => { + tokenizer.attempt(State::Next(StateName::GfmTableBodyRowBreak), State::Nok); + State::Retry(space_or_tab(tokenizer)) + } + Some(b'|') => { + tokenizer.enter(Name::GfmTableCellDivider); + tokenizer.consume(); + tokenizer.exit(Name::GfmTableCellDivider); + State::Next(StateName::GfmTableBodyRowBreak) + } + // Anything else is cell content. + _ => { + tokenizer.enter(Name::Data); + State::Retry(StateName::GfmTableBodyRowData) + } + } +} + +/// In table body row data. +/// +/// ```markdown +/// | | a | +/// | | - | +/// > | | b | +/// ^ +/// ``` +pub fn body_row_data(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + None | Some(b'\t' | b'\n' | b' ' | b'|') => { + tokenizer.exit(Name::Data); + State::Retry(StateName::GfmTableBodyRowBreak) + } + _ => { + let name = if tokenizer.current == Some(b'\\') { + StateName::GfmTableBodyRowEscape + } else { + StateName::GfmTableBodyRowData + }; + tokenizer.consume(); + State::Next(name) + } + } +} + +/// In table body row escape. +/// +/// ```markdown +/// | | a | +/// | | ---- | +/// > | | b\-c | +/// ^ +/// ``` +pub fn body_row_escape(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b'\\' | b'|') => { + tokenizer.consume(); + State::Next(StateName::GfmTableBodyRowData) + } + _ => State::Retry(StateName::GfmTableBodyRowData), + } +} + +/// Resolve GFM table. 
+pub fn resolve(tokenizer: &mut Tokenizer) { + let mut index = 0; + // let mut tables = vec![]; + let mut in_first_cell_awaiting_pipe = true; + let mut in_row = false; + let mut in_delimiter_row = false; + let mut last_cell = (0, 0, 0, 0); + let mut cell = (0, 0, 0, 0); + + let mut after_head_awaiting_first_body_row = false; + let mut last_table_end = 0; + let mut last_table_has_body = false; + + while index < tokenizer.events.len() { + let event = &tokenizer.events[index]; + + if event.kind == Kind::Enter { + // Start of head. + if event.name == Name::GfmTableHead { + after_head_awaiting_first_body_row = false; + + // Inject previous (body end and) table end. + if last_table_end != 0 { + flush_table_end(tokenizer, last_table_end, last_table_has_body); + last_table_has_body = false; + last_table_end = 0; + } + + // Inject table start. + tokenizer.map.add( + index, + 0, + vec![Event { + kind: Kind::Enter, + name: Name::GfmTable, + point: tokenizer.events[index].point.clone(), + link: None, + }], + ); + } else if event.name == Name::GfmTableRow || event.name == Name::GfmTableDelimiterRow { + in_delimiter_row = event.name == Name::GfmTableDelimiterRow; + in_row = true; + in_first_cell_awaiting_pipe = true; + last_cell = (0, 0, 0, 0); + cell = (0, index + 1, 0, 0); + + // Inject table body start. + if after_head_awaiting_first_body_row { + after_head_awaiting_first_body_row = false; + last_table_has_body = true; + tokenizer.map.add( + index, + 0, + vec![Event { + kind: Kind::Enter, + name: Name::GfmTableBody, + point: tokenizer.events[index].point.clone(), + link: None, + }], + ); + } + } + // Cell data. + else if in_row + && (event.name == Name::Data + || event.name == Name::GfmTableDelimiterMarker + || event.name == Name::GfmTableDelimiterFiller) + { + in_first_cell_awaiting_pipe = false; + + // First value in cell. 
+ if cell.2 == 0 { + if last_cell.1 != 0 { + cell.0 = cell.1; + flush_cell(tokenizer, last_cell, in_delimiter_row, None); + last_cell = (0, 0, 0, 0); + } + + cell.2 = index; + } + } else if event.name == Name::GfmTableCellDivider { + if in_first_cell_awaiting_pipe { + in_first_cell_awaiting_pipe = false; + } else { + if last_cell.1 != 0 { + cell.0 = cell.1; + flush_cell(tokenizer, last_cell, in_delimiter_row, None); + } + + last_cell = cell; + cell = (last_cell.1, index, 0, 0); + } + } + // Exit events. + } else if event.name == Name::GfmTableHead { + after_head_awaiting_first_body_row = true; + last_table_end = index; + } else if event.name == Name::GfmTableRow || event.name == Name::GfmTableDelimiterRow { + in_row = false; + last_table_end = index; + if last_cell.1 != 0 { + cell.0 = cell.1; + flush_cell(tokenizer, last_cell, in_delimiter_row, Some(index)); + } else if cell.1 != 0 { + flush_cell(tokenizer, cell, in_delimiter_row, Some(index)); + } + } else if in_row + && (event.name == Name::Data + || event.name == Name::GfmTableDelimiterMarker + || event.name == Name::GfmTableDelimiterFiller) + { + cell.3 = index; + } + + index += 1; + } + + if last_table_end != 0 { + flush_table_end(tokenizer, last_table_end, last_table_has_body); + } +} + +/// Generate a cell. +fn flush_cell( + tokenizer: &mut Tokenizer, + range: (usize, usize, usize, usize), + in_delimiter_row: bool, + row_end: Option<usize>, +) { + let group_name = if in_delimiter_row { + Name::GfmTableDelimiterCell + } else { + Name::GfmTableCell + }; + let value_name = if in_delimiter_row { + Name::GfmTableDelimiterCellValue + } else { + Name::GfmTableCellText + }; + + // Insert an exit for the previous cell, if there is one. 
+ // + // ```markdown + // > | | aa | bb | cc | + // ^-- exit + // ^^^^-- this cell + // ``` + if range.0 != 0 { + tokenizer.map.add( + range.0, + 0, + vec![Event { + kind: Kind::Exit, + name: group_name.clone(), + point: tokenizer.events[range.0].point.clone(), + link: None, + }], + ); + } + + // Insert enter of this cell. + // + // ```markdown + // > | | aa | bb | cc | + // ^-- enter + // ^^^^-- this cell + // ``` + tokenizer.map.add( + range.1, + 0, + vec![Event { + kind: Kind::Enter, + name: group_name.clone(), + point: tokenizer.events[range.1].point.clone(), + link: None, + }], + ); + + // Insert text start at first data start and end at last data end, and + // remove events between. + // + // ```markdown + // > | | aa | bb | cc | + // ^-- enter + // ^-- exit + // ^^^^-- this cell + // ``` + if range.2 != 0 { + tokenizer.map.add( + range.2, + 0, + vec![Event { + kind: Kind::Enter, + name: value_name.clone(), + point: tokenizer.events[range.2].point.clone(), + link: None, + }], + ); + debug_assert_ne!(range.3, 0); + + if !in_delimiter_row { + tokenizer.events[range.2].link = Some(Link { + previous: None, + next: None, + content: Content::Text, + }); + + if !in_delimiter_row && range.3 > range.2 + 1 { + let a = range.2 + 1; + let b = range.3 - range.2 - 1; + tokenizer.map.add(a, b, vec![]); + } + } + + tokenizer.map.add( + range.3 + 1, + 0, + vec![Event { + kind: Kind::Exit, + name: value_name, + point: tokenizer.events[range.3].point.clone(), + link: None, + }], + ); + } + + // Insert an exit for the last cell, if at the row end. + // + // ```markdown + // > | | aa | bb | cc | + // ^-- exit + // ^^^^^^-- this cell (the last one contains two “between” parts) + // ``` + if let Some(row_end) = row_end { + tokenizer.map.add( + row_end, + 0, + vec![Event { + kind: Kind::Exit, + name: group_name, + point: tokenizer.events[row_end].point.clone(), + link: None, + }], + ); + } +} + +/// Generate table end (and table body end). 
///
/// Queues the closing events directly after the event at `index`: first the
/// exit of the table body (only when `body` is set), then the exit of the
/// table itself. Both reuse the point of the event at `index`.
fn flush_table_end(tokenizer: &mut Tokenizer, index: usize, body: bool) {
    let point = &tokenizer.events[index].point;
    let close = |name: Name| Event {
        kind: Kind::Exit,
        name,
        point: point.clone(),
        link: None,
    };

    let exits = if body {
        vec![close(Name::GfmTableBody), close(Name::GfmTable)]
    } else {
        vec![close(Name::GfmTable)]
    };

    tokenizer.map.add(index + 1, 0, exits);
}
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs index df1d4fb..e9cc759 100644 --- a/src/construct/heading_setext.rs +++ b/src/construct/heading_setext.rs @@ -87,6 +87,7 @@ use alloc::vec; pub fn start(tokenizer: &mut Tokenizer) -> State { if tokenizer.parse_state.options.constructs.heading_setext && !tokenizer.lazy + && !tokenizer.pierce // Require a paragraph before. && (!tokenizer.events.is_empty() && tokenizer.events[skip_opt_back( diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs index b5a6013..8a9edfb 100644 --- a/src/construct/label_end.rs +++ b/src/construct/label_end.rs @@ -191,7 +191,6 @@ use crate::util::{ slice::{Position, Slice}, }; use alloc::{string::String, vec}; -extern crate std; /// Start of label end. /// diff --git a/src/construct/mod.rs b/src/construct/mod.rs index 9add015..de88174 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -60,6 +60,7 @@ //! * [frontmatter][] //! * [gfm autolink literal][gfm_autolink_literal] //! * [gfm footnote definition][gfm_footnote_definition] +//! * [gfm table][gfm_table] //! * [gfm task list item check][gfm_task_list_item_check] //! * [gfm label start footnote][gfm_label_start_footnote] //!
* math (text) (in `raw_text`) @@ -151,6 +152,7 @@ pub mod frontmatter; pub mod gfm_autolink_literal; pub mod gfm_footnote_definition; pub mod gfm_label_start_footnote; +pub mod gfm_table; pub mod gfm_task_list_item_check; pub mod hard_break_escape; pub mod heading_atx; diff --git a/src/event.rs b/src/event.rs index 3e540c0..ba266b4 100644 --- a/src/event.rs +++ b/src/event.rs @@ -1240,6 +1240,265 @@ pub enum Name { /// ^ /// ``` GfmStrikethroughText, + /// GFM extension: Table. + /// + /// ## Info + /// + /// * **Context**: + /// [flow content][crate::construct::flow] + /// * **Content model**: + /// [`GfmTableBody`][Name::GfmTableBody], + /// [`GfmTableHead`][Name::GfmTableHead], + /// [`LineEnding`][Name::LineEnding] + /// * **Construct**: + /// [`gfm_table`][crate::construct::gfm_table] + /// + /// ## Example + /// + /// ```markdown + /// > | | a | + /// ^^^^^ + /// > | | - | + /// ^^^^^ + /// > | | b | + /// ^^^^^ + /// ``` + GfmTable, + /// GFM extension: Table body. + /// + /// ## Info + /// + /// * **Context**: + /// [`GfmTable`][Name::GfmTable] + /// * **Content model**: + /// [`GfmTableRow`][Name::GfmTableRow], + /// [`LineEnding`][Name::LineEnding] + /// * **Construct**: + /// [`gfm_table`][crate::construct::gfm_table] + /// + /// ## Example + /// + /// ```markdown + /// | | a | + /// | | - | + /// > | | b | + /// ^^^^^ + /// ``` + GfmTableBody, + /// GFM extension: Table cell. + /// + /// ## Info + /// + /// * **Context**: + /// [`GfmTableRow`][Name::GfmTableRow] + /// * **Content model**: + /// [`GfmTableCellDivider`][Name::GfmTableCellDivider], + /// [`GfmTableCellText`][Name::GfmTableCellText], + /// [`SpaceOrTab`][Name::SpaceOrTab] + /// * **Construct**: + /// [`gfm_table`][crate::construct::gfm_table] + /// + /// ## Example + /// + /// ```markdown + /// > | | a | + /// ^^^^^ + /// | | - | + /// > | | b | + /// ^^^^^ + /// ``` + GfmTableCell, + /// GFM extension: Table cell text. 
+ /// + /// ## Info + /// + /// * **Context**: + /// [`GfmTableCell`][Name::GfmTableCell] + /// * **Content model**: + /// [text content][crate::construct::text] + /// * **Construct**: + /// [`gfm_table`][crate::construct::gfm_table] + /// + /// ## Example + /// + /// ```markdown + /// > | | a | + /// ^ + /// | | - | + /// > | | b | + /// ^ + /// ``` + GfmTableCellText, + /// GFM extension: Table cell divider. + /// + /// ## Info + /// + /// * **Context**: + /// [`GfmTableCell`][Name::GfmTableCell] + /// * **Content model**: + /// void + /// * **Construct**: + /// [`gfm_table`][crate::construct::gfm_table] + /// + /// ## Example + /// + /// ```markdown + /// > | | a | + /// ^ ^ + /// > | | - | + /// ^ ^ + /// > | | b | + /// ^ ^ + /// ``` + GfmTableCellDivider, + /// GFM extension: Table delimiter row. + /// + /// ## Info + /// + /// * **Context**: + /// [`GfmTableHead`][Name::GfmTableHead] + /// * **Content model**: + /// [`GfmTableDelimiterCell`][Name::GfmTableDelimiterCell] + /// * **Construct**: + /// [`gfm_table`][crate::construct::gfm_table] + /// + /// ## Example + /// + /// ```markdown + /// | | a | + /// > | | - | + /// ^^^^^ + /// | | b | + /// ``` + GfmTableDelimiterRow, + /// GFM extension: Table delimiter alignment marker. + /// + /// ## Info + /// + /// * **Context**: + /// [`GfmTableDelimiterCellValue`][Name::GfmTableDelimiterCellValue] + /// * **Content model**: + /// void + /// * **Construct**: + /// [`gfm_table`][crate::construct::gfm_table] + /// + /// ## Example + /// + /// ```markdown + /// | | a | + /// > | | :- | + /// ^ + /// | | b | + /// ``` + GfmTableDelimiterMarker, + /// GFM extension: Table delimiter cell. 
+ /// + /// ## Info + /// + /// * **Context**: + /// [`GfmTableDelimiterRow`][Name::GfmTableDelimiterRow] + /// * **Content model**: + /// [`GfmTableCellDivider`][Name::GfmTableCellDivider], + /// [`GfmTableDelimiterCellValue`][Name::GfmTableDelimiterCellValue], + /// [`SpaceOrTab`][Name::SpaceOrTab] + /// * **Construct**: + /// [`gfm_table`][crate::construct::gfm_table] + /// + /// ## Example + /// + /// ```markdown + /// | | a | + /// > | | - | + /// ^^^^^ + /// | | b | + /// ``` + GfmTableDelimiterCell, + /// GFM extension: Table delimiter cell alignment. + /// + /// ## Info + /// + /// * **Context**: + /// [`GfmTableDelimiterCell`][Name::GfmTableDelimiterCell] + /// * **Content model**: + /// [`GfmTableDelimiterMarker`][Name::GfmTableDelimiterMarker], + /// [`GfmTableDelimiterFiller`][Name::GfmTableDelimiterFiller] + /// * **Construct**: + /// [`gfm_table`][crate::construct::gfm_table] + /// + /// ## Example + /// + /// ```markdown + /// | | a | + /// > | | - | + /// ^ + /// | | b | + /// ``` + GfmTableDelimiterCellValue, + /// GFM extension: Table delimiter filler. + /// + /// ## Info + /// + /// * **Context**: + /// [`GfmTableDelimiterCellValue`][Name::GfmTableDelimiterCellValue] + /// * **Content model**: + /// void + /// * **Construct**: + /// [`gfm_table`][crate::construct::gfm_table] + /// + /// ## Example + /// + /// ```markdown + /// | | a | + /// > | | - | + /// ^ + /// | | b | + /// ``` + GfmTableDelimiterFiller, + /// GFM extension: Table head. + /// + /// ## Info + /// + /// * **Context**: + /// [`GfmTable`][Name::GfmTable] + /// * **Content model**: + /// [`GfmTableRow`][Name::GfmTableRow], + /// [`GfmTableDelimiterRow`][Name::GfmTableDelimiterRow], + /// [`LineEnding`][Name::LineEnding] + /// * **Construct**: + /// [`gfm_table`][crate::construct::gfm_table] + /// + /// ## Example + /// + /// ```markdown + /// > | | a | + /// ^^^^^ + /// > | | - | + /// ^^^^^ + /// | | b | + /// ``` + GfmTableHead, + /// GFM extension: Table row. 
+ /// + /// ## Info + /// + /// * **Context**: + /// [`GfmTableBody`][Name::GfmTableBody], + /// [`GfmTableHead`][Name::GfmTableHead] + /// * **Content model**: + /// [`GfmTableCell`][Name::GfmTableCell] + /// * **Construct**: + /// [`gfm_table`][crate::construct::gfm_table] + /// + /// ## Example + /// + /// ```markdown + /// > | | a | + /// ^^^^^ + /// | | - | + /// > | | b | + /// ^^^^^ + /// ``` + GfmTableRow, /// GFM extension: task list item check. /// /// ## Info @@ -2436,7 +2695,7 @@ pub enum Name { } /// List of void events, used to make sure everything is working well. -pub const VOID_EVENTS: [Name; 57] = [ +pub const VOID_EVENTS: [Name; 60] = [ Name::AttentionSequence, Name::AutolinkEmail, Name::AutolinkMarker, @@ -2469,6 +2728,9 @@ pub const VOID_EVENTS: [Name; 57] = [ Name::GfmFootnoteDefinitionLabelMarker, Name::GfmFootnoteDefinitionMarker, Name::GfmStrikethroughSequence, + Name::GfmTableCellDivider, + Name::GfmTableDelimiterMarker, + Name::GfmTableDelimiterFiller, Name::GfmTaskListItemMarker, Name::GfmTaskListItemValueChecked, Name::GfmTaskListItemValueUnchecked, @@ -191,6 +191,17 @@ pub struct Constructs { /// ^^^ /// ``` pub gfm_strikethrough: bool, + /// GFM: table. + /// + /// ```markdown + /// > | | a | + /// ^^^^^ + /// > | | - | + /// ^^^^^ + /// > | | b | + /// ^^^^^ + /// ``` + pub gfm_table: bool, /// GFM: task list item. 
/// /// ```markdown @@ -317,6 +328,7 @@ impl Default for Constructs { gfm_label_start_footnote: false, gfm_footnote_definition: false, gfm_strikethrough: false, + gfm_table: false, gfm_task_list_item: false, hard_break_escape: true, hard_break_trailing: true, @@ -346,6 +358,7 @@ impl Constructs { gfm_footnote_definition: true, gfm_label_start_footnote: true, gfm_strikethrough: true, + gfm_table: true, gfm_task_list_item: true, ..Self::default() } diff --git a/src/resolve.rs b/src/resolve.rs index a62d382..d015213 100644 --- a/src/resolve.rs +++ b/src/resolve.rs @@ -19,6 +19,11 @@ pub enum Name { /// and what occurs before and after each sequence. /// Otherwise they are turned into data. Attention, + /// Resolve GFM tables. + /// + /// The table head, and later each row, are all parsed separately. + /// Resolving groups everything together, and groups cells. + GfmTable, /// Resolve heading (atx). /// /// Heading (atx) contains further sequences and data. @@ -60,6 +65,7 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) { let func = match name { Name::Label => construct::label_end::resolve, Name::Attention => construct::attention::resolve, + Name::GfmTable => construct::gfm_table::resolve, Name::HeadingAtx => construct::heading_atx::resolve, Name::HeadingSetext => construct::heading_setext::resolve, Name::ListItem => construct::list_item::resolve, diff --git a/src/state.rs b/src/state.rs index a42e802..5013ec8 100644 --- a/src/state.rs +++ b/src/state.rs @@ -123,6 +123,7 @@ pub enum Name { DocumentFlowEnd, FlowStart, + FlowBeforeGfmTable, FlowBeforeCodeIndented, FlowBeforeRaw, FlowBeforeHtml, @@ -309,6 +310,28 @@ pub enum Name { StringBefore, StringBeforeData, + GfmTableStart, + GfmTableHeadRowBefore, + GfmTableHeadRowStart, + GfmTableHeadRowBreak, + GfmTableHeadRowData, + GfmTableHeadRowEscape, + GfmTableHeadDelimiterStart, + GfmTableHeadDelimiterBefore, + GfmTableHeadDelimiterCellBefore, + GfmTableHeadDelimiterValueBefore, + 
GfmTableHeadDelimiterLeftAlignmentAfter, + GfmTableHeadDelimiterFiller, + GfmTableHeadDelimiterRightAlignmentAfter, + GfmTableHeadDelimiterCellAfter, + GfmTableHeadDelimiterNok, + + GfmTableBodyRowBefore, + GfmTableBodyRowStart, + GfmTableBodyRowBreak, + GfmTableBodyRowData, + GfmTableBodyRowEscape, + TextStart, TextBefore, TextBeforeHtml, @@ -445,6 +468,7 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { Name::DocumentFlowInside => construct::document::flow_inside, Name::FlowStart => construct::flow::start, + Name::FlowBeforeGfmTable => construct::flow::before_gfm_table, Name::FlowBeforeCodeIndented => construct::flow::before_code_indented, Name::FlowBeforeRaw => construct::flow::before_raw, Name::FlowBeforeHtml => construct::flow::before_html, @@ -662,6 +686,33 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { Name::StringBefore => construct::string::before, Name::StringBeforeData => construct::string::before_data, + Name::GfmTableStart => construct::gfm_table::start, + Name::GfmTableHeadRowBefore => construct::gfm_table::head_row_before, + Name::GfmTableHeadRowStart => construct::gfm_table::head_row_start, + Name::GfmTableHeadRowBreak => construct::gfm_table::head_row_break, + Name::GfmTableHeadRowData => construct::gfm_table::head_row_data, + Name::GfmTableHeadRowEscape => construct::gfm_table::head_row_escape, + + Name::GfmTableHeadDelimiterStart => construct::gfm_table::head_delimiter_start, + Name::GfmTableHeadDelimiterBefore => construct::gfm_table::head_delimiter_before, + Name::GfmTableHeadDelimiterCellBefore => construct::gfm_table::head_delimiter_cell_before, + Name::GfmTableHeadDelimiterValueBefore => construct::gfm_table::head_delimiter_value_before, + Name::GfmTableHeadDelimiterLeftAlignmentAfter => { + construct::gfm_table::head_delimiter_left_alignment_after + } + Name::GfmTableHeadDelimiterFiller => construct::gfm_table::head_delimiter_filler, + Name::GfmTableHeadDelimiterRightAlignmentAfter => { + 
construct::gfm_table::head_delimiter_right_alignment_after + } + Name::GfmTableHeadDelimiterCellAfter => construct::gfm_table::head_delimiter_cell_after, + Name::GfmTableHeadDelimiterNok => construct::gfm_table::head_delimiter_nok, + + Name::GfmTableBodyRowBefore => construct::gfm_table::body_row_before, + Name::GfmTableBodyRowStart => construct::gfm_table::body_row_start, + Name::GfmTableBodyRowBreak => construct::gfm_table::body_row_break, + Name::GfmTableBodyRowData => construct::gfm_table::body_row_data, + Name::GfmTableBodyRowEscape => construct::gfm_table::body_row_escape, + Name::TextStart => construct::text::start, Name::TextBefore => construct::text::before, Name::TextBeforeHtml => construct::text::before_html, diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 9b73836..0ab8784 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -305,10 +305,12 @@ pub struct Tokenizer<'a> { /// /// Used when tokenizing [document content][crate::construct::document]. pub concrete: bool, - /// Whether this line is lazy. + /// Whether this row is piercing into the current construct with more + /// containers. /// - /// The previous line was a paragraph, and this line’s containers did not - /// match. + /// Used when tokenizing [document content][crate::construct::document]. + pub pierce: bool, + /// Whether this line is lazy: there are less containers than before. 
pub lazy: bool, } @@ -370,6 +372,7 @@ impl<'a> Tokenizer<'a> { }, map: EditMap::new(), interrupt: false, + pierce: true, concrete: false, lazy: false, resolvers: vec![], diff --git a/tests/gfm_table.rs b/tests/gfm_table.rs new file mode 100644 index 0000000..a265549 --- /dev/null +++ b/tests/gfm_table.rs @@ -0,0 +1,1782 @@ +extern crate micromark; +use micromark::{micromark, micromark_with_options, Constructs, Options}; +use pretty_assertions::assert_eq; + +#[test] +fn gfm_table() { + let gfm = Options { + constructs: Constructs::gfm(), + ..Options::default() + }; + + assert_eq!( + micromark("| a |\n| - |\n| b |"), + "<p>| a |\n| - |\n| b |</p>", + "should ignore tables by default" + ); + + assert_eq!( + micromark_with_options("| a |\n| - |\n| b |", &gfm), + "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td>b</td>\n</tr>\n</tbody>\n</table>", + "should support tables" + ); + + assert_eq!( + micromark_with_options("| a |", &gfm), + "<p>| a |</p>", + "should not support a table w/ the head row ending in an eof (1)" + ); + + assert_eq!( + micromark_with_options("| a", &gfm), + "<p>| a</p>", + "should not support a table w/ the head row ending in an eof (2)" + ); + + assert_eq!( + micromark_with_options("a |", &gfm), + "<p>a |</p>", + "should not support a table w/ the head row ending in an eof (3)" + ); + + assert_eq!( + micromark_with_options("| a |\n| - |", &gfm), + "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>", + "should support a table w/ a delimiter row ending in an eof (1)" + ); + + assert_eq!( + micromark_with_options("| a\n| -", &gfm), + "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>", + "should support a table w/ a delimiter row ending in an eof (2)" + ); + + assert_eq!( + micromark_with_options("| a |\n| - |\n| b |", &gfm), + "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td>b</td>\n</tr>\n</tbody>\n</table>", + "should support a table w/ a body row ending in an eof (1)" + ); + + 
assert_eq!( + micromark_with_options("| a\n| -\n| b", &gfm), + "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td>b</td>\n</tr>\n</tbody>\n</table>", + "should support a table w/ a body row ending in an eof (2)" + ); + + assert_eq!( + micromark_with_options("a|b\n-|-\nc|d", &gfm), + "<table>\n<thead>\n<tr>\n<th>a</th>\n<th>b</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td>c</td>\n<td>d</td>\n</tr>\n</tbody>\n</table>", + "should support a table w/ a body row ending in an eof (3)" + ); + + assert_eq!( + micromark_with_options("| a \n| -\t\n| b | ", &gfm), + "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td>b</td>\n</tr>\n</tbody>\n</table>", + "should support rows w/ trailing whitespace (1)" + ); + + assert_eq!( + micromark_with_options("| a | \n| - |", &gfm), + "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>", + "should support rows w/ trailing whitespace (2)" + ); + + assert_eq!( + micromark_with_options("| a |\n| - | ", &gfm), + "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>", + "should support rows w/ trailing whitespace (3)" + ); + + assert_eq!( + micromark_with_options("| a |\n| - |\n| b | ", &gfm), + "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td>b</td>\n</tr>\n</tbody>\n</table>", + "should support rows w/ trailing whitespace (4)" + ); + + assert_eq!( + micromark_with_options("||a|\n|-|-|", &gfm), + "<table>\n<thead>\n<tr>\n<th></th>\n<th>a</th>\n</tr>\n</thead>\n</table>", + "should support empty first header cells" + ); + + assert_eq!( + micromark_with_options("|a||\n|-|-|", &gfm), + "<table>\n<thead>\n<tr>\n<th>a</th>\n<th></th>\n</tr>\n</thead>\n</table>", + "should support empty last header cells" + ); + + assert_eq!( + micromark_with_options("a||b\n-|-|-", &gfm), + "<table>\n<thead>\n<tr>\n<th>a</th>\n<th></th>\n<th>b</th>\n</tr>\n</thead>\n</table>", + "should support empty header cells" + ); + + assert_eq!( + micromark_with_options("|a|b|\n|-|-|\n||c|", 
&gfm), + "<table>\n<thead>\n<tr>\n<th>a</th>\n<th>b</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td></td>\n<td>c</td>\n</tr>\n</tbody>\n</table>", + "should support empty first body cells" + ); + + assert_eq!( + micromark_with_options("|a|b|\n|-|-|\n|c||", &gfm), + "<table>\n<thead>\n<tr>\n<th>a</th>\n<th>b</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td>c</td>\n<td></td>\n</tr>\n</tbody>\n</table>", + "should support empty last body cells" + ); + + assert_eq!( + micromark_with_options("a|b|c\n-|-|-\nd||e", &gfm), + "<table>\n<thead>\n<tr>\n<th>a</th>\n<th>b</th>\n<th>c</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td>d</td>\n<td></td>\n<td>e</td>\n</tr>\n</tbody>\n</table>", + "should support empty body cells" + ); + + assert_eq!( + micromark_with_options("| a |\n| - |\n- b", &gfm), + "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>\n<ul>\n<li>b</li>\n</ul>", + "should support a list after a table" + ); + + assert_eq!( + micromark_with_options("> | a |\n| - |", &gfm), + "<blockquote>\n<p>| a |\n| - |</p>\n</blockquote>", + "should not support a lazy delimiter row (1)" + ); + + assert_eq!( + micromark_with_options("> a\n> | b |\n| - |", &gfm), + "<blockquote>\n<p>a\n| b |\n| - |</p>\n</blockquote>", + "should not support a lazy delimiter row (2)" + ); + + assert_eq!( + micromark_with_options("| a |\n> | - |", &gfm), + "<p>| a |</p>\n<blockquote>\n<p>| - |</p>\n</blockquote>", + "should not support a piercing delimiter row" + ); + + assert_eq!( + micromark_with_options("> a\n> | b |\n|-", &gfm), + "<blockquote>\n<p>a\n| b |\n|-</p>\n</blockquote>", + "should not support a lazy body row (2)" + ); + + assert_eq!( + micromark_with_options("> | a |\n> | - |\n| b |", &gfm), + "<blockquote>\n<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>\n</blockquote>\n<p>| b |</p>", + "should not support a lazy body row (1)" + ); + + assert_eq!( + micromark_with_options("> a\n> | b |\n> | - |\n| c |", &gfm), + 
"<blockquote>\n<p>a</p>\n<table>\n<thead>\n<tr>\n<th>b</th>\n</tr>\n</thead>\n</table>\n</blockquote>\n<p>| c |</p>", + "should not support a lazy body row (2)" + ); + + assert_eq!( + micromark_with_options("> | A |\n> | - |\n> | 1 |\n| 2 |", &gfm), + "<blockquote>\n<table>\n<thead>\n<tr>\n<th>A</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td>1</td>\n</tr>\n</tbody>\n</table>\n</blockquote>\n<p>| 2 |</p>", + "should not support a lazy body row (3)" + ); + + assert_eq!( + micromark_with_options(" - d\n - e", &gfm), + micromark(" - d\n - e"), + "should not change how lists and lazyness work" + ); + + assert_eq!( + micromark_with_options("| a |\n | - |", &gfm), + "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>", + "should form a table if the delimiter row is indented w/ 3 spaces" + ); + + assert_eq!( + micromark_with_options("| a |\n | - |", &gfm), + "<p>| a |\n| - |</p>", + "should not form a table if the delimiter row is indented w/ 4 spaces" + ); + + assert_eq!( + micromark_with_options("| a |\n | - |", &Options { + constructs: Constructs { + code_indented: false, + ..Constructs::gfm() + }, + ..Options::default() + }), + "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>", + "should form a table if the delimiter row is indented w/ 4 spaces and indented code is turned off" + ); + + assert_eq!( + micromark_with_options("| a |\n| - |\n> block quote?", &gfm), + "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>\n<blockquote>\n<p>block quote?</p>\n</blockquote>", + "should be interrupted by a block quote" + ); + + assert_eq!( + micromark_with_options("| a |\n| - |\n>", &gfm), + "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>\n<blockquote>\n</blockquote>", + "should be interrupted by a block quote (empty)" + ); + + assert_eq!( + micromark_with_options("| a |\n| - |\n- list?", &gfm), + "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>\n<ul>\n<li>list?</li>\n</ul>", + "should be interrupted by a list" + ); + + 
assert_eq!( + micromark_with_options("| a |\n| - |\n-", &gfm), + "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>\n<ul>\n<li></li>\n</ul>", + "should be interrupted by a list (empty)" + ); + + assert_eq!( + micromark_with_options( + "| a |\n| - |\n<!-- HTML? -->", + &Options { + allow_dangerous_html: true, + constructs: Constructs::gfm(), + ..Options::default() + } + ), + "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>\n<!-- HTML? -->", + "should be interrupted by HTML (flow)" + ); + + assert_eq!( + micromark_with_options("| a |\n| - |\n\tcode?", &Options { + allow_dangerous_html: true, + constructs: Constructs::gfm(), + ..Options::default() + }), + "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>\n<pre><code>code?\n</code></pre>", + "should be interrupted by code (indented)" + ); + + assert_eq!( + micromark_with_options("| a |\n| - |\n```js\ncode?", &Options { + allow_dangerous_html: true, + constructs: Constructs::gfm(), + ..Options::default() + }), + "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>\n<pre><code class=\"language-js\">code?\n</code></pre>\n", + "should be interrupted by code (fenced)" + ); + + assert_eq!( + micromark_with_options( + "| a |\n| - |\n***", + &Options { + allow_dangerous_html: true, + constructs: Constructs::gfm(), + ..Options::default() + } + ), + "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>\n<hr />", + "should be interrupted by a thematic break" + ); + + assert_eq!( + micromark_with_options("| a |\n| - |\n# heading?", &gfm), + "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>\n<h1>heading?</h1>", + "should be interrupted by a heading (ATX)" + ); + + assert_eq!( + micromark_with_options("| a |\n| - |\nheading\n=", &gfm), + "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td>heading</td>\n</tr>\n<tr>\n<td>=</td>\n</tr>\n</tbody>\n</table>", + "should *not* be interrupted by a heading (setext)" + ); + + assert_eq!( + 
micromark_with_options("| a |\n| - |\nheading\n---", &gfm), + "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td>heading</td>\n</tr>\n</tbody>\n</table>\n<hr />", + "should *not* be interrupted by a heading (setext), but interrupt if the underline is also a thematic break" + ); + + assert_eq!( + micromark_with_options("| a |\n| - |\nheading\n-", &gfm), + "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td>heading</td>\n</tr>\n</tbody>\n</table>\n<ul>\n<li></li>\n</ul>", + "should *not* be interrupted by a heading (setext), but interrupt if the underline is also an empty list item bullet" + ); + + assert_eq!( + micromark_with_options("a\nb\n-:", &gfm), + "<p>a</p>\n<table>\n<thead>\n<tr>\n<th align=\"right\">b</th>\n</tr>\n</thead>\n</table>", + "should support a single head row" + ); + + assert_eq!( + micromark_with_options("> | a |\n> | - |", &gfm), + "<blockquote>\n<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>\n</blockquote>", + "should support a table in a container" + ); + + assert_eq!( + micromark_with_options("> | a |\n| - |", &gfm), + "<blockquote>\n<p>| a |\n| - |</p>\n</blockquote>", + "should not support a lazy delimiter row if the head row is in a container" + ); + + assert_eq!( + micromark_with_options("| a |\n> | - |", &gfm), + "<p>| a |</p>\n<blockquote>\n<p>| - |</p>\n</blockquote>", + "should not support a “piercing” container for the delimiter row, if the head row was not in that container" + ); + + assert_eq!( + micromark_with_options("> | a |\n> | - |\n| c |", &gfm), + "<blockquote>\n<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>\n</blockquote>\n<p>| c |</p>", + "should not support a lazy body row if the head row and delimiter row are in a container" + ); + + assert_eq!( + micromark_with_options("> | a |\n| - |\n> | c |", &gfm), + "<blockquote>\n<p>| a |\n| - |\n| c |</p>\n</blockquote>", + "should not support a lazy delimiter row if the head row and a further body row are 
in a container" + ); + + assert_eq!(micromark_with_options("", &gfm), "", "should support"); + + assert_eq!(micromark_with_options("", &gfm), "", "should support"); + + assert_eq!( + micromark_with_options( + r###"# Align + +## An empty initial cell + +| | a|c| +|--|:----:|:---| +|a|b|c| +|a|b|c| + +## Missing alignment characters + +| a | b | c | +| |---|---| +| d | e | f | + +* * * + +| a | b | c | +|---|---| | +| d | e | f | + +## Incorrect characters + +| a | b | c | +|---|-*-|---| +| d | e | f | + +## Two alignments + +|a| +|::| + +|a| +|:-:| + +## Two at the start or end + +|a| +|::-| + +|a| +|-::| + +## In the middle + +|a| +|-:-| + +## A space in the middle + +|a| +|- -| + +## No pipe + +a +:-: + +a +:- + +a +-: + +## A single colon + +|a| +|:| + +a +: + +## Alignment on empty cells + +| a | b | c | d | e | +| - | - | :- | -: | :-: | +| f | +"###, + &gfm + ), + r###"<h1>Align</h1> +<h2>An empty initial cell</h2> +<table> +<thead> +<tr> +<th></th> +<th align="center">a</th> +<th align="left">c</th> +</tr> +</thead> +<tbody> +<tr> +<td>a</td> +<td align="center">b</td> +<td align="left">c</td> +</tr> +<tr> +<td>a</td> +<td align="center">b</td> +<td align="left">c</td> +</tr> +</tbody> +</table> +<h2>Missing alignment characters</h2> +<p>| a | b | c | +| |---|---| +| d | e | f |</p> +<hr /> +<p>| a | b | c | +|---|---| | +| d | e | f |</p> +<h2>Incorrect characters</h2> +<p>| a | b | c | +|---|-*-|---| +| d | e | f |</p> +<h2>Two alignments</h2> +<p>|a| +|::|</p> +<table> +<thead> +<tr> +<th align="center">a</th> +</tr> +</thead> +</table> +<h2>Two at the start or end</h2> +<p>|a| +|::-|</p> +<p>|a| +|-::|</p> +<h2>In the middle</h2> +<p>|a| +|-:-|</p> +<h2>A space in the middle</h2> +<p>|a| +|- -|</p> +<h2>No pipe</h2> +<table> +<thead> +<tr> +<th align="center">a</th> +</tr> +</thead> +</table> +<table> +<thead> +<tr> +<th align="left">a</th> +</tr> +</thead> +</table> +<table> +<thead> +<tr> +<th align="right">a</th> +</tr> +</thead> +</table> +<h2>A 
single colon</h2> +<p>|a| +|:|</p> +<p>a +:</p> +<h2>Alignment on empty cells</h2> +<table> +<thead> +<tr> +<th>a</th> +<th>b</th> +<th align="left">c</th> +<th align="right">d</th> +<th align="center">e</th> +</tr> +</thead> +<tbody> +<tr> +<td>f</td> +<td></td> +<td align="left"></td> +<td align="right"></td> +<td align="center"></td> +</tr> +</tbody> +</table> +"###, + "should match alignment like GitHub" + ); + + assert_eq!( + micromark_with_options( + r###"# Tables + +| a | b | c | +| - | - | - | +| d | e | f | + +## No body + +| a | b | c | +| - | - | - | + +## One column + +| a | +| - | +| b | +"###, + &gfm + ), + r###"<h1>Tables</h1> +<table> +<thead> +<tr> +<th>a</th> +<th>b</th> +<th>c</th> +</tr> +</thead> +<tbody> +<tr> +<td>d</td> +<td>e</td> +<td>f</td> +</tr> +</tbody> +</table> +<h2>No body</h2> +<table> +<thead> +<tr> +<th>a</th> +<th>b</th> +<th>c</th> +</tr> +</thead> +</table> +<h2>One column</h2> +<table> +<thead> +<tr> +<th>a</th> +</tr> +</thead> +<tbody> +<tr> +<td>b</td> +</tr> +</tbody> +</table> +"###, + "should match basic like GitHub" + ); + + assert_eq!( + micromark_with_options( + r###"# Tables in things + +## In lists + +* Unordered: + + | A | B | + | - | - | + | 1 | 2 | + +1. Ordered: + + | A | B | + | - | - | + | 1 | 2 | + +* Lazy? + | A | B | + | - | - | + | 1 | 2 | + | 3 | 4 | + | 5 | 6 | +| 7 | 8 | + +## In block quotes + +> W/ space: +> | A | B | +> | - | - | +> | 1 | 2 | + +>W/o space: +>| A | B | +>| - | - | +>| 1 | 2 | + +> Lazy? 
+> | A | B | +> | - | - | +> | 1 | 2 | +>| 3 | 4 | +| 5 | 6 | + +### List interrupting delimiters + +a | +- | + +a +-| + +a +|- +"###, + &gfm + ), + r###"<h1>Tables in things</h1> +<h2>In lists</h2> +<ul> +<li> +<p>Unordered:</p> +<table> +<thead> +<tr> +<th>A</th> +<th>B</th> +</tr> +</thead> +<tbody> +<tr> +<td>1</td> +<td>2</td> +</tr> +</tbody> +</table> +</li> +</ul> +<ol> +<li> +<p>Ordered:</p> +<table> +<thead> +<tr> +<th>A</th> +<th>B</th> +</tr> +</thead> +<tbody> +<tr> +<td>1</td> +<td>2</td> +</tr> +</tbody> +</table> +</li> +</ol> +<ul> +<li>Lazy? +<table> +<thead> +<tr> +<th>A</th> +<th>B</th> +</tr> +</thead> +</table> +</li> +</ul> +<p>| 1 | 2 | +| 3 | 4 | +| 5 | 6 | +| 7 | 8 |</p> +<h2>In block quotes</h2> +<blockquote> +<p>W/ space:</p> +<table> +<thead> +<tr> +<th>A</th> +<th>B</th> +</tr> +</thead> +<tbody> +<tr> +<td>1</td> +<td>2</td> +</tr> +</tbody> +</table> +</blockquote> +<blockquote> +<p>W/o space:</p> +<table> +<thead> +<tr> +<th>A</th> +<th>B</th> +</tr> +</thead> +<tbody> +<tr> +<td>1</td> +<td>2</td> +</tr> +</tbody> +</table> +</blockquote> +<blockquote> +<p>Lazy?</p> +<table> +<thead> +<tr> +<th>A</th> +<th>B</th> +</tr> +</thead> +<tbody> +<tr> +<td>1</td> +<td>2</td> +</tr> +<tr> +<td>3</td> +<td>4</td> +</tr> +</tbody> +</table> +</blockquote> +<p>| 5 | 6 |</p> +<h3>List interrupting delimiters</h3> +<p>a |</p> +<ul> +<li>|</li> +</ul> +<table> +<thead> +<tr> +<th>a</th> +</tr> +</thead> +</table> +<table> +<thead> +<tr> +<th>a</th> +</tr> +</thead> +</table> +"###, + "should match containers like GitHub" + ); + + assert_eq!( + micromark_with_options( + r###"| a | +| - | +| - | +| 1 | +"###, + &gfm + ), + r###"<table> +<thead> +<tr> +<th>a</th> +</tr> +</thead> +<tbody> +<tr> +<td>-</td> +</tr> +<tr> +<td>1</td> +</tr> +</tbody> +</table> +"###, + "should match a double delimiter row like GitHub" + ); + + assert_eq!( + micromark_with_options( + r###"# Examples from GFM + +## A + +| foo | bar | +| --- | --- | +| baz | bim | + +## 
B + +| abc | defghi | +:-: | -----------: +bar | baz + +## C + +| f\|oo | +| ------ | +| b `\|` az | +| b **\|** im | + +## D + +| abc | def | +| --- | --- | +| bar | baz | +> bar + +## E + +| abc | def | +| --- | --- | +| bar | baz | +bar + +bar + +## F + +| abc | def | +| --- | +| bar | + +## G + +| abc | def | +| --- | --- | +| bar | +| bar | baz | boo | + +## H + +| abc | def | +| --- | --- | +"###, + &gfm + ), + r###"<h1>Examples from GFM</h1> +<h2>A</h2> +<table> +<thead> +<tr> +<th>foo</th> +<th>bar</th> +</tr> +</thead> +<tbody> +<tr> +<td>baz</td> +<td>bim</td> +</tr> +</tbody> +</table> +<h2>B</h2> +<table> +<thead> +<tr> +<th align="center">abc</th> +<th align="right">defghi</th> +</tr> +</thead> +<tbody> +<tr> +<td align="center">bar</td> +<td align="right">baz</td> +</tr> +</tbody> +</table> +<h2>C</h2> +<table> +<thead> +<tr> +<th>f|oo</th> +</tr> +</thead> +<tbody> +<tr> +<td>b <code>|</code> az</td> +</tr> +<tr> +<td>b <strong>|</strong> im</td> +</tr> +</tbody> +</table> +<h2>D</h2> +<table> +<thead> +<tr> +<th>abc</th> +<th>def</th> +</tr> +</thead> +<tbody> +<tr> +<td>bar</td> +<td>baz</td> +</tr> +</tbody> +</table> +<blockquote> +<p>bar</p> +</blockquote> +<h2>E</h2> +<table> +<thead> +<tr> +<th>abc</th> +<th>def</th> +</tr> +</thead> +<tbody> +<tr> +<td>bar</td> +<td>baz</td> +</tr> +<tr> +<td>bar</td> +<td></td> +</tr> +</tbody> +</table> +<p>bar</p> +<h2>F</h2> +<p>| abc | def | +| --- | +| bar |</p> +<h2>G</h2> +<table> +<thead> +<tr> +<th>abc</th> +<th>def</th> +</tr> +</thead> +<tbody> +<tr> +<td>bar</td> +<td></td> +</tr> +<tr> +<td>bar</td> +<td>baz</td> +</tr> +</tbody> +</table> +<h2>H</h2> +<table> +<thead> +<tr> +<th>abc</th> +<th>def</th> +</tr> +</thead> +</table> +"###, + "should match examples from the GFM spec like GitHub" + ); + + assert_eq!( + micromark_with_options( + r###"# Grave accents + +## Grave accent in cell + +| A | B | +|--------------|---| +| <kbd>`</kbd> | C | + +## Escaped grave accent in “inline code” in cell + 
+| A | +|-----| +| `\` | + +## “Empty” inline code + +| 1 | 2 | 3 | +|---|------|----| +| a | `` | | +| b | `` | `` | +| c | ` | ` | +| d | `|` | +| e | `\|` | | +| f | \| | | + +## Escaped pipes in code in cells + +| `\|\\` | +| --- | +| `\|\\` | + +`\|\\` +"###, + &Options { + allow_dangerous_html: true, + ..gfm.clone() + } + ), + r###"<h1>Grave accents</h1> +<h2>Grave accent in cell</h2> +<table> +<thead> +<tr> +<th>A</th> +<th>B</th> +</tr> +</thead> +<tbody> +<tr> +<td><kbd>`</kbd></td> +<td>C</td> +</tr> +</tbody> +</table> +<h2>Escaped grave accent in “inline code” in cell</h2> +<table> +<thead> +<tr> +<th>A</th> +</tr> +</thead> +<tbody> +<tr> +<td><code>\</code></td> +</tr> +</tbody> +</table> +<h2>“Empty” inline code</h2> +<table> +<thead> +<tr> +<th>1</th> +<th>2</th> +<th>3</th> +</tr> +</thead> +<tbody> +<tr> +<td>a</td> +<td>``</td> +<td></td> +</tr> +<tr> +<td>b</td> +<td>``</td> +<td>``</td> +</tr> +<tr> +<td>c</td> +<td>`</td> +<td>`</td> +</tr> +<tr> +<td>d</td> +<td>`</td> +<td>`</td> +</tr> +<tr> +<td>e</td> +<td><code>|</code></td> +<td></td> +</tr> +<tr> +<td>f</td> +<td>|</td> +<td></td> +</tr> +</tbody> +</table> +<h2>Escaped pipes in code in cells</h2> +<table> +<thead> +<tr> +<th><code>|\\</code></th> +</tr> +</thead> +<tbody> +<tr> +<td><code>|\\</code></td> +</tr> +</tbody> +</table> +<p><code>\|\\</code></p> +"###, + "should match grave accent like GitHub" + ); + + assert_eq!( + micromark_with_options( + r###"# Code + +## Indented delimiter row + +a + |- + +a + |- + +## Indented body + +| a | + | - | + | C | + | D | + | E | +"###, + &gfm + ), + r###"<h1>Code</h1> +<h2>Indented delimiter row</h2> +<table> +<thead> +<tr> +<th>a</th> +</tr> +</thead> +</table> +<p>a +|-</p> +<h2>Indented body</h2> +<table> +<thead> +<tr> +<th>a</th> +</tr> +</thead> +<tbody> +<tr> +<td>C</td> +</tr> +<tr> +<td>D</td> +</tr> +</tbody> +</table> +<pre><code>| E | +</code></pre> +"###, + "should match indent like GitHub" + ); + + assert_eq!( + 
micromark_with_options( + r###"## Blank line + +a +:- +b + +c + +## Block quote + +a +:- +b +> c + +## Code (fenced) + +a +:- +b +``` +c +``` + +## Code (indented) + +a +:- +b + c + +## Definition + +a +:- +b +[c]: d + +## Heading (atx) + +a +:- +b +# c + + +## Heading (setext) (rank 1) + +a +:- +b +== +c + +## Heading (setext) (rank 2) + +a +:- +b +-- +c + +## HTML (flow, kind 1: raw) + +a +:- +b +<pre> + a +</pre> + +## HTML (flow, kind 2: comment) + +a +:- +b +<!-- c --> + +## HTML (flow, kind 3: instruction) + +a +:- +b +<? c ?> + +## HTML (flow, kind 4: declaration) + +a +:- +b +<!C> + +## HTML (flow, kind 5: cdata) + +a +:- +b +<![CDATA[c]]> + +## HTML (flow, kind 6: basic) + +a +:- +b +<div> + +## HTML (flow, kind 7: complete) + +a +:- +b +<x> + +## List (ordered, 1) + +a +:- +b +1. c + +## List (ordered, other) + +a +:- +b +2. c + +## List (unordered) + +a +:- +b +* c + +## List (unordered, blank) + +a +:- +b +* +c + +## List (unordered, blank start) + +a +:- +b +* + c + +## Thematic break + +a +:- +b +*** +"###, + &Options { + allow_dangerous_html: true, + ..gfm.clone() + } + ), + r###"<h2>Blank line</h2> +<table> +<thead> +<tr> +<th align="left">a</th> +</tr> +</thead> +<tbody> +<tr> +<td align="left">b</td> +</tr> +</tbody> +</table> +<p>c</p> +<h2>Block quote</h2> +<table> +<thead> +<tr> +<th align="left">a</th> +</tr> +</thead> +<tbody> +<tr> +<td align="left">b</td> +</tr> +</tbody> +</table> +<blockquote> +<p>c</p> +</blockquote> +<h2>Code (fenced)</h2> +<table> +<thead> +<tr> +<th align="left">a</th> +</tr> +</thead> +<tbody> +<tr> +<td align="left">b</td> +</tr> +</tbody> +</table> +<pre><code>c +</code></pre> +<h2>Code (indented)</h2> +<table> +<thead> +<tr> +<th align="left">a</th> +</tr> +</thead> +<tbody> +<tr> +<td align="left">b</td> +</tr> +</tbody> +</table> +<pre><code>c +</code></pre> +<h2>Definition</h2> +<table> +<thead> +<tr> +<th align="left">a</th> +</tr> +</thead> +<tbody> +<tr> +<td align="left">b</td> +</tr> +<tr> +<td 
align="left">[c]: d</td> +</tr> +</tbody> +</table> +<h2>Heading (atx)</h2> +<table> +<thead> +<tr> +<th align="left">a</th> +</tr> +</thead> +<tbody> +<tr> +<td align="left">b</td> +</tr> +</tbody> +</table> +<h1>c</h1> +<h2>Heading (setext) (rank 1)</h2> +<table> +<thead> +<tr> +<th align="left">a</th> +</tr> +</thead> +<tbody> +<tr> +<td align="left">b</td> +</tr> +<tr> +<td align="left">==</td> +</tr> +<tr> +<td align="left">c</td> +</tr> +</tbody> +</table> +<h2>Heading (setext) (rank 2)</h2> +<table> +<thead> +<tr> +<th align="left">a</th> +</tr> +</thead> +<tbody> +<tr> +<td align="left">b</td> +</tr> +<tr> +<td align="left">--</td> +</tr> +<tr> +<td align="left">c</td> +</tr> +</tbody> +</table> +<h2>HTML (flow, kind 1: raw)</h2> +<table> +<thead> +<tr> +<th align="left">a</th> +</tr> +</thead> +<tbody> +<tr> +<td align="left">b</td> +</tr> +</tbody> +</table> +<pre> + a +</pre> +<h2>HTML (flow, kind 2: comment)</h2> +<table> +<thead> +<tr> +<th align="left">a</th> +</tr> +</thead> +<tbody> +<tr> +<td align="left">b</td> +</tr> +</tbody> +</table> +<!-- c --> +<h2>HTML (flow, kind 3: instruction)</h2> +<table> +<thead> +<tr> +<th align="left">a</th> +</tr> +</thead> +<tbody> +<tr> +<td align="left">b</td> +</tr> +</tbody> +</table> +<? 
c ?> +<h2>HTML (flow, kind 4: declaration)</h2> +<table> +<thead> +<tr> +<th align="left">a</th> +</tr> +</thead> +<tbody> +<tr> +<td align="left">b</td> +</tr> +</tbody> +</table> +<!C> +<h2>HTML (flow, kind 5: cdata)</h2> +<table> +<thead> +<tr> +<th align="left">a</th> +</tr> +</thead> +<tbody> +<tr> +<td align="left">b</td> +</tr> +</tbody> +</table> +<![CDATA[c]]> +<h2>HTML (flow, kind 6: basic)</h2> +<table> +<thead> +<tr> +<th align="left">a</th> +</tr> +</thead> +<tbody> +<tr> +<td align="left">b</td> +</tr> +</tbody> +</table> +<div> +<h2>HTML (flow, kind 7: complete)</h2> +<table> +<thead> +<tr> +<th align="left">a</th> +</tr> +</thead> +<tbody> +<tr> +<td align="left">b</td> +</tr> +</tbody> +</table> +<x> +<h2>List (ordered, 1)</h2> +<table> +<thead> +<tr> +<th align="left">a</th> +</tr> +</thead> +<tbody> +<tr> +<td align="left">b</td> +</tr> +</tbody> +</table> +<ol> +<li>c</li> +</ol> +<h2>List (ordered, other)</h2> +<table> +<thead> +<tr> +<th align="left">a</th> +</tr> +</thead> +<tbody> +<tr> +<td align="left">b</td> +</tr> +</tbody> +</table> +<ol start="2"> +<li>c</li> +</ol> +<h2>List (unordered)</h2> +<table> +<thead> +<tr> +<th align="left">a</th> +</tr> +</thead> +<tbody> +<tr> +<td align="left">b</td> +</tr> +</tbody> +</table> +<ul> +<li>c</li> +</ul> +<h2>List (unordered, blank)</h2> +<table> +<thead> +<tr> +<th align="left">a</th> +</tr> +</thead> +<tbody> +<tr> +<td align="left">b</td> +</tr> +</tbody> +</table> +<ul> +<li></li> +</ul> +<p>c</p> +<h2>List (unordered, blank start)</h2> +<table> +<thead> +<tr> +<th align="left">a</th> +</tr> +</thead> +<tbody> +<tr> +<td align="left">b</td> +</tr> +</tbody> +</table> +<ul> +<li>c</li> +</ul> +<h2>Thematic break</h2> +<table> +<thead> +<tr> +<th align="left">a</th> +</tr> +</thead> +<tbody> +<tr> +<td align="left">b</td> +</tr> +</tbody> +</table> +<hr /> +"###, + "should match interrupt like GitHub" + ); + + assert_eq!( + micromark_with_options( + r###"# Loose + +## Loose + +Header 1 | 
Header 2 +-------- | -------- +Cell 1 | Cell 2 +Cell 3 | Cell 4 + +## One “column”, loose + +a +- +b + +## No pipe in first row + +a +| - | +"###, + &gfm + ), + r###"<h1>Loose</h1> +<h2>Loose</h2> +<table> +<thead> +<tr> +<th>Header 1</th> +<th>Header 2</th> +</tr> +</thead> +<tbody> +<tr> +<td>Cell 1</td> +<td>Cell 2</td> +</tr> +<tr> +<td>Cell 3</td> +<td>Cell 4</td> +</tr> +</tbody> +</table> +<h2>One “column”, loose</h2> +<h2>a</h2> +<p>b</p> +<h2>No pipe in first row</h2> +<table> +<thead> +<tr> +<th>a</th> +</tr> +</thead> +</table> +"###, + "should match loose tables like GitHub" + ); + + assert_eq!( + micromark_with_options( + r###"# Some more escapes + +| Head | +| ------------- | +| A | Alpha | +| B \| Bravo | +| C \\| Charlie | +| D \\\| Delta | +| E \\\\| Echo | + +Note: GH has a bug where in case C and E, the escaped escape is treated as a +normal escape: <https://github.com/github/cmark-gfm/issues/277>. +"###, + &gfm + ), + r###"<h1>Some more escapes</h1> +<table> +<thead> +<tr> +<th>Head</th> +</tr> +</thead> +<tbody> +<tr> +<td>A</td> +</tr> +<tr> +<td>B | Bravo</td> +</tr> +<tr> +<td>C \</td> +</tr> +<tr> +<td>D \| Delta</td> +</tr> +<tr> +<td>E \\</td> +</tr> +</tbody> +</table> +<p>Note: GH has a bug where in case C and E, the escaped escape is treated as a +normal escape: <a href="https://github.com/github/cmark-gfm/issues/277">https://github.com/github/cmark-gfm/issues/277</a>.</p> +"###, + "should match loose escapes like GitHub" + ); +} diff --git a/tests/heading_setext.rs b/tests/heading_setext.rs index 22155f0..fa979be 100644 --- a/tests/heading_setext.rs +++ b/tests/heading_setext.rs @@ -258,6 +258,18 @@ fn heading_setext() { ); assert_eq!( + micromark("a\n- ==="), + "<p>a</p>\n<ul>\n<li>===</li>\n</ul>", + "should not support piercing (1)" + ); + + assert_eq!( + micromark("a\n* ---"), + "<p>a</p>\n<ul>\n<li>\n<hr />\n</li>\n</ul>", + "should not support piercing (2)" + ); + + assert_eq!( micromark_with_options( "a\n-", &Options { |