aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-08-31 16:50:20 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-08-31 16:50:20 +0200
commitb1590a4fb0c28fdb6af866ea79c186ea57284493 (patch)
tree61264dc36135e7dae34a04992a99b9f3f71e7b8e
parent670f1d82e01ea2394b21d7d1857f41bdc67b3fce (diff)
downloadmarkdown-rs-b1590a4fb0c28fdb6af866ea79c186ea57284493.tar.gz
markdown-rs-b1590a4fb0c28fdb6af866ea79c186ea57284493.tar.bz2
markdown-rs-b1590a4fb0c28fdb6af866ea79c186ea57284493.zip
Add support for GFM tables
-rw-r--r--src/compiler.rs235
-rw-r--r--src/construct/document.rs16
-rw-r--r--src/construct/flow.rs41
-rw-r--r--src/construct/gfm_autolink_literal.rs1
-rw-r--r--src/construct/gfm_footnote_definition.rs2
-rw-r--r--src/construct/gfm_table.rs1042
-rw-r--r--src/construct/heading_setext.rs1
-rw-r--r--src/construct/label_end.rs1
-rw-r--r--src/construct/mod.rs2
-rw-r--r--src/event.rs264
-rw-r--r--src/lib.rs13
-rw-r--r--src/resolve.rs6
-rw-r--r--src/state.rs51
-rw-r--r--src/tokenizer.rs9
-rw-r--r--tests/gfm_table.rs1782
-rw-r--r--tests/heading_setext.rs12
16 files changed, 3442 insertions, 36 deletions
diff --git a/src/compiler.rs b/src/compiler.rs
index 9057505..5626f8a 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -68,6 +68,43 @@ struct Definition {
title: Option<String>,
}
+/// GFM table: column alignment.
+#[derive(Debug, PartialEq, Eq, Copy, Clone)]
+enum GfmTableAlign {
+ /// No alignment.
+ ///
+ /// ```markdown
+ /// | | aaa |
+ /// > | | --- |
+ /// ^^^
+ /// ```
+ None,
+ /// Left alignment.
+ ///
+ /// ```markdown
+ /// | | aaa |
+ /// > | | :-- |
+ /// ^^^
+ /// ```
+ Left,
+ /// Center alignment.
+ ///
+ /// ```markdown
+ /// | | aaa |
+ /// > | | :-: |
+ /// ^^^
+ /// ```
+ Center,
+ /// Right alignment.
+ ///
+ /// ```markdown
+ /// | | aaa |
+ /// > | | --: |
+ /// ^^^
+ /// ```
+ Right,
+}
+
/// Context used to compile markdown.
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug)]
@@ -107,6 +144,12 @@ struct CompileContext<'a> {
pub gfm_footnote_definitions: Vec<(String, String)>,
pub gfm_footnote_definition_calls: Vec<(String, usize)>,
pub gfm_footnote_definition_stack: Vec<(usize, usize)>,
+ /// Whether we are in a GFM table head.
+ pub gfm_table_in_head: bool,
+ /// Current GFM table alignment.
+ pub gfm_table_align: Option<Vec<GfmTableAlign>>,
+ /// Current GFM table column.
+ pub gfm_table_column: usize,
// Fields used to influence the current compilation.
/// Ignore the next line ending.
pub slurp_one_line_ending: bool,
@@ -153,6 +196,9 @@ impl<'a> CompileContext<'a> {
gfm_footnote_definitions: vec![],
gfm_footnote_definition_calls: vec![],
gfm_footnote_definition_stack: vec![],
+ gfm_table_in_head: false,
+ gfm_table_align: None,
+ gfm_table_column: 0,
tight_stack: vec![],
slurp_one_line_ending: false,
image_alt_inside: false,
@@ -350,6 +396,11 @@ fn enter(context: &mut CompileContext) {
Name::GfmFootnoteDefinition => on_enter_gfm_footnote_definition(context),
Name::GfmFootnoteCall => on_enter_gfm_footnote_call(context),
Name::GfmStrikethrough => on_enter_gfm_strikethrough(context),
+ Name::GfmTable => on_enter_gfm_table(context),
+ Name::GfmTableBody => on_enter_gfm_table_body(context),
+ Name::GfmTableCell => on_enter_gfm_table_cell(context),
+ Name::GfmTableHead => on_enter_gfm_table_head(context),
+ Name::GfmTableRow => on_enter_gfm_table_row(context),
Name::GfmTaskListItemCheck => on_enter_gfm_task_list_item_check(context),
Name::HtmlFlow => on_enter_html_flow(context),
Name::HtmlText => on_enter_html_text(context),
@@ -407,6 +458,11 @@ fn exit(context: &mut CompileContext) {
Name::GfmFootnoteDefinitionPrefix => on_exit_gfm_footnote_definition_prefix(context),
Name::GfmFootnoteDefinition => on_exit_gfm_footnote_definition(context),
Name::GfmStrikethrough => on_exit_gfm_strikethrough(context),
+ Name::GfmTable => on_exit_gfm_table(context),
+ Name::GfmTableBody => on_exit_gfm_table_body(context),
+ Name::GfmTableCell => on_exit_gfm_table_cell(context),
+ Name::GfmTableHead => on_exit_gfm_table_head(context),
+ Name::GfmTableRow => on_exit_gfm_table_row(context),
Name::GfmTaskListItemCheck => on_exit_gfm_task_list_item_check(context),
Name::GfmTaskListItemValueChecked => on_exit_gfm_task_list_item_value_checked(context),
Name::HardBreakEscape | Name::HardBreakTrailing => on_exit_break(context),
@@ -536,6 +592,105 @@ fn on_enter_gfm_strikethrough(context: &mut CompileContext) {
}
}
+/// Handle [`Enter`][Kind::Enter]:[`GfmTable`][Name::GfmTable].
+fn on_enter_gfm_table(context: &mut CompileContext) {
+ // Find the alignment.
+ let mut index = context.index;
+ let mut in_delimiter_row = false;
+ let mut align = vec![];
+
+ while index < context.events.len() {
+ let event = &context.events[index];
+
+ if in_delimiter_row {
+ if event.kind == Kind::Enter {
+ // Start of alignment value: set a new column.
+ if event.name == Name::GfmTableDelimiterCellValue {
+ align.push(
+ if context.events[index + 1].name == Name::GfmTableDelimiterMarker {
+ GfmTableAlign::Left
+ } else {
+ GfmTableAlign::None
+ },
+ );
+ }
+ } else {
+ // End of alignment value: change the column.
+ if event.name == Name::GfmTableDelimiterCellValue {
+ if context.events[index - 1].name == Name::GfmTableDelimiterMarker {
+ let align_index = align.len() - 1;
+ align[align_index] = if align[align_index] == GfmTableAlign::Left {
+ GfmTableAlign::Center
+ } else {
+ GfmTableAlign::Right
+ }
+ }
+ }
+ // Done!
+ else if event.name == Name::GfmTableDelimiterRow {
+ break;
+ }
+ }
+ } else if event.kind == Kind::Enter && event.name == Name::GfmTableDelimiterRow {
+ in_delimiter_row = true;
+ }
+
+ index += 1;
+ }
+
+ // Generate.
+ context.gfm_table_align = Some(align);
+ context.line_ending_if_needed();
+ context.push("<table>");
+}
+
+/// Handle [`Enter`][Kind::Enter]:[`GfmTableBody`][Name::GfmTableBody].
+fn on_enter_gfm_table_body(context: &mut CompileContext) {
+ context.push("<tbody>");
+}
+
+/// Handle [`Enter`][Kind::Enter]:[`GfmTableCell`][Name::GfmTableCell].
+fn on_enter_gfm_table_cell(context: &mut CompileContext) {
+ let column = context.gfm_table_column;
+ let align = context.gfm_table_align.as_ref().unwrap();
+
+ if column >= align.len() {
+ // Capture cell to ignore it.
+ context.buffer();
+ } else {
+ let value = align[column];
+ context.line_ending_if_needed();
+
+ if context.gfm_table_in_head {
+ context.push("<th");
+ } else {
+ context.push("<td");
+ }
+
+ match value {
+ GfmTableAlign::Left => context.push(" align=\"left\""),
+ GfmTableAlign::Right => context.push(" align=\"right\""),
+ GfmTableAlign::Center => context.push(" align=\"center\""),
+ GfmTableAlign::None => {}
+ }
+
+ context.push(">");
+ }
+}
+
+/// Handle [`Enter`][Kind::Enter]:[`GfmTableHead`][Name::GfmTableHead].
+fn on_enter_gfm_table_head(context: &mut CompileContext) {
+ context.line_ending_if_needed();
+ context.push("<thead>");
+ context.gfm_table_in_head = true;
+}
+
+/// Handle [`Enter`][Kind::Enter]:[`GfmTableRow`][Name::GfmTableRow].
+fn on_enter_gfm_table_row(context: &mut CompileContext) {
+ context.line_ending_if_needed();
+ context.push("<tr>");
+}
+
/// Handle [`Enter`][Kind::Enter]:[`GfmTaskListItemCheck`][Name::GfmTaskListItemCheck].
fn on_enter_gfm_task_list_item_check(context: &mut CompileContext) {
if !context.image_alt_inside {
@@ -892,7 +1047,24 @@ fn on_exit_raw_flow(context: &mut CompileContext) {
/// Handle [`Exit`][Kind::Exit]:{[`CodeText`][Name::CodeText],[`MathText`][Name::MathText]}.
fn on_exit_raw_text(context: &mut CompileContext) {
let result = context.resume();
- let mut bytes = result.as_bytes();
+ let mut bytes = result.as_bytes().to_vec();
+
+ // If we are in a GFM table, we need to decode escaped pipes.
+ // This is a rather weird GFM feature.
+ if context.gfm_table_align.is_some() {
+ let mut index = 0;
+ let mut len = bytes.len();
+
+ while index < len {
+ if index + 1 < len && bytes[index] == b'\\' && bytes[index + 1] == b'|' {
+ bytes.remove(index);
+ len -= 1;
+ }
+
+ index += 1;
+ }
+ }
+
let mut trim = false;
let mut index = 0;
let mut end = bytes.len();
@@ -910,11 +1082,12 @@ fn on_exit_raw_text(context: &mut CompileContext) {
}
if trim {
- bytes = &bytes[1..end];
+ bytes.remove(0);
+ bytes.pop();
}
context.raw_text_inside = false;
- context.push(str::from_utf8(bytes).unwrap());
+ context.push(str::from_utf8(&bytes).unwrap());
if !context.image_alt_inside {
context.push("</code>");
@@ -1113,6 +1286,62 @@ fn on_exit_gfm_strikethrough(context: &mut CompileContext) {
}
}
+/// Handle [`Exit`][Kind::Exit]:[`GfmTable`][Name::GfmTable].
+fn on_exit_gfm_table(context: &mut CompileContext) {
+ context.gfm_table_align = None;
+ context.line_ending_if_needed();
+ context.push("</table>");
+}
+
+/// Handle [`Exit`][Kind::Exit]:[`GfmTableBody`][Name::GfmTableBody].
+fn on_exit_gfm_table_body(context: &mut CompileContext) {
+ context.line_ending_if_needed();
+ context.push("</tbody>");
+}
+
+/// Handle [`Exit`][Kind::Exit]:[`GfmTableCell`][Name::GfmTableCell].
+fn on_exit_gfm_table_cell(context: &mut CompileContext) {
+ let align = context.gfm_table_align.as_ref().unwrap();
+
+ if context.gfm_table_column < align.len() {
+ if context.gfm_table_in_head {
+ context.push("</th>");
+ } else {
+ context.push("</td>");
+ }
+ } else {
+ // Stop capturing.
+ context.resume();
+ }
+
+ context.gfm_table_column += 1;
+}
+
+/// Handle [`Exit`][Kind::Exit]:[`GfmTableHead`][Name::GfmTableHead].
+fn on_exit_gfm_table_head(context: &mut CompileContext) {
+ context.gfm_table_in_head = false;
+ context.line_ending_if_needed();
+ context.push("</thead>");
+}
+
+/// Handle [`Exit`][Kind::Exit]:[`GfmTableRow`][Name::GfmTableRow].
+fn on_exit_gfm_table_row(context: &mut CompileContext) {
+ let mut column = context.gfm_table_column;
+ let len = context.gfm_table_align.as_ref().unwrap().len();
+
+ // Add “phantom” cells, for body rows that are shorter than the delimiter
+ // row (which is equal to the head row).
+ while column < len {
+ on_enter_gfm_table_cell(context);
+ on_exit_gfm_table_cell(context);
+ column += 1;
+ }
+
+ context.gfm_table_column = 0;
+ context.line_ending_if_needed();
+ context.push("</tr>");
+}
+
/// Handle [`Exit`][Kind::Exit]:[`GfmTaskListItemCheck`][Name::GfmTaskListItemCheck].
fn on_exit_gfm_task_list_item_check(context: &mut CompileContext) {
if !context.image_alt_inside {
diff --git a/src/construct/document.rs b/src/construct/document.rs
index 9c76e46..e31e58d 100644
--- a/src/construct/document.rs
+++ b/src/construct/document.rs
@@ -269,6 +269,14 @@ pub fn container_new_after(tokenizer: &mut Tokenizer) -> State {
exit_containers(tokenizer, &Phase::Prefix);
}
+ // We are “piercing” into the flow with a new container.
+ tokenizer
+ .tokenize_state
+ .document_child
+ .as_mut()
+ .unwrap()
+ .pierce = true;
+
tokenizer
.tokenize_state
.document_container_stack
@@ -398,12 +406,11 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State {
let mut stack_index = child.stack.len();
// Use two algo’s: one for when we’re suspended or in multiline things
- // like definitions, another (b) for when we fed the line ending and closed
- // a)
+ // like definitions, another for when we fed the line ending and closed.
while !document_lazy_continuation_current && stack_index > 0 {
stack_index -= 1;
let name = &child.stack[stack_index];
- if name == &Name::Paragraph || name == &Name::Definition {
+ if name == &Name::Paragraph || name == &Name::Definition || name == &Name::GfmTableHead {
document_lazy_continuation_current = true;
}
}
@@ -418,6 +425,9 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State {
}
}
+ // Reset “piercing”.
+ child.pierce = false;
+
if child.lazy
&& tokenizer.tokenize_state.document_lazy_accepting_before
&& document_lazy_continuation_current
diff --git a/src/construct/flow.rs b/src/construct/flow.rs
index 3f1cd77..3f7bc9c 100644
--- a/src/construct/flow.rs
+++ b/src/construct/flow.rs
@@ -65,29 +65,10 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
);
State::Retry(StateName::HtmlFlowStart)
}
- // Note: `-` is also used in thematic breaks so it’s not included here.
- Some(b'=') => {
- tokenizer.attempt(
- State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeParagraph),
- );
- State::Retry(StateName::HeadingSetextStart)
- }
- Some(b'[') => {
- tokenizer.attempt(
- State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeParagraph),
- );
- State::Retry(StateName::DefinitionStart)
- }
// Actual parsing: blank line? Indented code? Indented anything?
- // Also includes `-` which can be a setext heading underline or thematic break.
- None | Some(b'\t' | b'\n' | b' ' | b'-') => State::Retry(StateName::FlowBlankLineBefore),
- // Must be a paragraph.
- Some(_) => {
- tokenizer.attempt(State::Next(StateName::FlowAfter), State::Nok);
- State::Retry(StateName::ParagraphStart)
- }
+ // Tables, setext heading underlines, definitions, and paragraphs are
+ // particularly weird.
+ _ => State::Retry(StateName::FlowBlankLineBefore),
}
}
@@ -185,11 +166,25 @@ pub fn before_heading_setext(tokenizer: &mut Tokenizer) -> State {
pub fn before_thematic_break(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(
State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeDefinition),
+ State::Next(StateName::FlowBeforeGfmTable),
);
State::Retry(StateName::ThematicBreakStart)
}
+/// At GFM table.
+///
+/// ```markdown
+/// > | | a |
+/// ^
+/// ```
+pub fn before_gfm_table(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.attempt(
+ State::Next(StateName::FlowAfter),
+ State::Next(StateName::FlowBeforeDefinition),
+ );
+ State::Retry(StateName::GfmTableStart)
+}
+
/// At definition.
///
/// ```markdown
diff --git a/src/construct/gfm_autolink_literal.rs b/src/construct/gfm_autolink_literal.rs
index 7fdeb01..704c536 100644
--- a/src/construct/gfm_autolink_literal.rs
+++ b/src/construct/gfm_autolink_literal.rs
@@ -5,7 +5,6 @@ use crate::tokenizer::Tokenizer;
use crate::util::classify_character::{classify, Kind as CharacterKind};
use crate::util::slice::{Position, Slice};
use alloc::vec::Vec;
-extern crate std;
use core::str;
// To do: doc all functions.
diff --git a/src/construct/gfm_footnote_definition.rs b/src/construct/gfm_footnote_definition.rs
index 3715044..cbe816f 100644
--- a/src/construct/gfm_footnote_definition.rs
+++ b/src/construct/gfm_footnote_definition.rs
@@ -141,7 +141,7 @@
//!
//! ## References
//!
-//! * [`micromark-extension-gfm-task-list-item`](https://github.com/micromark/micromark-extension-gfm-footnote)
+//! * [`micromark-extension-gfm-footnote`](https://github.com/micromark/micromark-extension-gfm-footnote)
//!
//! > 👉 **Note**: Footnotes are not specified in GFM yet.
//! > See [`github/cmark-gfm#270`](https://github.com/github/cmark-gfm/issues/270)
diff --git a/src/construct/gfm_table.rs b/src/construct/gfm_table.rs
new file mode 100644
index 0000000..d7c2b69
--- /dev/null
+++ b/src/construct/gfm_table.rs
@@ -0,0 +1,1042 @@
+//! GFM: table occurs in the [flow][] content type.
+//!
+//! ## Grammar
+//!
+//! Tables form with the following BNF
+//! (<small>see [construct][crate::construct] for character groups</small>):
+//!
+//! ```bnf
+//! gfm_table ::= gfm_table_head 0*(eol gfm_table_body_row)
+//!
+//! ; Restriction: both rows must have the same number of cells.
+//! gfm_table_head ::= gfm_table_row eol gfm_table_delimiter_row
+//!
+//! gfm_table_row ::= ['|'] gfm_table_cell 0*('|' gfm_table_cell) ['|'] *space_or_tab
+//! gfm_table_cell ::= *space_or_tab gfm_table_text *space_or_tab
+//! gfm_table_text ::= 0*(line - '\\' - '|' | '\\' ['\\' | '|'])
+//!
+//! gfm_table_delimiter_row ::= ['|'] gfm_table_delimiter_cell 0*('|' gfm_table_delimiter_cell) ['|'] *space_or_tab
+//! gfm_table_delimiter_cell ::= *space_or_tab gfm_table_delimiter_value *space_or_tab
+//! gfm_table_delimiter_value ::= [':'] 1*'-' [':']
+//! ```
+//!
+//! As this construct occurs in flow, like all flow constructs, it must be
+//! followed by an eol (line ending) or eof (end of file).
+//!
+//! The above grammar shows that basically anything can be a cell or a row.
+//! The main thing that makes something a row, is that it occurs directly before
+//! or after a delimiter row, or after another row.
+//!
+//! It is not required for a table to have a body: it can end right after the
+//! delimiter row.
+//!
+//! Each column can be marked with an alignment.
+//! The alignment marker is a colon (`:`) used before and/or after delimiter row
+//! filler.
+//! To illustrate:
+//!
+//! ```markdown
+//! | none | left | right | center |
+//! | ---- | :--- | ----: | :----: |
+//! ```
+//!
+//! The number of cells in the delimiter row, is the number of columns of the
+//! table.
+//! Only the head row is required to have the same number of cells.
+//! Body rows are not required to have a certain number of cells.
+//! For body rows that have fewer cells than the number of columns of the table,
+//! empty cells are injected.
+//! When a row has more cells than the number of columns of the table, the
+//! superfluous cells are dropped.
+//! To illustrate:
+//!
+//! ```markdown
+//! | a | b |
+//! | - | - |
+//! | c |
+//! | d | e | f |
+//! ```
+//!
+//! Yields:
+//!
+//! ```html
+//! <table>
+//! <thead>
+//! <tr>
+//! <th>a</th>
+//! <th>b</th>
+//! </tr>
+//! </thead>
+//! <tbody>
+//! <tr>
+//! <td>c</td>
+//! <td></td>
+//! </tr>
+//! <tr>
+//! <td>d</td>
+//! <td>e</td>
+//! </tr>
+//! </tbody>
+//! </table>
+//! ```
+//!
+//! Each cell’s text is interpreted as the [text][] content type.
+//! That means that it can include constructs such as [attention][attention].
+//!
+//! The grammar for cells prohibits the use of `|` in them.
+//! To use pipes in cells, encode them as a character reference or character
+//! escape: `&vert;` (or `&VerticalLine;`, `&verbar;`, `&#124;`, `&#x7c;`) or
+//! `\|`.
+//!
+//! Escapes will typically work, but they are not supported in
+//! [code (text)][raw_text] (and the math (text) extension).
+//! To work around this, GitHub came up with a rather weird “trick”.
+//! When inside a table cell *and* inside code, escaped pipes *are* decoded.
+//! To illustrate:
+//!
+//! ```markdown
+//! | Name | Character |
+//! | - | - |
+//! | Left curly brace | `{` |
+//! | Pipe | `\|` |
+//! | Right curly brace | `}` |
+//! ```
+//!
+//! Yields:
+//!
+//! ```html
+//! <table>
+//! <thead>
+//! <tr>
+//! <th>Name</th>
+//! <th>Character</th>
+//! </tr>
+//! </thead>
+//! <tbody>
+//! <tr>
+//! <td>Left curly brace</td>
+//! <td><code>{</code></td>
+//! </tr>
+//! <tr>
+//! <td>Pipe</td>
+//! <td><code>|</code></td>
+//! </tr>
+//! <tr>
+//! <td>Right curly brace</td>
+//! <td><code>}</code></td>
+//! </tr>
+//! </tbody>
+//! </table>
+//! ```
+//!
+//! > 👉 **Note**: no other character can be escaped like this.
+//! > Escaping pipes in code does not work when not inside a table, either.
+//!
+//! ## HTML
+//!
+//! GFM tables relate to several HTML elements: `<table>`, `<tbody>`, `<td>`,
+//! `<th>`, `<thead>`, and `<tr>`.
+//! See
+//! [*§ 4.9.1 The `table` element*][html_table],
+//! [*§ 4.9.5 The `tbody` element*][html_tbody],
+//! [*§ 4.9.9 The `td` element*][html_td],
+//! [*§ 4.9.10 The `th` element*][html_th],
+//! [*§ 4.9.6 The `thead` element*][html_thead], and
+//! [*§ 4.9.8 The `tr` element*][html_tr]
+//! in the HTML spec for more info.
+//!
+//! If the alignment of a column is left, right, or center, a deprecated
+//! `align` attribute is added to each `<th>` and `<td>` element belonging to
+//! that column.
+//! That attribute is interpreted by browsers as if a CSS `text-align` property
+//! was included, with its value set to that same keyword.
+//!
+//! ## Recommendation
+//!
+//! When authoring markdown with GFM tables, it’s recommended to *always* put
+//! pipes around cells.
+//! Without them, it can be hard to infer whether the table will work, how many
+//! columns there are, and which column you are currently editing.
+//!
+//! It is recommended to not use many columns, as it results in very long lines,
+//! making it hard to infer which column you are currently editing.
+//!
+//! For larger tables, particularly when cells vary in size, it is recommended
+//! *not* to manually “pad” cell text.
+//! While it can look better, it results in a lot of time spent realigning
+//! everything when a new, longer cell is added or the longest cell removed, as
+//! every row then must be changed.
+//! Other than costing time, it also causes large diffs in Git.
+//!
+//! To illustrate, when authoring large tables, it is discouraged to pad cells
+//! like this:
+//!
+//! ```markdown
+//! | Alpha bravo charlie | delta |
+//! | ------------------- | -----------------: |
+//! | Echo | Foxtrot golf hotel |
+//! ```
+//!
+//! Instead, use single spaces (and single filler dashes):
+//!
+//! ```markdown
+//! | Alpha bravo charlie | delta |
+//! | - | -: |
+//! | Echo | Foxtrot golf hotel |
+//! ```
+//!
+//! ## Bugs
+//!
+//! GitHub’s own algorithm to parse tables contains a bug.
+//! This bug is not present in this project.
+//! The issue relating to tables is:
+//!
+//! * [GFM tables: escaped escapes are incorrectly treated as escapes](https://github.com/github/cmark-gfm/issues/277)
+//!
+//! ## Tokens
+//!
+//! * [`GfmTable`][Name::GfmTable]
+//! * [`GfmTableBody`][Name::GfmTableBody]
+//! * [`GfmTableCell`][Name::GfmTableCell]
+//! * [`GfmTableCellDivider`][Name::GfmTableCellDivider]
+//! * [`GfmTableCellText`][Name::GfmTableCellText]
+//! * [`GfmTableDelimiterCell`][Name::GfmTableDelimiterCell]
+//! * [`GfmTableDelimiterCellValue`][Name::GfmTableDelimiterCellValue]
+//! * [`GfmTableDelimiterFiller`][Name::GfmTableDelimiterFiller]
+//! * [`GfmTableDelimiterMarker`][Name::GfmTableDelimiterMarker]
+//! * [`GfmTableDelimiterRow`][Name::GfmTableDelimiterRow]
+//! * [`GfmTableHead`][Name::GfmTableHead]
+//! * [`GfmTableRow`][Name::GfmTableRow]
+//! * [`LineEnding`][Name::LineEnding]
+//!
+//! ## References
+//!
+//! * [`micromark-extension-gfm-table`](https://github.com/micromark/micromark-extension-gfm-table)
+//! * [*§ 4.10 Tables (extension)* in `GFM`](https://github.github.com/gfm/#tables-extension-)
+//!
+//! [flow]: crate::construct::flow
+//! [text]: crate::construct::text
+//! [attention]: crate::construct::attention
+//! [raw_text]: crate::construct::raw_text
+//! [html_table]: https://html.spec.whatwg.org/multipage/tables.html#the-table-element
+//! [html_tbody]: https://html.spec.whatwg.org/multipage/tables.html#the-tbody-element
+//! [html_td]: https://html.spec.whatwg.org/multipage/tables.html#the-td-element
+//! [html_th]: https://html.spec.whatwg.org/multipage/tables.html#the-th-element
+//! [html_thead]: https://html.spec.whatwg.org/multipage/tables.html#the-thead-element
+//! [html_tr]: https://html.spec.whatwg.org/multipage/tables.html#the-tr-element
+
+use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
+use crate::event::{Content, Event, Kind, Link, Name};
+use crate::resolve::Name as ResolveName;
+use crate::state::{Name as StateName, State};
+use crate::tokenizer::Tokenizer;
+use crate::util::{constant::TAB_SIZE, skip::opt_back as skip_opt_back};
+use alloc::vec;
+
+/// Start of a GFM table.
+///
+/// If there is a valid table row or table head before, then we try to parse
+/// another row.
+/// Otherwise, we try to parse a head.
+///
+/// ```markdown
+/// > | | a |
+/// ^
+/// | | - |
+/// > | | b |
+/// ^
+/// ```
+pub fn start(tokenizer: &mut Tokenizer) -> State {
+ if tokenizer.parse_state.options.constructs.gfm_table {
+ if !tokenizer.pierce
+ && !tokenizer.events.is_empty()
+ && matches!(
+ tokenizer.events[skip_opt_back(
+ &tokenizer.events,
+ tokenizer.events.len() - 1,
+ &[Name::LineEnding, Name::SpaceOrTab],
+ )]
+ .name,
+ Name::GfmTableHead | Name::GfmTableRow
+ )
+ {
+ State::Retry(StateName::GfmTableBodyRowStart)
+ } else {
+ State::Retry(StateName::GfmTableHeadRowBefore)
+ }
+ } else {
+ State::Nok
+ }
+}
+
+/// Before table head row.
+///
+/// ```markdown
+/// > | | a |
+/// ^
+/// | | - |
+/// | | b |
+/// ```
+pub fn head_row_before(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.enter(Name::GfmTableHead);
+ tokenizer.enter(Name::GfmTableRow);
+ if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+ tokenizer.attempt(State::Next(StateName::GfmTableHeadRowStart), State::Nok);
+ State::Retry(space_or_tab_min_max(
+ tokenizer,
+ 0,
+ if tokenizer.parse_state.options.constructs.code_indented {
+ TAB_SIZE - 1
+ } else {
+ usize::MAX
+ },
+ ))
+ } else {
+ State::Retry(StateName::GfmTableHeadRowStart)
+ }
+}
+
+/// Before table head row, after whitespace.
+///
+/// ```markdown
+/// > | | a |
+/// ^
+/// | | - |
+/// | | b |
+/// ```
+pub fn head_row_start(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ // 4+ spaces.
+ Some(b'\t' | b' ') => State::Nok,
+ Some(b'|') => State::Retry(StateName::GfmTableHeadRowBreak),
+ _ => {
+ tokenizer.tokenize_state.seen = true;
+ State::Retry(StateName::GfmTableHeadRowBreak)
+ }
+ }
+}
+
+/// At break in table head row.
+///
+/// ```markdown
+/// > | | a |
+/// ^
+/// ^
+/// ^
+/// | | - |
+/// | | b |
+/// ```
+pub fn head_row_break(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ None => {
+ tokenizer.tokenize_state.seen = false;
+ State::Nok
+ }
+ Some(b'\n') => {
+ // Feel free to interrupt:
+ tokenizer.interrupt = true;
+ tokenizer.exit(Name::GfmTableRow);
+ tokenizer.enter(Name::LineEnding);
+ tokenizer.consume();
+ tokenizer.exit(Name::LineEnding);
+ State::Next(StateName::GfmTableHeadDelimiterStart)
+ }
+ Some(b'\t' | b' ') => {
+ tokenizer.attempt(State::Next(StateName::GfmTableHeadRowBreak), State::Nok);
+ State::Retry(space_or_tab(tokenizer))
+ }
+ _ => {
+ // Whether a delimiter was seen.
+ if tokenizer.tokenize_state.seen {
+ tokenizer.tokenize_state.seen = false;
+ // Header cell count.
+ tokenizer.tokenize_state.size += 1;
+ }
+
+ if tokenizer.current == Some(b'|') {
+ tokenizer.enter(Name::GfmTableCellDivider);
+ tokenizer.consume();
+ tokenizer.exit(Name::GfmTableCellDivider);
+ // Whether a delimiter was seen.
+ tokenizer.tokenize_state.seen = true;
+ State::Next(StateName::GfmTableHeadRowBreak)
+ } else {
+ // Anything else is cell data.
+ tokenizer.enter(Name::Data);
+ State::Retry(StateName::GfmTableHeadRowData)
+ }
+ }
+ }
+}
+
+/// In table head row data.
+///
+/// ```markdown
+/// > | | a |
+/// ^
+/// | | - |
+/// | | b |
+/// ```
+pub fn head_row_data(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ None | Some(b'\t' | b'\n' | b' ' | b'|') => {
+ tokenizer.exit(Name::Data);
+ State::Retry(StateName::GfmTableHeadRowBreak)
+ }
+ _ => {
+ let name = if tokenizer.current == Some(b'\\') {
+ StateName::GfmTableHeadRowEscape
+ } else {
+ StateName::GfmTableHeadRowData
+ };
+ tokenizer.consume();
+ State::Next(name)
+ }
+ }
+}
+
+/// In table head row escape.
+///
+/// ```markdown
+/// > | | a\-b |
+/// ^
+/// | | ---- |
+/// | | c |
+/// ```
+pub fn head_row_escape(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ Some(b'\\' | b'|') => {
+ tokenizer.consume();
+ State::Next(StateName::GfmTableHeadRowData)
+ }
+ _ => State::Retry(StateName::GfmTableHeadRowData),
+ }
+}
+
+/// Before delimiter row.
+///
+/// ```markdown
+/// | | a |
+/// > | | - |
+/// ^
+/// | | b |
+/// ```
+pub fn head_delimiter_start(tokenizer: &mut Tokenizer) -> State {
+ // Reset `interrupt`.
+ tokenizer.interrupt = false;
+
+ if tokenizer.lazy || tokenizer.pierce {
+ State::Nok
+ } else {
+ tokenizer.enter(Name::GfmTableDelimiterRow);
+ // Track if we’ve seen a `:` or `|`.
+ tokenizer.tokenize_state.seen = false;
+
+ match tokenizer.current {
+ Some(b'\t' | b' ') => {
+ tokenizer.attempt(
+ State::Next(StateName::GfmTableHeadDelimiterBefore),
+ State::Next(StateName::GfmTableHeadDelimiterNok),
+ );
+
+ State::Retry(space_or_tab_min_max(
+ tokenizer,
+ 0,
+ if tokenizer.parse_state.options.constructs.code_indented {
+ TAB_SIZE - 1
+ } else {
+ usize::MAX
+ },
+ ))
+ }
+ _ => State::Retry(StateName::GfmTableHeadDelimiterBefore),
+ }
+ }
+}
+
+/// Before delimiter row, after optional whitespace.
+///
+/// Reused when a `|` is found later, to parse another cell.
+///
+/// ```markdown
+/// | | a |
+/// > | | - |
+/// ^
+/// | | b |
+/// ```
+pub fn head_delimiter_before(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ Some(b'-' | b':') => State::Retry(StateName::GfmTableHeadDelimiterValueBefore),
+ Some(b'|') => {
+ tokenizer.tokenize_state.seen = true;
+ // If we start with a pipe, we open a cell marker.
+ tokenizer.enter(Name::GfmTableCellDivider);
+ tokenizer.consume();
+ tokenizer.exit(Name::GfmTableCellDivider);
+ State::Next(StateName::GfmTableHeadDelimiterCellBefore)
+ }
+ // More whitespace / empty row not allowed at start.
+ _ => State::Retry(StateName::GfmTableHeadDelimiterNok),
+ }
+}
+
+/// After `|`, before delimiter cell.
+///
+/// ```markdown
+/// | | a |
+/// > | | - |
+/// ^
+/// ```
+pub fn head_delimiter_cell_before(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ Some(b'\t' | b' ') => {
+ tokenizer.attempt(
+ State::Next(StateName::GfmTableHeadDelimiterValueBefore),
+ State::Nok,
+ );
+ State::Retry(space_or_tab(tokenizer))
+ }
+ _ => State::Retry(StateName::GfmTableHeadDelimiterValueBefore),
+ }
+}
+
+/// Before delimiter cell value.
+///
+/// ```markdown
+/// | | a |
+/// > | | - |
+/// ^
+/// ```
+pub fn head_delimiter_value_before(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ None | Some(b'\n') => State::Retry(StateName::GfmTableHeadDelimiterCellAfter),
+ Some(b':') => {
+ // Align: left.
+ tokenizer.tokenize_state.size_b += 1;
+ tokenizer.tokenize_state.seen = true;
+ tokenizer.enter(Name::GfmTableDelimiterMarker);
+ tokenizer.consume();
+ tokenizer.exit(Name::GfmTableDelimiterMarker);
+ State::Next(StateName::GfmTableHeadDelimiterLeftAlignmentAfter)
+ }
+ Some(b'-') => {
+ // Align: none.
+ tokenizer.tokenize_state.size_b += 1;
+ State::Retry(StateName::GfmTableHeadDelimiterLeftAlignmentAfter)
+ }
+ _ => State::Retry(StateName::GfmTableHeadDelimiterNok),
+ }
+}
+
+/// After delimiter cell left alignment marker.
+///
+/// ```markdown
+/// | | a |
+/// > | | :- |
+/// ^
+/// ```
+pub fn head_delimiter_left_alignment_after(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ Some(b'-') => {
+ tokenizer.enter(Name::GfmTableDelimiterFiller);
+ State::Retry(StateName::GfmTableHeadDelimiterFiller)
+ }
+ // Anything else is not ok after the left-align colon.
+ _ => State::Retry(StateName::GfmTableHeadDelimiterNok),
+ }
+}
+
+/// In delimiter cell filler.
+///
+/// ```markdown
+/// | | a |
+/// > | | - |
+/// ^
+/// ```
+pub fn head_delimiter_filler(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ Some(b'-') => {
+ tokenizer.consume();
+ State::Next(StateName::GfmTableHeadDelimiterFiller)
+ }
+ Some(b':') => {
+ // Align is `center` if it was `left`, `right` otherwise.
+ tokenizer.tokenize_state.seen = true;
+ tokenizer.exit(Name::GfmTableDelimiterFiller);
+ tokenizer.enter(Name::GfmTableDelimiterMarker);
+ tokenizer.consume();
+ tokenizer.exit(Name::GfmTableDelimiterMarker);
+ State::Next(StateName::GfmTableHeadDelimiterRightAlignmentAfter)
+ }
+ _ => {
+ tokenizer.exit(Name::GfmTableDelimiterFiller);
+ State::Retry(StateName::GfmTableHeadDelimiterRightAlignmentAfter)
+ }
+ }
+}
+
+/// After delimiter cell right alignment marker.
+///
+/// ```markdown
+/// | | a |
+/// > | | -: |
+/// ^
+/// ```
+pub fn head_delimiter_right_alignment_after(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ Some(b'\t' | b' ') => {
+ tokenizer.attempt(
+ State::Next(StateName::GfmTableHeadDelimiterCellAfter),
+ State::Nok,
+ );
+ State::Retry(space_or_tab(tokenizer))
+ }
+ _ => State::Retry(StateName::GfmTableHeadDelimiterCellAfter),
+ }
+}
+
+/// After delimiter cell: at the end of the row, at another `|`, or at a disallowed byte.
+///
+/// ```markdown
+///   | | a  |
+/// > | | -: |
+///          ^
+/// ```
+pub fn head_delimiter_cell_after(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ None | Some(b'\n') => {
+ // Exit when:
+ // * there was no `:` or `|` at all (it’s a thematic break or setext
+ //   underline instead)
+ // * the header cell count is not the delimiter cell count
+ if !tokenizer.tokenize_state.seen
+ || tokenizer.tokenize_state.size != tokenizer.tokenize_state.size_b
+ {
+ State::Retry(StateName::GfmTableHeadDelimiterNok)
+ } else {
+ // Reset the flag and both cell counts, then close the delimiter row and the head.
+ tokenizer.tokenize_state.seen = false;
+ tokenizer.tokenize_state.size = 0;
+ tokenizer.tokenize_state.size_b = 0;
+ tokenizer.exit(Name::GfmTableDelimiterRow);
+ tokenizer.exit(Name::GfmTableHead);
+ tokenizer.register_resolver(ResolveName::GfmTable); // `resolve` groups rows and cells afterwards.
+ State::Ok
+ }
+ }
+ Some(b'|') => State::Retry(StateName::GfmTableHeadDelimiterBefore), // Hand the divider to `head_delimiter_before`.
+ _ => State::Retry(StateName::GfmTableHeadDelimiterNok),
+ }
+}
+
+/// In delimiter row, at a disallowed byte: reset the shared state and fail.
+///
+/// ```markdown
+///   | | a |
+/// > | | x |
+///       ^
+/// ```
+pub fn head_delimiter_nok(tokenizer: &mut Tokenizer) -> State {
+ // Reset the flag and both cell counts used while tokenizing the table head.
+ tokenizer.tokenize_state.seen = false;
+ tokenizer.tokenize_state.size = 0;
+ tokenizer.tokenize_state.size_b = 0;
+ State::Nok
+}
+
+/// Before table body row: lazy lines are rejected, otherwise the row is opened here.
+///
+/// ```markdown
+///   | | a |
+///   | | - |
+/// > | | b |
+///     ^
+/// ```
+pub fn body_row_start(tokenizer: &mut Tokenizer) -> State {
+ if tokenizer.lazy { // A lazy line is never a body row.
+ State::Nok
+ } else {
+ tokenizer.enter(Name::GfmTableRow); // Exited in `body_row_break`, at the end of the line.
+
+ match tokenizer.current {
+ Some(b'\t' | b' ') => {
+ tokenizer.attempt(State::Next(StateName::GfmTableBodyRowBefore), State::Nok);
+
+ State::Retry(space_or_tab_min_max(
+ tokenizer,
+ 0,
+ if tokenizer.parse_state.options.constructs.code_indented { // Cap the indent below `TAB_SIZE` only when indented code is enabled.
+ TAB_SIZE - 1
+ } else {
+ usize::MAX
+ },
+ ))
+ }
+ _ => State::Retry(StateName::GfmTableBodyRowBefore),
+ }
+ }
+}
+
+/// Before table body row, after optional whitespace: any whitespace still left fails the row.
+///
+/// ```markdown
+///   | | a |
+///   | | - |
+/// > | | b |
+///     ^
+/// ```
+pub fn body_row_before(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ Some(b'\t' | b' ') => State::Nok, // Presumably more indent than `body_row_start` allowed; TODO confirm.
+ _ => State::Retry(StateName::GfmTableBodyRowBreak),
+ }
+}
+
+/// At break in table body row: end of the row, whitespace, a divider, or the start of cell data.
+///
+/// ```markdown
+///   | | a |
+///   | | - |
+/// > | | b |
+///     ^
+///       ^
+///         ^
+/// ```
+pub fn body_row_break(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ None | Some(b'\n') => {
+ tokenizer.exit(Name::GfmTableRow); // Closes the row entered in `body_row_start`.
+ State::Ok
+ }
+ Some(b'\t' | b' ') => {
+ tokenizer.attempt(State::Next(StateName::GfmTableBodyRowBreak), State::Nok);
+ State::Retry(space_or_tab(tokenizer))
+ }
+ Some(b'|') => {
+ tokenizer.enter(Name::GfmTableCellDivider);
+ tokenizer.consume();
+ tokenizer.exit(Name::GfmTableCellDivider);
+ State::Next(StateName::GfmTableBodyRowBreak)
+ }
+ // Anything else is cell content.
+ _ => {
+ tokenizer.enter(Name::Data); // Exited in `body_row_data`.
+ State::Retry(StateName::GfmTableBodyRowData)
+ }
+ }
+}
+
+/// In table body row data: runs until whitespace, a `|`, or the end of the line.
+///
+/// ```markdown
+///   | | a |
+///   | | - |
+/// > | | b |
+///       ^
+/// ```
+pub fn body_row_data(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ None | Some(b'\t' | b'\n' | b' ' | b'|') => {
+ tokenizer.exit(Name::Data);
+ State::Retry(StateName::GfmTableBodyRowBreak)
+ }
+ _ => {
+ let name = if tokenizer.current == Some(b'\\') { // After a backslash, `body_row_escape` checks the next byte.
+ StateName::GfmTableBodyRowEscape
+ } else {
+ StateName::GfmTableBodyRowData
+ };
+ tokenizer.consume();
+ State::Next(name)
+ }
+ }
+}
+
+/// In table body row escape: a `\` or `|` right after the backslash is consumed as data.
+///
+/// ```markdown
+///   | | a    |
+///   | | ---- |
+/// > | | b\-c |
+///         ^
+/// ```
+pub fn body_row_escape(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ Some(b'\\' | b'|') => { // Consumed here so it can neither end the data nor start another escape.
+ tokenizer.consume();
+ State::Next(StateName::GfmTableBodyRowData)
+ }
+ _ => State::Retry(StateName::GfmTableBodyRowData),
+ }
+}
+
+/// Resolve GFM table: wrap each head and the rows after it in a table, add a body, and group cells.
+pub fn resolve(tokenizer: &mut Tokenizer) {
+ let mut index = 0;
+ // Whether a `|` seen now would open the row instead of separating two cells.
+ let mut in_first_cell_awaiting_pipe = true;
+ let mut in_row = false;
+ let mut in_delimiter_row = false;
+ let mut last_cell = (0, 0, 0, 0); // Completed cell that is not flushed yet.
+ let mut cell = (0, 0, 0, 0); // Event indices: previous cell exit, enter, first value enter, last value exit (`0`: unset).
+
+ let mut after_head_awaiting_first_body_row = false;
+ let mut last_table_end = 0; // Index of the last head or row exit seen; `0` when nothing needs closing.
+ let mut last_table_has_body = false;
+
+ while index < tokenizer.events.len() {
+ let event = &tokenizer.events[index];
+
+ if event.kind == Kind::Enter {
+ // Start of head.
+ if event.name == Name::GfmTableHead {
+ after_head_awaiting_first_body_row = false;
+
+ // Inject previous (body end and) table end.
+ if last_table_end != 0 {
+ flush_table_end(tokenizer, last_table_end, last_table_has_body);
+ last_table_has_body = false;
+ last_table_end = 0;
+ }
+
+ // Inject table start.
+ tokenizer.map.add(
+ index,
+ 0,
+ vec![Event {
+ kind: Kind::Enter,
+ name: Name::GfmTable,
+ point: tokenizer.events[index].point.clone(),
+ link: None,
+ }],
+ );
+ } else if event.name == Name::GfmTableRow || event.name == Name::GfmTableDelimiterRow {
+ in_delimiter_row = event.name == Name::GfmTableDelimiterRow;
+ in_row = true;
+ in_first_cell_awaiting_pipe = true;
+ last_cell = (0, 0, 0, 0);
+ cell = (0, index + 1, 0, 0); // The first cell starts right after the row's enter.
+
+ // Inject table body start.
+ if after_head_awaiting_first_body_row {
+ after_head_awaiting_first_body_row = false;
+ last_table_has_body = true;
+ tokenizer.map.add(
+ index,
+ 0,
+ vec![Event {
+ kind: Kind::Enter,
+ name: Name::GfmTableBody,
+ point: tokenizer.events[index].point.clone(),
+ link: None,
+ }],
+ );
+ }
+ }
+ // Cell data.
+ else if in_row
+ && (event.name == Name::Data
+ || event.name == Name::GfmTableDelimiterMarker
+ || event.name == Name::GfmTableDelimiterFiller)
+ {
+ in_first_cell_awaiting_pipe = false;
+
+ // First value in cell.
+ if cell.2 == 0 {
+ if last_cell.1 != 0 {
+ cell.0 = cell.1; // The previous cell is closed where this one opens.
+ flush_cell(tokenizer, last_cell, in_delimiter_row, None);
+ last_cell = (0, 0, 0, 0);
+ }
+
+ cell.2 = index;
+ }
+ } else if event.name == Name::GfmTableCellDivider {
+ if in_first_cell_awaiting_pipe {
+ in_first_cell_awaiting_pipe = false;
+ } else {
+ if last_cell.1 != 0 {
+ cell.0 = cell.1;
+ flush_cell(tokenizer, last_cell, in_delimiter_row, None);
+ }
+
+ last_cell = cell;
+ cell = (last_cell.1, index, 0, 0); // The next cell opens at this divider.
+ }
+ }
+ // Exit events.
+ } else if event.name == Name::GfmTableHead {
+ after_head_awaiting_first_body_row = true;
+ last_table_end = index;
+ } else if event.name == Name::GfmTableRow || event.name == Name::GfmTableDelimiterRow {
+ in_row = false;
+ last_table_end = index;
+ if last_cell.1 != 0 {
+ cell.0 = cell.1;
+ flush_cell(tokenizer, last_cell, in_delimiter_row, Some(index));
+ } else if cell.1 != 0 {
+ flush_cell(tokenizer, cell, in_delimiter_row, Some(index));
+ }
+ } else if in_row
+ && (event.name == Name::Data
+ || event.name == Name::GfmTableDelimiterMarker
+ || event.name == Name::GfmTableDelimiterFiller)
+ {
+ cell.3 = index; // Remember the last value exit seen so far.
+ }
+
+ index += 1;
+ }
+
+ if last_table_end != 0 { // Close the final table, which no later head flushed.
+ flush_table_end(tokenizer, last_table_end, last_table_has_body);
+ }
+}
+
+/// Generate a cell: queue the enter/exit events for one cell and its value in the edit map.
+fn flush_cell(
+ tokenizer: &mut Tokenizer,
+ range: (usize, usize, usize, usize), // Event indices: previous cell exit, enter, first value enter, last value exit.
+ in_delimiter_row: bool,
+ row_end: Option<usize>, // Index of the row's exit event, when this is the last cell in the row.
+) {
+ let group_name = if in_delimiter_row {
+ Name::GfmTableDelimiterCell
+ } else {
+ Name::GfmTableCell
+ };
+ let value_name = if in_delimiter_row {
+ Name::GfmTableDelimiterCellValue
+ } else {
+ Name::GfmTableCellText
+ };
+
+ // Insert an exit for the previous cell, if there is one.
+ //
+ // ```markdown
+ // > | | aa | bb | cc |
+ //          ^-- exit
+ //          ^^^^^-- this cell
+ // ```
+ if range.0 != 0 {
+ tokenizer.map.add(
+ range.0,
+ 0,
+ vec![Event {
+ kind: Kind::Exit,
+ name: group_name.clone(),
+ point: tokenizer.events[range.0].point.clone(),
+ link: None,
+ }],
+ );
+ }
+
+ // Insert enter of this cell.
+ //
+ // ```markdown
+ // > | | aa | bb | cc |
+ //          ^-- enter
+ //          ^^^^^-- this cell
+ // ```
+ tokenizer.map.add(
+ range.1,
+ 0,
+ vec![Event {
+ kind: Kind::Enter,
+ name: group_name.clone(),
+ point: tokenizer.events[range.1].point.clone(),
+ link: None,
+ }],
+ );
+
+ // Insert text start at first data start and end at last data end, and
+ // remove events between.
+ //
+ // ```markdown
+ // > | | aa | bb | cc |
+ //            ^-- enter
+ //              ^-- exit
+ //          ^^^^^-- this cell
+ // ```
+ if range.2 != 0 {
+ tokenizer.map.add(
+ range.2,
+ 0,
+ vec![Event {
+ kind: Kind::Enter,
+ name: value_name.clone(),
+ point: tokenizer.events[range.2].point.clone(),
+ link: None,
+ }],
+ );
+ debug_assert_ne!(range.3, 0); // A value that was entered must also have been exited.
+
+ if !in_delimiter_row {
+ tokenizer.events[range.2].link = Some(Link { // Mark the first data event as text content.
+ previous: None,
+ next: None,
+ content: Content::Text,
+ });
+
+ if range.3 > range.2 + 1 { // Already inside `!in_delimiter_row`; drop the events between first enter and last exit.
+ let a = range.2 + 1;
+ let b = range.3 - range.2 - 1;
+ tokenizer.map.add(a, b, vec![]);
+ }
+ }
+
+ tokenizer.map.add(
+ range.3 + 1,
+ 0,
+ vec![Event {
+ kind: Kind::Exit,
+ name: value_name,
+ point: tokenizer.events[range.3].point.clone(),
+ link: None,
+ }],
+ );
+ }
+
+ // Insert an exit for the last cell, if at the row end.
+ //
+ // ```markdown
+ // > | | aa | bb | cc |
+ //                     ^-- exit
+ //               ^^^^^^-- this cell (the last one contains two “between” parts)
+ // ```
+ if let Some(row_end) = row_end {
+ tokenizer.map.add(
+ row_end,
+ 0,
+ vec![Event {
+ kind: Kind::Exit,
+ name: group_name,
+ point: tokenizer.events[row_end].point.clone(),
+ link: None,
+ }],
+ );
+ }
+}
+
+/// Generate table end (and table body end), queued right after the event at `index`.
+fn flush_table_end(tokenizer: &mut Tokenizer, index: usize, body: bool) {
+ let mut exits = vec![];
+
+ if body { // Pushed first, so the body closes before the table does.
+ exits.push(Event {
+ kind: Kind::Exit,
+ name: Name::GfmTableBody,
+ point: tokenizer.events[index].point.clone(),
+ link: None,
+ });
+ }
+
+ exits.push(Event {
+ kind: Kind::Exit,
+ name: Name::GfmTable,
+ point: tokenizer.events[index].point.clone(),
+ link: None,
+ });
+
+ tokenizer.map.add(index + 1, 0, exits); // `index` is the last head/row exit, so insert after it.
+}
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index df1d4fb..e9cc759 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -87,6 +87,7 @@ use alloc::vec;
pub fn start(tokenizer: &mut Tokenizer) -> State {
if tokenizer.parse_state.options.constructs.heading_setext
&& !tokenizer.lazy
+ && !tokenizer.pierce
// Require a paragraph before.
&& (!tokenizer.events.is_empty()
&& tokenizer.events[skip_opt_back(
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index b5a6013..8a9edfb 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -191,7 +191,6 @@ use crate::util::{
slice::{Position, Slice},
};
use alloc::{string::String, vec};
-extern crate std;
/// Start of label end.
///
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index 9add015..de88174 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -60,6 +60,7 @@
//! * [frontmatter][]
//! * [gfm autolink literal][gfm_autolink_literal]
//! * [gfm footnote definition][gfm_footnote_definition]
+//! * [gfm table][gfm_table]
//! * [gfm task list item check][gfm_task_list_item_check]
//! * [gfm label start footnote][gfm_label_start_footnote]
//! * math (text) (in `raw_text`)
@@ -151,6 +152,7 @@ pub mod frontmatter;
pub mod gfm_autolink_literal;
pub mod gfm_footnote_definition;
pub mod gfm_label_start_footnote;
+pub mod gfm_table;
pub mod gfm_task_list_item_check;
pub mod hard_break_escape;
pub mod heading_atx;
diff --git a/src/event.rs b/src/event.rs
index 3e540c0..ba266b4 100644
--- a/src/event.rs
+++ b/src/event.rs
@@ -1240,6 +1240,265 @@ pub enum Name {
/// ^
/// ```
GfmStrikethroughText,
+ /// GFM extension: Table.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [flow content][crate::construct::flow]
+ /// * **Content model**:
+ /// [`GfmTableBody`][Name::GfmTableBody],
+ /// [`GfmTableHead`][Name::GfmTableHead],
+ /// [`LineEnding`][Name::LineEnding]
+ /// * **Construct**:
+ /// [`gfm_table`][crate::construct::gfm_table]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// > | | a |
+ /// ^^^^^
+ /// > | | - |
+ /// ^^^^^
+ /// > | | b |
+ /// ^^^^^
+ /// ```
+ GfmTable,
+ /// GFM extension: Table body.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`GfmTable`][Name::GfmTable]
+ /// * **Content model**:
+ /// [`GfmTableRow`][Name::GfmTableRow],
+ /// [`LineEnding`][Name::LineEnding]
+ /// * **Construct**:
+ /// [`gfm_table`][crate::construct::gfm_table]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// | | a |
+ /// | | - |
+ /// > | | b |
+ /// ^^^^^
+ /// ```
+ GfmTableBody,
+ /// GFM extension: Table cell.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`GfmTableRow`][Name::GfmTableRow]
+ /// * **Content model**:
+ /// [`GfmTableCellDivider`][Name::GfmTableCellDivider],
+ /// [`GfmTableCellText`][Name::GfmTableCellText],
+ /// [`SpaceOrTab`][Name::SpaceOrTab]
+ /// * **Construct**:
+ /// [`gfm_table`][crate::construct::gfm_table]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// > | | a |
+ /// ^^^^^
+ /// | | - |
+ /// > | | b |
+ /// ^^^^^
+ /// ```
+ GfmTableCell,
+ /// GFM extension: Table cell text.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`GfmTableCell`][Name::GfmTableCell]
+ /// * **Content model**:
+ /// [text content][crate::construct::text]
+ /// * **Construct**:
+ /// [`gfm_table`][crate::construct::gfm_table]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// > | | a |
+ /// ^
+ /// | | - |
+ /// > | | b |
+ /// ^
+ /// ```
+ GfmTableCellText,
+ /// GFM extension: Table cell divider.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`GfmTableCell`][Name::GfmTableCell]
+ /// * **Content model**:
+ /// void
+ /// * **Construct**:
+ /// [`gfm_table`][crate::construct::gfm_table]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// > | | a |
+ /// ^ ^
+ /// > | | - |
+ /// ^ ^
+ /// > | | b |
+ /// ^ ^
+ /// ```
+ GfmTableCellDivider,
+ /// GFM extension: Table delimiter row.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`GfmTableHead`][Name::GfmTableHead]
+ /// * **Content model**:
+ /// [`GfmTableDelimiterCell`][Name::GfmTableDelimiterCell]
+ /// * **Construct**:
+ /// [`gfm_table`][crate::construct::gfm_table]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// | | a |
+ /// > | | - |
+ /// ^^^^^
+ /// | | b |
+ /// ```
+ GfmTableDelimiterRow,
+ /// GFM extension: Table delimiter alignment marker.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`GfmTableDelimiterCellValue`][Name::GfmTableDelimiterCellValue]
+ /// * **Content model**:
+ /// void
+ /// * **Construct**:
+ /// [`gfm_table`][crate::construct::gfm_table]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// | | a |
+ /// > | | :- |
+ /// ^
+ /// | | b |
+ /// ```
+ GfmTableDelimiterMarker,
+ /// GFM extension: Table delimiter cell.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`GfmTableDelimiterRow`][Name::GfmTableDelimiterRow]
+ /// * **Content model**:
+ /// [`GfmTableCellDivider`][Name::GfmTableCellDivider],
+ /// [`GfmTableDelimiterCellValue`][Name::GfmTableDelimiterCellValue],
+ /// [`SpaceOrTab`][Name::SpaceOrTab]
+ /// * **Construct**:
+ /// [`gfm_table`][crate::construct::gfm_table]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// | | a |
+ /// > | | - |
+ /// ^^^^^
+ /// | | b |
+ /// ```
+ GfmTableDelimiterCell,
+ /// GFM extension: Table delimiter cell alignment.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`GfmTableDelimiterCell`][Name::GfmTableDelimiterCell]
+ /// * **Content model**:
+ /// [`GfmTableDelimiterMarker`][Name::GfmTableDelimiterMarker],
+ /// [`GfmTableDelimiterFiller`][Name::GfmTableDelimiterFiller]
+ /// * **Construct**:
+ /// [`gfm_table`][crate::construct::gfm_table]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// | | a |
+ /// > | | - |
+ /// ^
+ /// | | b |
+ /// ```
+ GfmTableDelimiterCellValue,
+ /// GFM extension: Table delimiter filler.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`GfmTableDelimiterCellValue`][Name::GfmTableDelimiterCellValue]
+ /// * **Content model**:
+ /// void
+ /// * **Construct**:
+ /// [`gfm_table`][crate::construct::gfm_table]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// | | a |
+ /// > | | - |
+ /// ^
+ /// | | b |
+ /// ```
+ GfmTableDelimiterFiller,
+ /// GFM extension: Table head.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`GfmTable`][Name::GfmTable]
+ /// * **Content model**:
+ /// [`GfmTableRow`][Name::GfmTableRow],
+ /// [`GfmTableDelimiterRow`][Name::GfmTableDelimiterRow],
+ /// [`LineEnding`][Name::LineEnding]
+ /// * **Construct**:
+ /// [`gfm_table`][crate::construct::gfm_table]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// > | | a |
+ /// ^^^^^
+ /// > | | - |
+ /// ^^^^^
+ /// | | b |
+ /// ```
+ GfmTableHead,
+ /// GFM extension: Table row.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`GfmTableBody`][Name::GfmTableBody],
+ /// [`GfmTableHead`][Name::GfmTableHead]
+ /// * **Content model**:
+ /// [`GfmTableCell`][Name::GfmTableCell]
+ /// * **Construct**:
+ /// [`gfm_table`][crate::construct::gfm_table]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// > | | a |
+ /// ^^^^^
+ /// | | - |
+ /// > | | b |
+ /// ^^^^^
+ /// ```
+ GfmTableRow,
/// GFM extension: task list item check.
///
/// ## Info
@@ -2436,7 +2695,7 @@ pub enum Name {
}
/// List of void events, used to make sure everything is working well.
-pub const VOID_EVENTS: [Name; 57] = [
+pub const VOID_EVENTS: [Name; 60] = [
Name::AttentionSequence,
Name::AutolinkEmail,
Name::AutolinkMarker,
@@ -2469,6 +2728,9 @@ pub const VOID_EVENTS: [Name; 57] = [
Name::GfmFootnoteDefinitionLabelMarker,
Name::GfmFootnoteDefinitionMarker,
Name::GfmStrikethroughSequence,
+ Name::GfmTableCellDivider,
+ Name::GfmTableDelimiterMarker,
+ Name::GfmTableDelimiterFiller,
Name::GfmTaskListItemMarker,
Name::GfmTaskListItemValueChecked,
Name::GfmTaskListItemValueUnchecked,
diff --git a/src/lib.rs b/src/lib.rs
index 4d1b762..0cf4f49 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -191,6 +191,17 @@ pub struct Constructs {
/// ^^^
/// ```
pub gfm_strikethrough: bool,
+ /// GFM: table.
+ ///
+ /// ```markdown
+ /// > | | a |
+ /// ^^^^^
+ /// > | | - |
+ /// ^^^^^
+ /// > | | b |
+ /// ^^^^^
+ /// ```
+ pub gfm_table: bool,
/// GFM: task list item.
///
/// ```markdown
@@ -317,6 +328,7 @@ impl Default for Constructs {
gfm_label_start_footnote: false,
gfm_footnote_definition: false,
gfm_strikethrough: false,
+ gfm_table: false,
gfm_task_list_item: false,
hard_break_escape: true,
hard_break_trailing: true,
@@ -346,6 +358,7 @@ impl Constructs {
gfm_footnote_definition: true,
gfm_label_start_footnote: true,
gfm_strikethrough: true,
+ gfm_table: true,
gfm_task_list_item: true,
..Self::default()
}
diff --git a/src/resolve.rs b/src/resolve.rs
index a62d382..d015213 100644
--- a/src/resolve.rs
+++ b/src/resolve.rs
@@ -19,6 +19,11 @@ pub enum Name {
/// and what occurs before and after each sequence.
/// Otherwise they are turned into data.
Attention,
+ /// Resolve GFM tables.
+ ///
+ /// The table head, and later each row, are all parsed separately.
+ /// Resolving groups everything together, and groups cells.
+ GfmTable,
/// Resolve heading (atx).
///
/// Heading (atx) contains further sequences and data.
@@ -60,6 +65,7 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) {
let func = match name {
Name::Label => construct::label_end::resolve,
Name::Attention => construct::attention::resolve,
+ Name::GfmTable => construct::gfm_table::resolve,
Name::HeadingAtx => construct::heading_atx::resolve,
Name::HeadingSetext => construct::heading_setext::resolve,
Name::ListItem => construct::list_item::resolve,
diff --git a/src/state.rs b/src/state.rs
index a42e802..5013ec8 100644
--- a/src/state.rs
+++ b/src/state.rs
@@ -123,6 +123,7 @@ pub enum Name {
DocumentFlowEnd,
FlowStart,
+ FlowBeforeGfmTable,
FlowBeforeCodeIndented,
FlowBeforeRaw,
FlowBeforeHtml,
@@ -309,6 +310,28 @@ pub enum Name {
StringBefore,
StringBeforeData,
+ GfmTableStart,
+ GfmTableHeadRowBefore,
+ GfmTableHeadRowStart,
+ GfmTableHeadRowBreak,
+ GfmTableHeadRowData,
+ GfmTableHeadRowEscape,
+ GfmTableHeadDelimiterStart,
+ GfmTableHeadDelimiterBefore,
+ GfmTableHeadDelimiterCellBefore,
+ GfmTableHeadDelimiterValueBefore,
+ GfmTableHeadDelimiterLeftAlignmentAfter,
+ GfmTableHeadDelimiterFiller,
+ GfmTableHeadDelimiterRightAlignmentAfter,
+ GfmTableHeadDelimiterCellAfter,
+ GfmTableHeadDelimiterNok,
+
+ GfmTableBodyRowBefore,
+ GfmTableBodyRowStart,
+ GfmTableBodyRowBreak,
+ GfmTableBodyRowData,
+ GfmTableBodyRowEscape,
+
TextStart,
TextBefore,
TextBeforeHtml,
@@ -445,6 +468,7 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
Name::DocumentFlowInside => construct::document::flow_inside,
Name::FlowStart => construct::flow::start,
+ Name::FlowBeforeGfmTable => construct::flow::before_gfm_table,
Name::FlowBeforeCodeIndented => construct::flow::before_code_indented,
Name::FlowBeforeRaw => construct::flow::before_raw,
Name::FlowBeforeHtml => construct::flow::before_html,
@@ -662,6 +686,33 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
Name::StringBefore => construct::string::before,
Name::StringBeforeData => construct::string::before_data,
+ Name::GfmTableStart => construct::gfm_table::start,
+ Name::GfmTableHeadRowBefore => construct::gfm_table::head_row_before,
+ Name::GfmTableHeadRowStart => construct::gfm_table::head_row_start,
+ Name::GfmTableHeadRowBreak => construct::gfm_table::head_row_break,
+ Name::GfmTableHeadRowData => construct::gfm_table::head_row_data,
+ Name::GfmTableHeadRowEscape => construct::gfm_table::head_row_escape,
+
+ Name::GfmTableHeadDelimiterStart => construct::gfm_table::head_delimiter_start,
+ Name::GfmTableHeadDelimiterBefore => construct::gfm_table::head_delimiter_before,
+ Name::GfmTableHeadDelimiterCellBefore => construct::gfm_table::head_delimiter_cell_before,
+ Name::GfmTableHeadDelimiterValueBefore => construct::gfm_table::head_delimiter_value_before,
+ Name::GfmTableHeadDelimiterLeftAlignmentAfter => {
+ construct::gfm_table::head_delimiter_left_alignment_after
+ }
+ Name::GfmTableHeadDelimiterFiller => construct::gfm_table::head_delimiter_filler,
+ Name::GfmTableHeadDelimiterRightAlignmentAfter => {
+ construct::gfm_table::head_delimiter_right_alignment_after
+ }
+ Name::GfmTableHeadDelimiterCellAfter => construct::gfm_table::head_delimiter_cell_after,
+ Name::GfmTableHeadDelimiterNok => construct::gfm_table::head_delimiter_nok,
+
+ Name::GfmTableBodyRowBefore => construct::gfm_table::body_row_before,
+ Name::GfmTableBodyRowStart => construct::gfm_table::body_row_start,
+ Name::GfmTableBodyRowBreak => construct::gfm_table::body_row_break,
+ Name::GfmTableBodyRowData => construct::gfm_table::body_row_data,
+ Name::GfmTableBodyRowEscape => construct::gfm_table::body_row_escape,
+
Name::TextStart => construct::text::start,
Name::TextBefore => construct::text::before,
Name::TextBeforeHtml => construct::text::before_html,
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 9b73836..0ab8784 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -305,10 +305,12 @@ pub struct Tokenizer<'a> {
///
/// Used when tokenizing [document content][crate::construct::document].
pub concrete: bool,
- /// Whether this line is lazy.
+ /// Whether this row is piercing into the current construct with more
+ /// containers.
///
- /// The previous line was a paragraph, and this line’s containers did not
- /// match.
+ /// Used when tokenizing [document content][crate::construct::document].
+ pub pierce: bool,
+ /// Whether this line is lazy: there are fewer containers than before.
pub lazy: bool,
}
@@ -370,6 +372,7 @@ impl<'a> Tokenizer<'a> {
},
map: EditMap::new(),
interrupt: false,
+ pierce: true,
concrete: false,
lazy: false,
resolvers: vec![],
diff --git a/tests/gfm_table.rs b/tests/gfm_table.rs
new file mode 100644
index 0000000..a265549
--- /dev/null
+++ b/tests/gfm_table.rs
@@ -0,0 +1,1782 @@
+extern crate micromark;
+use micromark::{micromark, micromark_with_options, Constructs, Options};
+use pretty_assertions::assert_eq;
+
+#[test]
+fn gfm_table() {
+ let gfm = Options {
+ constructs: Constructs::gfm(),
+ ..Options::default()
+ };
+
+ assert_eq!(
+ micromark("| a |\n| - |\n| b |"),
+ "<p>| a |\n| - |\n| b |</p>",
+ "should ignore tables by default"
+ );
+
+ assert_eq!(
+ micromark_with_options("| a |\n| - |\n| b |", &gfm),
+ "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td>b</td>\n</tr>\n</tbody>\n</table>",
+ "should support tables"
+ );
+
+ assert_eq!(
+ micromark_with_options("| a |", &gfm),
+ "<p>| a |</p>",
+ "should not support a table w/ the head row ending in an eof (1)"
+ );
+
+ assert_eq!(
+ micromark_with_options("| a", &gfm),
+ "<p>| a</p>",
+ "should not support a table w/ the head row ending in an eof (2)"
+ );
+
+ assert_eq!(
+ micromark_with_options("a |", &gfm),
+ "<p>a |</p>",
+ "should not support a table w/ the head row ending in an eof (3)"
+ );
+
+ assert_eq!(
+ micromark_with_options("| a |\n| - |", &gfm),
+ "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>",
+ "should support a table w/ a delimiter row ending in an eof (1)"
+ );
+
+ assert_eq!(
+ micromark_with_options("| a\n| -", &gfm),
+ "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>",
+ "should support a table w/ a delimiter row ending in an eof (2)"
+ );
+
+ assert_eq!(
+ micromark_with_options("| a |\n| - |\n| b |", &gfm),
+ "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td>b</td>\n</tr>\n</tbody>\n</table>",
+ "should support a table w/ a body row ending in an eof (1)"
+ );
+
+ assert_eq!(
+ micromark_with_options("| a\n| -\n| b", &gfm),
+ "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td>b</td>\n</tr>\n</tbody>\n</table>",
+ "should support a table w/ a body row ending in an eof (2)"
+ );
+
+ assert_eq!(
+ micromark_with_options("a|b\n-|-\nc|d", &gfm),
+ "<table>\n<thead>\n<tr>\n<th>a</th>\n<th>b</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td>c</td>\n<td>d</td>\n</tr>\n</tbody>\n</table>",
+ "should support a table w/ a body row ending in an eof (3)"
+ );
+
+ assert_eq!(
+ micromark_with_options("| a \n| -\t\n| b | ", &gfm),
+ "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td>b</td>\n</tr>\n</tbody>\n</table>",
+ "should support rows w/ trailing whitespace (1)"
+ );
+
+ assert_eq!(
+ micromark_with_options("| a | \n| - |", &gfm),
+ "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>",
+ "should support rows w/ trailing whitespace (2)"
+ );
+
+ assert_eq!(
+ micromark_with_options("| a |\n| - | ", &gfm),
+ "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>",
+ "should support rows w/ trailing whitespace (3)"
+ );
+
+ assert_eq!(
+ micromark_with_options("| a |\n| - |\n| b | ", &gfm),
+ "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td>b</td>\n</tr>\n</tbody>\n</table>",
+ "should support rows w/ trailing whitespace (4)"
+ );
+
+ assert_eq!(
+ micromark_with_options("||a|\n|-|-|", &gfm),
+ "<table>\n<thead>\n<tr>\n<th></th>\n<th>a</th>\n</tr>\n</thead>\n</table>",
+ "should support empty first header cells"
+ );
+
+ assert_eq!(
+ micromark_with_options("|a||\n|-|-|", &gfm),
+ "<table>\n<thead>\n<tr>\n<th>a</th>\n<th></th>\n</tr>\n</thead>\n</table>",
+ "should support empty last header cells"
+ );
+
+ assert_eq!(
+ micromark_with_options("a||b\n-|-|-", &gfm),
+ "<table>\n<thead>\n<tr>\n<th>a</th>\n<th></th>\n<th>b</th>\n</tr>\n</thead>\n</table>",
+ "should support empty header cells"
+ );
+
+ assert_eq!(
+ micromark_with_options("|a|b|\n|-|-|\n||c|", &gfm),
+ "<table>\n<thead>\n<tr>\n<th>a</th>\n<th>b</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td></td>\n<td>c</td>\n</tr>\n</tbody>\n</table>",
+ "should support empty first body cells"
+ );
+
+ assert_eq!(
+ micromark_with_options("|a|b|\n|-|-|\n|c||", &gfm),
+ "<table>\n<thead>\n<tr>\n<th>a</th>\n<th>b</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td>c</td>\n<td></td>\n</tr>\n</tbody>\n</table>",
+ "should support empty last body cells"
+ );
+
+ assert_eq!(
+ micromark_with_options("a|b|c\n-|-|-\nd||e", &gfm),
+ "<table>\n<thead>\n<tr>\n<th>a</th>\n<th>b</th>\n<th>c</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td>d</td>\n<td></td>\n<td>e</td>\n</tr>\n</tbody>\n</table>",
+ "should support empty body cells"
+ );
+
+ assert_eq!(
+ micromark_with_options("| a |\n| - |\n- b", &gfm),
+ "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>\n<ul>\n<li>b</li>\n</ul>",
+ "should support a list after a table"
+ );
+
+ assert_eq!(
+ micromark_with_options("> | a |\n| - |", &gfm),
+ "<blockquote>\n<p>| a |\n| - |</p>\n</blockquote>",
+ "should not support a lazy delimiter row (1)"
+ );
+
+ assert_eq!(
+ micromark_with_options("> a\n> | b |\n| - |", &gfm),
+ "<blockquote>\n<p>a\n| b |\n| - |</p>\n</blockquote>",
+ "should not support a lazy delimiter row (2)"
+ );
+
+ assert_eq!(
+ micromark_with_options("| a |\n> | - |", &gfm),
+ "<p>| a |</p>\n<blockquote>\n<p>| - |</p>\n</blockquote>",
+ "should not support a piercing delimiter row"
+ );
+
+ assert_eq!(
+ micromark_with_options("> a\n> | b |\n|-", &gfm),
+ "<blockquote>\n<p>a\n| b |\n|-</p>\n</blockquote>",
+ "should not support a lazy body row (2)"
+ );
+
+ assert_eq!(
+ micromark_with_options("> | a |\n> | - |\n| b |", &gfm),
+ "<blockquote>\n<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>\n</blockquote>\n<p>| b |</p>",
+ "should not support a lazy body row (1)"
+ );
+
+ assert_eq!(
+ micromark_with_options("> a\n> | b |\n> | - |\n| c |", &gfm),
+ "<blockquote>\n<p>a</p>\n<table>\n<thead>\n<tr>\n<th>b</th>\n</tr>\n</thead>\n</table>\n</blockquote>\n<p>| c |</p>",
+ "should not support a lazy body row (2)"
+ );
+
+ assert_eq!(
+ micromark_with_options("> | A |\n> | - |\n> | 1 |\n| 2 |", &gfm),
+ "<blockquote>\n<table>\n<thead>\n<tr>\n<th>A</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td>1</td>\n</tr>\n</tbody>\n</table>\n</blockquote>\n<p>| 2 |</p>",
+ "should not support a lazy body row (3)"
+ );
+
+ assert_eq!(
+ micromark_with_options(" - d\n - e", &gfm),
+ micromark(" - d\n - e"),
+ "should not change how lists and lazyness work"
+ );
+
+ assert_eq!(
+ micromark_with_options("| a |\n | - |", &gfm),
+ "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>",
+ "should form a table if the delimiter row is indented w/ 3 spaces"
+ );
+
+ assert_eq!(
+ micromark_with_options("| a |\n | - |", &gfm),
+ "<p>| a |\n| - |</p>",
+ "should not form a table if the delimiter row is indented w/ 4 spaces"
+ );
+
+ assert_eq!(
+ micromark_with_options("| a |\n | - |", &Options {
+ constructs: Constructs {
+ code_indented: false,
+ ..Constructs::gfm()
+ },
+ ..Options::default()
+ }),
+ "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>",
+ "should form a table if the delimiter row is indented w/ 4 spaces and indented code is turned off"
+ );
+
+ assert_eq!(
+ micromark_with_options("| a |\n| - |\n> block quote?", &gfm),
+ "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>\n<blockquote>\n<p>block quote?</p>\n</blockquote>",
+ "should be interrupted by a block quote"
+ );
+
+ assert_eq!(
+ micromark_with_options("| a |\n| - |\n>", &gfm),
+ "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>\n<blockquote>\n</blockquote>",
+ "should be interrupted by a block quote (empty)"
+ );
+
+ assert_eq!(
+ micromark_with_options("| a |\n| - |\n- list?", &gfm),
+ "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>\n<ul>\n<li>list?</li>\n</ul>",
+ "should be interrupted by a list"
+ );
+
+ assert_eq!(
+ micromark_with_options("| a |\n| - |\n-", &gfm),
+ "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>\n<ul>\n<li></li>\n</ul>",
+ "should be interrupted by a list (empty)"
+ );
+
+ assert_eq!(
+ micromark_with_options(
+ "| a |\n| - |\n<!-- HTML? -->",
+ &Options {
+ allow_dangerous_html: true,
+ constructs: Constructs::gfm(),
+ ..Options::default()
+ }
+ ),
+ "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>\n<!-- HTML? -->",
+ "should be interrupted by HTML (flow)"
+ );
+
+ assert_eq!(
+ micromark_with_options("| a |\n| - |\n\tcode?", &Options {
+ allow_dangerous_html: true,
+ constructs: Constructs::gfm(),
+ ..Options::default()
+ }),
+ "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>\n<pre><code>code?\n</code></pre>",
+ "should be interrupted by code (indented)"
+ );
+
+ assert_eq!(
+ micromark_with_options("| a |\n| - |\n```js\ncode?", &Options {
+ allow_dangerous_html: true,
+ constructs: Constructs::gfm(),
+ ..Options::default()
+ }),
+ "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>\n<pre><code class=\"language-js\">code?\n</code></pre>\n",
+ "should be interrupted by code (fenced)"
+ );
+
+ assert_eq!(
+ micromark_with_options(
+ "| a |\n| - |\n***",
+ &Options {
+ allow_dangerous_html: true,
+ constructs: Constructs::gfm(),
+ ..Options::default()
+ }
+ ),
+ "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>\n<hr />",
+ "should be interrupted by a thematic break"
+ );
+
+ assert_eq!(
+ micromark_with_options("| a |\n| - |\n# heading?", &gfm),
+ "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>\n<h1>heading?</h1>",
+ "should be interrupted by a heading (ATX)"
+ );
+
+ assert_eq!(
+ micromark_with_options("| a |\n| - |\nheading\n=", &gfm),
+ "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td>heading</td>\n</tr>\n<tr>\n<td>=</td>\n</tr>\n</tbody>\n</table>",
+ "should *not* be interrupted by a heading (setext)"
+ );
+
+ assert_eq!(
+ micromark_with_options("| a |\n| - |\nheading\n---", &gfm),
+ "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td>heading</td>\n</tr>\n</tbody>\n</table>\n<hr />",
+ "should *not* be interrupted by a heading (setext), but interrupt if the underline is also a thematic break"
+ );
+
+ assert_eq!(
+ micromark_with_options("| a |\n| - |\nheading\n-", &gfm),
+ "<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td>heading</td>\n</tr>\n</tbody>\n</table>\n<ul>\n<li></li>\n</ul>",
+ "should *not* be interrupted by a heading (setext), but interrupt if the underline is also an empty list item bullet"
+ );
+
+ assert_eq!(
+ micromark_with_options("a\nb\n-:", &gfm),
+ "<p>a</p>\n<table>\n<thead>\n<tr>\n<th align=\"right\">b</th>\n</tr>\n</thead>\n</table>",
+ "should support a single head row"
+ );
+
+ assert_eq!(
+ micromark_with_options("> | a |\n> | - |", &gfm),
+ "<blockquote>\n<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>\n</blockquote>",
+ "should support a table in a container"
+ );
+
+ assert_eq!(
+ micromark_with_options("> | a |\n| - |", &gfm),
+ "<blockquote>\n<p>| a |\n| - |</p>\n</blockquote>",
+ "should not support a lazy delimiter row if the head row is in a container"
+ );
+
+ assert_eq!(
+ micromark_with_options("| a |\n> | - |", &gfm),
+ "<p>| a |</p>\n<blockquote>\n<p>| - |</p>\n</blockquote>",
+ "should not support a “piercing” container for the delimiter row, if the head row was not in that container"
+ );
+
+ assert_eq!(
+ micromark_with_options("> | a |\n> | - |\n| c |", &gfm),
+ "<blockquote>\n<table>\n<thead>\n<tr>\n<th>a</th>\n</tr>\n</thead>\n</table>\n</blockquote>\n<p>| c |</p>",
+ "should not support a lazy body row if the head row and delimiter row are in a container"
+ );
+
+ assert_eq!(
+ micromark_with_options("> | a |\n| - |\n> | c |", &gfm),
+ "<blockquote>\n<p>| a |\n| - |\n| c |</p>\n</blockquote>",
+ "should not support a lazy delimiter row if the head row and a further body row are in a container"
+ );
+
+ assert_eq!(micromark_with_options("", &gfm), "", "should support");
+
+ assert_eq!(micromark_with_options("", &gfm), "", "should support");
+
+ assert_eq!(
+ micromark_with_options(
+ r###"# Align
+
+## An empty initial cell
+
+| | a|c|
+|--|:----:|:---|
+|a|b|c|
+|a|b|c|
+
+## Missing alignment characters
+
+| a | b | c |
+| |---|---|
+| d | e | f |
+
+* * *
+
+| a | b | c |
+|---|---| |
+| d | e | f |
+
+## Incorrect characters
+
+| a | b | c |
+|---|-*-|---|
+| d | e | f |
+
+## Two alignments
+
+|a|
+|::|
+
+|a|
+|:-:|
+
+## Two at the start or end
+
+|a|
+|::-|
+
+|a|
+|-::|
+
+## In the middle
+
+|a|
+|-:-|
+
+## A space in the middle
+
+|a|
+|- -|
+
+## No pipe
+
+a
+:-:
+
+a
+:-
+
+a
+-:
+
+## A single colon
+
+|a|
+|:|
+
+a
+:
+
+## Alignment on empty cells
+
+| a | b | c | d | e |
+| - | - | :- | -: | :-: |
+| f |
+"###,
+ &gfm
+ ),
+ r###"<h1>Align</h1>
+<h2>An empty initial cell</h2>
+<table>
+<thead>
+<tr>
+<th></th>
+<th align="center">a</th>
+<th align="left">c</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>a</td>
+<td align="center">b</td>
+<td align="left">c</td>
+</tr>
+<tr>
+<td>a</td>
+<td align="center">b</td>
+<td align="left">c</td>
+</tr>
+</tbody>
+</table>
+<h2>Missing alignment characters</h2>
+<p>| a | b | c |
+| |---|---|
+| d | e | f |</p>
+<hr />
+<p>| a | b | c |
+|---|---| |
+| d | e | f |</p>
+<h2>Incorrect characters</h2>
+<p>| a | b | c |
+|---|-*-|---|
+| d | e | f |</p>
+<h2>Two alignments</h2>
+<p>|a|
+|::|</p>
+<table>
+<thead>
+<tr>
+<th align="center">a</th>
+</tr>
+</thead>
+</table>
+<h2>Two at the start or end</h2>
+<p>|a|
+|::-|</p>
+<p>|a|
+|-::|</p>
+<h2>In the middle</h2>
+<p>|a|
+|-:-|</p>
+<h2>A space in the middle</h2>
+<p>|a|
+|- -|</p>
+<h2>No pipe</h2>
+<table>
+<thead>
+<tr>
+<th align="center">a</th>
+</tr>
+</thead>
+</table>
+<table>
+<thead>
+<tr>
+<th align="left">a</th>
+</tr>
+</thead>
+</table>
+<table>
+<thead>
+<tr>
+<th align="right">a</th>
+</tr>
+</thead>
+</table>
+<h2>A single colon</h2>
+<p>|a|
+|:|</p>
+<p>a
+:</p>
+<h2>Alignment on empty cells</h2>
+<table>
+<thead>
+<tr>
+<th>a</th>
+<th>b</th>
+<th align="left">c</th>
+<th align="right">d</th>
+<th align="center">e</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>f</td>
+<td></td>
+<td align="left"></td>
+<td align="right"></td>
+<td align="center"></td>
+</tr>
+</tbody>
+</table>
+"###,
+ "should match alignment like GitHub"
+ );
+
+ assert_eq!(
+ micromark_with_options(
+ r###"# Tables
+
+| a | b | c |
+| - | - | - |
+| d | e | f |
+
+## No body
+
+| a | b | c |
+| - | - | - |
+
+## One column
+
+| a |
+| - |
+| b |
+"###,
+ &gfm
+ ),
+ r###"<h1>Tables</h1>
+<table>
+<thead>
+<tr>
+<th>a</th>
+<th>b</th>
+<th>c</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>d</td>
+<td>e</td>
+<td>f</td>
+</tr>
+</tbody>
+</table>
+<h2>No body</h2>
+<table>
+<thead>
+<tr>
+<th>a</th>
+<th>b</th>
+<th>c</th>
+</tr>
+</thead>
+</table>
+<h2>One column</h2>
+<table>
+<thead>
+<tr>
+<th>a</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>b</td>
+</tr>
+</tbody>
+</table>
+"###,
+ "should match basic like GitHub"
+ );
+
+ assert_eq!(
+ micromark_with_options(
+ r###"# Tables in things
+
+## In lists
+
+* Unordered:
+
+ | A | B |
+ | - | - |
+ | 1 | 2 |
+
+1. Ordered:
+
+ | A | B |
+ | - | - |
+ | 1 | 2 |
+
+* Lazy?
+ | A | B |
+ | - | - |
+ | 1 | 2 |
+ | 3 | 4 |
+ | 5 | 6 |
+| 7 | 8 |
+
+## In block quotes
+
+> W/ space:
+> | A | B |
+> | - | - |
+> | 1 | 2 |
+
+>W/o space:
+>| A | B |
+>| - | - |
+>| 1 | 2 |
+
+> Lazy?
+> | A | B |
+> | - | - |
+> | 1 | 2 |
+>| 3 | 4 |
+| 5 | 6 |
+
+### List interrupting delimiters
+
+a |
+- |
+
+a
+-|
+
+a
+|-
+"###,
+ &gfm
+ ),
+ r###"<h1>Tables in things</h1>
+<h2>In lists</h2>
+<ul>
+<li>
+<p>Unordered:</p>
+<table>
+<thead>
+<tr>
+<th>A</th>
+<th>B</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>1</td>
+<td>2</td>
+</tr>
+</tbody>
+</table>
+</li>
+</ul>
+<ol>
+<li>
+<p>Ordered:</p>
+<table>
+<thead>
+<tr>
+<th>A</th>
+<th>B</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>1</td>
+<td>2</td>
+</tr>
+</tbody>
+</table>
+</li>
+</ol>
+<ul>
+<li>Lazy?
+<table>
+<thead>
+<tr>
+<th>A</th>
+<th>B</th>
+</tr>
+</thead>
+</table>
+</li>
+</ul>
+<p>| 1 | 2 |
+| 3 | 4 |
+| 5 | 6 |
+| 7 | 8 |</p>
+<h2>In block quotes</h2>
+<blockquote>
+<p>W/ space:</p>
+<table>
+<thead>
+<tr>
+<th>A</th>
+<th>B</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>1</td>
+<td>2</td>
+</tr>
+</tbody>
+</table>
+</blockquote>
+<blockquote>
+<p>W/o space:</p>
+<table>
+<thead>
+<tr>
+<th>A</th>
+<th>B</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>1</td>
+<td>2</td>
+</tr>
+</tbody>
+</table>
+</blockquote>
+<blockquote>
+<p>Lazy?</p>
+<table>
+<thead>
+<tr>
+<th>A</th>
+<th>B</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>1</td>
+<td>2</td>
+</tr>
+<tr>
+<td>3</td>
+<td>4</td>
+</tr>
+</tbody>
+</table>
+</blockquote>
+<p>| 5 | 6 |</p>
+<h3>List interrupting delimiters</h3>
+<p>a |</p>
+<ul>
+<li>|</li>
+</ul>
+<table>
+<thead>
+<tr>
+<th>a</th>
+</tr>
+</thead>
+</table>
+<table>
+<thead>
+<tr>
+<th>a</th>
+</tr>
+</thead>
+</table>
+"###,
+ "should match containers like GitHub"
+ );
+
+ assert_eq!(
+ micromark_with_options(
+ r###"| a |
+| - |
+| - |
+| 1 |
+"###,
+ &gfm
+ ),
+ r###"<table>
+<thead>
+<tr>
+<th>a</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>-</td>
+</tr>
+<tr>
+<td>1</td>
+</tr>
+</tbody>
+</table>
+"###,
+ "should match a double delimiter row like GitHub"
+ );
+
+ assert_eq!(
+ micromark_with_options(
+ r###"# Examples from GFM
+
+## A
+
+| foo | bar |
+| --- | --- |
+| baz | bim |
+
+## B
+
+| abc | defghi |
+:-: | -----------:
+bar | baz
+
+## C
+
+| f\|oo |
+| ------ |
+| b `\|` az |
+| b **\|** im |
+
+## D
+
+| abc | def |
+| --- | --- |
+| bar | baz |
+> bar
+
+## E
+
+| abc | def |
+| --- | --- |
+| bar | baz |
+bar
+
+bar
+
+## F
+
+| abc | def |
+| --- |
+| bar |
+
+## G
+
+| abc | def |
+| --- | --- |
+| bar |
+| bar | baz | boo |
+
+## H
+
+| abc | def |
+| --- | --- |
+"###,
+ &gfm
+ ),
+ r###"<h1>Examples from GFM</h1>
+<h2>A</h2>
+<table>
+<thead>
+<tr>
+<th>foo</th>
+<th>bar</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>baz</td>
+<td>bim</td>
+</tr>
+</tbody>
+</table>
+<h2>B</h2>
+<table>
+<thead>
+<tr>
+<th align="center">abc</th>
+<th align="right">defghi</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td align="center">bar</td>
+<td align="right">baz</td>
+</tr>
+</tbody>
+</table>
+<h2>C</h2>
+<table>
+<thead>
+<tr>
+<th>f|oo</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>b <code>|</code> az</td>
+</tr>
+<tr>
+<td>b <strong>|</strong> im</td>
+</tr>
+</tbody>
+</table>
+<h2>D</h2>
+<table>
+<thead>
+<tr>
+<th>abc</th>
+<th>def</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>bar</td>
+<td>baz</td>
+</tr>
+</tbody>
+</table>
+<blockquote>
+<p>bar</p>
+</blockquote>
+<h2>E</h2>
+<table>
+<thead>
+<tr>
+<th>abc</th>
+<th>def</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>bar</td>
+<td>baz</td>
+</tr>
+<tr>
+<td>bar</td>
+<td></td>
+</tr>
+</tbody>
+</table>
+<p>bar</p>
+<h2>F</h2>
+<p>| abc | def |
+| --- |
+| bar |</p>
+<h2>G</h2>
+<table>
+<thead>
+<tr>
+<th>abc</th>
+<th>def</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>bar</td>
+<td></td>
+</tr>
+<tr>
+<td>bar</td>
+<td>baz</td>
+</tr>
+</tbody>
+</table>
+<h2>H</h2>
+<table>
+<thead>
+<tr>
+<th>abc</th>
+<th>def</th>
+</tr>
+</thead>
+</table>
+"###,
+ "should match examples from the GFM spec like GitHub"
+ );
+
+ assert_eq!(
+ micromark_with_options(
+ r###"# Grave accents
+
+## Grave accent in cell
+
+| A | B |
+|--------------|---|
+| <kbd>`</kbd> | C |
+
+## Escaped grave accent in “inline code” in cell
+
+| A |
+|-----|
+| `\` |
+
+## “Empty” inline code
+
+| 1 | 2 | 3 |
+|---|------|----|
+| a | `` | |
+| b | `` | `` |
+| c | ` | ` |
+| d | `|` |
+| e | `\|` | |
+| f | \| | |
+
+## Escaped pipes in code in cells
+
+| `\|\\` |
+| --- |
+| `\|\\` |
+
+`\|\\`
+"###,
+ &Options {
+ allow_dangerous_html: true,
+ ..gfm.clone()
+ }
+ ),
+ r###"<h1>Grave accents</h1>
+<h2>Grave accent in cell</h2>
+<table>
+<thead>
+<tr>
+<th>A</th>
+<th>B</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><kbd>`</kbd></td>
+<td>C</td>
+</tr>
+</tbody>
+</table>
+<h2>Escaped grave accent in “inline code” in cell</h2>
+<table>
+<thead>
+<tr>
+<th>A</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><code>\</code></td>
+</tr>
+</tbody>
+</table>
+<h2>“Empty” inline code</h2>
+<table>
+<thead>
+<tr>
+<th>1</th>
+<th>2</th>
+<th>3</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>a</td>
+<td>``</td>
+<td></td>
+</tr>
+<tr>
+<td>b</td>
+<td>``</td>
+<td>``</td>
+</tr>
+<tr>
+<td>c</td>
+<td>`</td>
+<td>`</td>
+</tr>
+<tr>
+<td>d</td>
+<td>`</td>
+<td>`</td>
+</tr>
+<tr>
+<td>e</td>
+<td><code>|</code></td>
+<td></td>
+</tr>
+<tr>
+<td>f</td>
+<td>|</td>
+<td></td>
+</tr>
+</tbody>
+</table>
+<h2>Escaped pipes in code in cells</h2>
+<table>
+<thead>
+<tr>
+<th><code>|\\</code></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><code>|\\</code></td>
+</tr>
+</tbody>
+</table>
+<p><code>\|\\</code></p>
+"###,
+ "should match grave accent like GitHub"
+ );
+
+ assert_eq!(
+ micromark_with_options(
+ r###"# Code
+
+## Indented delimiter row
+
+a
+ |-
+
+a
+ |-
+
+## Indented body
+
+| a |
+ | - |
+ | C |
+ | D |
+ | E |
+"###,
+ &gfm
+ ),
+ r###"<h1>Code</h1>
+<h2>Indented delimiter row</h2>
+<table>
+<thead>
+<tr>
+<th>a</th>
+</tr>
+</thead>
+</table>
+<p>a
+|-</p>
+<h2>Indented body</h2>
+<table>
+<thead>
+<tr>
+<th>a</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>C</td>
+</tr>
+<tr>
+<td>D</td>
+</tr>
+</tbody>
+</table>
+<pre><code>| E |
+</code></pre>
+"###,
+ "should match indent like GitHub"
+ );
+
+ assert_eq!(
+ micromark_with_options(
+ r###"## Blank line
+
+a
+:-
+b
+
+c
+
+## Block quote
+
+a
+:-
+b
+> c
+
+## Code (fenced)
+
+a
+:-
+b
+```
+c
+```
+
+## Code (indented)
+
+a
+:-
+b
+ c
+
+## Definition
+
+a
+:-
+b
+[c]: d
+
+## Heading (atx)
+
+a
+:-
+b
+# c
+
+
+## Heading (setext) (rank 1)
+
+a
+:-
+b
+==
+c
+
+## Heading (setext) (rank 2)
+
+a
+:-
+b
+--
+c
+
+## HTML (flow, kind 1: raw)
+
+a
+:-
+b
+<pre>
+ a
+</pre>
+
+## HTML (flow, kind 2: comment)
+
+a
+:-
+b
+<!-- c -->
+
+## HTML (flow, kind 3: instruction)
+
+a
+:-
+b
+<? c ?>
+
+## HTML (flow, kind 4: declaration)
+
+a
+:-
+b
+<!C>
+
+## HTML (flow, kind 5: cdata)
+
+a
+:-
+b
+<![CDATA[c]]>
+
+## HTML (flow, kind 6: basic)
+
+a
+:-
+b
+<div>
+
+## HTML (flow, kind 7: complete)
+
+a
+:-
+b
+<x>
+
+## List (ordered, 1)
+
+a
+:-
+b
+1. c
+
+## List (ordered, other)
+
+a
+:-
+b
+2. c
+
+## List (unordered)
+
+a
+:-
+b
+* c
+
+## List (unordered, blank)
+
+a
+:-
+b
+*
+c
+
+## List (unordered, blank start)
+
+a
+:-
+b
+*
+ c
+
+## Thematic break
+
+a
+:-
+b
+***
+"###,
+ &Options {
+ allow_dangerous_html: true,
+ ..gfm.clone()
+ }
+ ),
+ r###"<h2>Blank line</h2>
+<table>
+<thead>
+<tr>
+<th align="left">a</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td align="left">b</td>
+</tr>
+</tbody>
+</table>
+<p>c</p>
+<h2>Block quote</h2>
+<table>
+<thead>
+<tr>
+<th align="left">a</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td align="left">b</td>
+</tr>
+</tbody>
+</table>
+<blockquote>
+<p>c</p>
+</blockquote>
+<h2>Code (fenced)</h2>
+<table>
+<thead>
+<tr>
+<th align="left">a</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td align="left">b</td>
+</tr>
+</tbody>
+</table>
+<pre><code>c
+</code></pre>
+<h2>Code (indented)</h2>
+<table>
+<thead>
+<tr>
+<th align="left">a</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td align="left">b</td>
+</tr>
+</tbody>
+</table>
+<pre><code>c
+</code></pre>
+<h2>Definition</h2>
+<table>
+<thead>
+<tr>
+<th align="left">a</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td align="left">b</td>
+</tr>
+<tr>
+<td align="left">[c]: d</td>
+</tr>
+</tbody>
+</table>
+<h2>Heading (atx)</h2>
+<table>
+<thead>
+<tr>
+<th align="left">a</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td align="left">b</td>
+</tr>
+</tbody>
+</table>
+<h1>c</h1>
+<h2>Heading (setext) (rank 1)</h2>
+<table>
+<thead>
+<tr>
+<th align="left">a</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td align="left">b</td>
+</tr>
+<tr>
+<td align="left">==</td>
+</tr>
+<tr>
+<td align="left">c</td>
+</tr>
+</tbody>
+</table>
+<h2>Heading (setext) (rank 2)</h2>
+<table>
+<thead>
+<tr>
+<th align="left">a</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td align="left">b</td>
+</tr>
+<tr>
+<td align="left">--</td>
+</tr>
+<tr>
+<td align="left">c</td>
+</tr>
+</tbody>
+</table>
+<h2>HTML (flow, kind 1: raw)</h2>
+<table>
+<thead>
+<tr>
+<th align="left">a</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td align="left">b</td>
+</tr>
+</tbody>
+</table>
+<pre>
+ a
+</pre>
+<h2>HTML (flow, kind 2: comment)</h2>
+<table>
+<thead>
+<tr>
+<th align="left">a</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td align="left">b</td>
+</tr>
+</tbody>
+</table>
+<!-- c -->
+<h2>HTML (flow, kind 3: instruction)</h2>
+<table>
+<thead>
+<tr>
+<th align="left">a</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td align="left">b</td>
+</tr>
+</tbody>
+</table>
+<? c ?>
+<h2>HTML (flow, kind 4: declaration)</h2>
+<table>
+<thead>
+<tr>
+<th align="left">a</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td align="left">b</td>
+</tr>
+</tbody>
+</table>
+<!C>
+<h2>HTML (flow, kind 5: cdata)</h2>
+<table>
+<thead>
+<tr>
+<th align="left">a</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td align="left">b</td>
+</tr>
+</tbody>
+</table>
+<![CDATA[c]]>
+<h2>HTML (flow, kind 6: basic)</h2>
+<table>
+<thead>
+<tr>
+<th align="left">a</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td align="left">b</td>
+</tr>
+</tbody>
+</table>
+<div>
+<h2>HTML (flow, kind 7: complete)</h2>
+<table>
+<thead>
+<tr>
+<th align="left">a</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td align="left">b</td>
+</tr>
+</tbody>
+</table>
+<x>
+<h2>List (ordered, 1)</h2>
+<table>
+<thead>
+<tr>
+<th align="left">a</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td align="left">b</td>
+</tr>
+</tbody>
+</table>
+<ol>
+<li>c</li>
+</ol>
+<h2>List (ordered, other)</h2>
+<table>
+<thead>
+<tr>
+<th align="left">a</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td align="left">b</td>
+</tr>
+</tbody>
+</table>
+<ol start="2">
+<li>c</li>
+</ol>
+<h2>List (unordered)</h2>
+<table>
+<thead>
+<tr>
+<th align="left">a</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td align="left">b</td>
+</tr>
+</tbody>
+</table>
+<ul>
+<li>c</li>
+</ul>
+<h2>List (unordered, blank)</h2>
+<table>
+<thead>
+<tr>
+<th align="left">a</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td align="left">b</td>
+</tr>
+</tbody>
+</table>
+<ul>
+<li></li>
+</ul>
+<p>c</p>
+<h2>List (unordered, blank start)</h2>
+<table>
+<thead>
+<tr>
+<th align="left">a</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td align="left">b</td>
+</tr>
+</tbody>
+</table>
+<ul>
+<li>c</li>
+</ul>
+<h2>Thematic break</h2>
+<table>
+<thead>
+<tr>
+<th align="left">a</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td align="left">b</td>
+</tr>
+</tbody>
+</table>
+<hr />
+"###,
+ "should match interrupt like GitHub"
+ );
+
+ assert_eq!(
+ micromark_with_options(
+ r###"# Loose
+
+## Loose
+
+Header 1 | Header 2
+-------- | --------
+Cell 1 | Cell 2
+Cell 3 | Cell 4
+
+## One “column”, loose
+
+a
+-
+b
+
+## No pipe in first row
+
+a
+| - |
+"###,
+ &gfm
+ ),
+ r###"<h1>Loose</h1>
+<h2>Loose</h2>
+<table>
+<thead>
+<tr>
+<th>Header 1</th>
+<th>Header 2</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>Cell 1</td>
+<td>Cell 2</td>
+</tr>
+<tr>
+<td>Cell 3</td>
+<td>Cell 4</td>
+</tr>
+</tbody>
+</table>
+<h2>One “column”, loose</h2>
+<h2>a</h2>
+<p>b</p>
+<h2>No pipe in first row</h2>
+<table>
+<thead>
+<tr>
+<th>a</th>
+</tr>
+</thead>
+</table>
+"###,
+ "should match loose tables like GitHub"
+ );
+
+ assert_eq!(
+ micromark_with_options(
+ r###"# Some more escapes
+
+| Head |
+| ------------- |
+| A | Alpha |
+| B \| Bravo |
+| C \\| Charlie |
+| D \\\| Delta |
+| E \\\\| Echo |
+
+Note: GH has a bug where in case C and E, the escaped escape is treated as a
+normal escape: <https://github.com/github/cmark-gfm/issues/277>.
+"###,
+ &gfm
+ ),
+ r###"<h1>Some more escapes</h1>
+<table>
+<thead>
+<tr>
+<th>Head</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>A</td>
+</tr>
+<tr>
+<td>B | Bravo</td>
+</tr>
+<tr>
+<td>C \</td>
+</tr>
+<tr>
+<td>D \| Delta</td>
+</tr>
+<tr>
+<td>E \\</td>
+</tr>
+</tbody>
+</table>
+<p>Note: GH has a bug where in case C and E, the escaped escape is treated as a
+normal escape: <a href="https://github.com/github/cmark-gfm/issues/277">https://github.com/github/cmark-gfm/issues/277</a>.</p>
+"###,
+ "should match loose escapes like GitHub"
+ );
+}
diff --git a/tests/heading_setext.rs b/tests/heading_setext.rs
index 22155f0..fa979be 100644
--- a/tests/heading_setext.rs
+++ b/tests/heading_setext.rs
@@ -258,6 +258,18 @@ fn heading_setext() {
);
assert_eq!(
+ micromark("a\n- ==="),
+ "<p>a</p>\n<ul>\n<li>===</li>\n</ul>",
+ "should not support piercing (1)"
+ );
+
+ assert_eq!(
+ micromark("a\n* ---"),
+ "<p>a</p>\n<ul>\n<li>\n<hr />\n</li>\n</ul>",
+ "should not support piercing (2)"
+ );
+
+ assert_eq!(
micromark_with_options(
"a\n-",
&Options {