aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-08-31 16:50:20 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-08-31 16:50:20 +0200
commitb1590a4fb0c28fdb6af866ea79c186ea57284493 (patch)
tree61264dc36135e7dae34a04992a99b9f3f71e7b8e /src
parent670f1d82e01ea2394b21d7d1857f41bdc67b3fce (diff)
downloadmarkdown-rs-b1590a4fb0c28fdb6af866ea79c186ea57284493.tar.gz
markdown-rs-b1590a4fb0c28fdb6af866ea79c186ea57284493.tar.bz2
markdown-rs-b1590a4fb0c28fdb6af866ea79c186ea57284493.zip
Add support for GFM tables
Diffstat (limited to 'src')
-rw-r--r--src/compiler.rs235
-rw-r--r--src/construct/document.rs16
-rw-r--r--src/construct/flow.rs41
-rw-r--r--src/construct/gfm_autolink_literal.rs1
-rw-r--r--src/construct/gfm_footnote_definition.rs2
-rw-r--r--src/construct/gfm_table.rs1042
-rw-r--r--src/construct/heading_setext.rs1
-rw-r--r--src/construct/label_end.rs1
-rw-r--r--src/construct/mod.rs2
-rw-r--r--src/event.rs264
-rw-r--r--src/lib.rs13
-rw-r--r--src/resolve.rs6
-rw-r--r--src/state.rs51
-rw-r--r--src/tokenizer.rs9
14 files changed, 1648 insertions, 36 deletions
diff --git a/src/compiler.rs b/src/compiler.rs
index 9057505..5626f8a 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -68,6 +68,43 @@ struct Definition {
title: Option<String>,
}
+/// Alignment of a single GFM table column, as declared by its delimiter cell.
+#[derive(Debug, PartialEq, Eq, Copy, Clone)]
+enum GfmTableAlign {
+    /// The delimiter cell has no colon: the column is not aligned.
+    ///
+    /// ```markdown
+    ///   | | aaa |
+    /// > | | --- |
+    ///       ^^^
+    /// ```
+    None,
+    /// The delimiter cell starts with a colon: the column is left aligned.
+    ///
+    /// ```markdown
+    ///   | | aaa |
+    /// > | | :-- |
+    ///       ^^^
+    /// ```
+    Left,
+    /// The delimiter cell starts and ends with a colon: the column is
+    /// centered.
+    ///
+    /// ```markdown
+    ///   | | aaa |
+    /// > | | :-: |
+    ///       ^^^
+    /// ```
+    Center,
+    /// The delimiter cell ends with a colon: the column is right aligned.
+    ///
+    /// ```markdown
+    ///   | | aaa |
+    /// > | | --: |
+    ///       ^^^
+    /// ```
+    Right,
+}
+
/// Context used to compile markdown.
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug)]
@@ -107,6 +144,12 @@ struct CompileContext<'a> {
pub gfm_footnote_definitions: Vec<(String, String)>,
pub gfm_footnote_definition_calls: Vec<(String, usize)>,
pub gfm_footnote_definition_stack: Vec<(usize, usize)>,
+ /// Whether we are in a GFM table head.
+ pub gfm_table_in_head: bool,
+ /// Current GFM table alignment.
+ pub gfm_table_align: Option<Vec<GfmTableAlign>>,
+ /// Current GFM table column.
+ pub gfm_table_column: usize,
// Fields used to influance the current compilation.
/// Ignore the next line ending.
pub slurp_one_line_ending: bool,
@@ -153,6 +196,9 @@ impl<'a> CompileContext<'a> {
gfm_footnote_definitions: vec![],
gfm_footnote_definition_calls: vec![],
gfm_footnote_definition_stack: vec![],
+ gfm_table_in_head: false,
+ gfm_table_align: None,
+ gfm_table_column: 0,
tight_stack: vec![],
slurp_one_line_ending: false,
image_alt_inside: false,
@@ -350,6 +396,11 @@ fn enter(context: &mut CompileContext) {
Name::GfmFootnoteDefinition => on_enter_gfm_footnote_definition(context),
Name::GfmFootnoteCall => on_enter_gfm_footnote_call(context),
Name::GfmStrikethrough => on_enter_gfm_strikethrough(context),
+ Name::GfmTable => on_enter_gfm_table(context),
+ Name::GfmTableBody => on_enter_gfm_table_body(context),
+ Name::GfmTableCell => on_enter_gfm_table_cell(context),
+ Name::GfmTableHead => on_enter_gfm_table_head(context),
+ Name::GfmTableRow => on_enter_gfm_table_row(context),
Name::GfmTaskListItemCheck => on_enter_gfm_task_list_item_check(context),
Name::HtmlFlow => on_enter_html_flow(context),
Name::HtmlText => on_enter_html_text(context),
@@ -407,6 +458,11 @@ fn exit(context: &mut CompileContext) {
Name::GfmFootnoteDefinitionPrefix => on_exit_gfm_footnote_definition_prefix(context),
Name::GfmFootnoteDefinition => on_exit_gfm_footnote_definition(context),
Name::GfmStrikethrough => on_exit_gfm_strikethrough(context),
+ Name::GfmTable => on_exit_gfm_table(context),
+ Name::GfmTableBody => on_exit_gfm_table_body(context),
+ Name::GfmTableCell => on_exit_gfm_table_cell(context),
+ Name::GfmTableHead => on_exit_gfm_table_head(context),
+ Name::GfmTableRow => on_exit_gfm_table_row(context),
Name::GfmTaskListItemCheck => on_exit_gfm_task_list_item_check(context),
Name::GfmTaskListItemValueChecked => on_exit_gfm_task_list_item_value_checked(context),
Name::HardBreakEscape | Name::HardBreakTrailing => on_exit_break(context),
@@ -536,6 +592,105 @@ fn on_enter_gfm_strikethrough(context: &mut CompileContext) {
}
}
+/// Handle [`Enter`][Kind::Enter]:[`GfmTable`][Name::GfmTable].
+///
+/// Scans ahead through the events of the delimiter row to work out the
+/// alignment of every column, stores the result on the context, and opens the
+/// `<table>` element.
+fn on_enter_gfm_table(context: &mut CompileContext) {
+    let mut align = vec![];
+    let mut in_delimiter_row = false;
+    let mut index = context.index;
+
+    while index < context.events.len() {
+        let event = &context.events[index];
+        let entering = event.kind == Kind::Enter;
+
+        if !in_delimiter_row {
+            // Skip everything up to the start of the delimiter row.
+            if entering && event.name == Name::GfmTableDelimiterRow {
+                in_delimiter_row = true;
+            }
+        } else if event.name == Name::GfmTableDelimiterCellValue {
+            if entering {
+                // A value opens a new column: a marker directly after the
+                // enter means the value starts with a colon.
+                let marker_first =
+                    context.events[index + 1].name == Name::GfmTableDelimiterMarker;
+                align.push(if marker_first {
+                    GfmTableAlign::Left
+                } else {
+                    GfmTableAlign::None
+                });
+            } else if context.events[index - 1].name == Name::GfmTableDelimiterMarker {
+                // A marker directly before the exit means the value ends with
+                // a colon: upgrade the column that was just opened.
+                let last = align.len() - 1;
+                align[last] = if align[last] == GfmTableAlign::Left {
+                    GfmTableAlign::Center
+                } else {
+                    GfmTableAlign::Right
+                }
+            }
+        } else if !entering && event.name == Name::GfmTableDelimiterRow {
+            // End of the delimiter row: all columns are known.
+            break;
+        }
+
+        index += 1;
+    }
+
+    context.gfm_table_align = Some(align);
+    context.line_ending_if_needed();
+    context.push("<table>");
+}
+
+/// Handle [`Enter`][Kind::Enter]:[`GfmTableBody`][Name::GfmTableBody].
+///
+/// Opens the `<tbody>` element.
+fn on_enter_gfm_table_body(context: &mut CompileContext) {
+    context.push("<tbody>");
+}
+
+/// Handle [`Enter`][Kind::Enter]:[`GfmTableCell`][Name::GfmTableCell].
+///
+/// Opens a `<th>` (in the head) or `<td>` (elsewhere) with the `align`
+/// attribute of the current column.
+/// Cells beyond the known columns are buffered instead, so that they can be
+/// thrown away when the cell exits.
+fn on_enter_gfm_table_cell(context: &mut CompileContext) {
+    let column = context.gfm_table_column;
+    let columns = context.gfm_table_align.as_ref().unwrap();
+
+    if column < columns.len() {
+        let alignment = columns[column];
+        context.line_ending_if_needed();
+
+        let open = if context.gfm_table_in_head {
+            "<th"
+        } else {
+            "<td"
+        };
+        context.push(open);
+
+        match alignment {
+            GfmTableAlign::None => {}
+            GfmTableAlign::Left => context.push(" align=\"left\""),
+            GfmTableAlign::Center => context.push(" align=\"center\""),
+            GfmTableAlign::Right => context.push(" align=\"right\""),
+        }
+
+        context.push(">");
+    } else {
+        // Superfluous cell: capture its output so it can be ignored.
+        context.buffer();
+    }
+}
+
+/// Handle [`Enter`][Kind::Enter]:[`GfmTableHead`][Name::GfmTableHead].
+///
+/// Opens the `<thead>` element and marks that cells are now head cells.
+fn on_enter_gfm_table_head(context: &mut CompileContext) {
+    context.line_ending_if_needed();
+    context.push("<thead>");
+    context.gfm_table_in_head = true;
+}
+
+/// Handle [`Enter`][Kind::Enter]:[`GfmTableRow`][Name::GfmTableRow].
+///
+/// Opens the `<tr>` element on its own line.
+fn on_enter_gfm_table_row(context: &mut CompileContext) {
+    context.line_ending_if_needed();
+    context.push("<tr>");
+}
+
/// Handle [`Enter`][Kind::Enter]:[`GfmTaskListItemCheck`][Name::GfmTaskListItemCheck].
fn on_enter_gfm_task_list_item_check(context: &mut CompileContext) {
if !context.image_alt_inside {
@@ -892,7 +1047,24 @@ fn on_exit_raw_flow(context: &mut CompileContext) {
/// Handle [`Exit`][Kind::Exit]:{[`CodeText`][Name::CodeText],[`MathText`][Name::MathText]}.
fn on_exit_raw_text(context: &mut CompileContext) {
let result = context.resume();
- let mut bytes = result.as_bytes();
+ let mut bytes = result.as_bytes().to_vec();
+
+ // If we are in a GFM table, we need to decode escaped pipes.
+ // This is a rather weird GFM feature.
+ if context.gfm_table_align.is_some() {
+ let mut index = 0;
+ let mut len = bytes.len();
+
+ while index < len {
+ if index + 1 < len && bytes[index] == b'\\' && bytes[index + 1] == b'|' {
+ bytes.remove(index);
+ len -= 1;
+ }
+
+ index += 1;
+ }
+ }
+
let mut trim = false;
let mut index = 0;
let mut end = bytes.len();
@@ -910,11 +1082,12 @@ fn on_exit_raw_text(context: &mut CompileContext) {
}
if trim {
- bytes = &bytes[1..end];
+ bytes.remove(0);
+ bytes.pop();
}
context.raw_text_inside = false;
- context.push(str::from_utf8(bytes).unwrap());
+ context.push(str::from_utf8(&bytes).unwrap());
if !context.image_alt_inside {
context.push("</code>");
@@ -1113,6 +1286,62 @@ fn on_exit_gfm_strikethrough(context: &mut CompileContext) {
}
}
+/// Handle [`Exit`][Kind::Exit]:[`GfmTable`][Name::GfmTable].
+///
+/// Forgets the column alignment of the table and closes the `<table>`
+/// element.
+fn on_exit_gfm_table(context: &mut CompileContext) {
+    context.gfm_table_align = None;
+    context.line_ending_if_needed();
+    context.push("</table>");
+}
+
+/// Handle [`Exit`][Kind::Exit]:[`GfmTableBody`][Name::GfmTableBody].
+///
+/// Closes the `<tbody>` element on its own line.
+fn on_exit_gfm_table_body(context: &mut CompileContext) {
+    context.line_ending_if_needed();
+    context.push("</tbody>");
+}
+
+/// Handle [`Exit`][Kind::Exit]:[`GfmTableCell`][Name::GfmTableCell].
+///
+/// Closes the `<th>`/`<td>` of a cell that belongs to a known column, or
+/// discards the buffered output of a superfluous cell, and then moves on to
+/// the next column.
+fn on_exit_gfm_table_cell(context: &mut CompileContext) {
+    let columns = context.gfm_table_align.as_ref().unwrap().len();
+
+    if context.gfm_table_column >= columns {
+        // Superfluous cell: stop capturing and drop what was captured.
+        context.resume();
+    } else {
+        let close = if context.gfm_table_in_head {
+            "</th>"
+        } else {
+            "</td>"
+        };
+        context.push(close);
+    }
+
+    context.gfm_table_column += 1;
+}
+
+/// Handle [`Exit`][Kind::Exit]:[`GfmTableHead`][Name::GfmTableHead].
+///
+/// Marks that cells are no longer head cells and closes the `<thead>`
+/// element.
+fn on_exit_gfm_table_head(context: &mut CompileContext) {
+    context.gfm_table_in_head = false;
+    context.line_ending_if_needed();
+    context.push("</thead>");
+}
+
+/// Handle [`Exit`][Kind::Exit]:[`GfmTableRow`][Name::GfmTableRow].
+///
+/// Pads the row with empty cells up to the number of columns of the table,
+/// resets the column counter for the next row, and closes the `<tr>` element.
+fn on_exit_gfm_table_row(context: &mut CompileContext) {
+    let columns = context.gfm_table_align.as_ref().unwrap().len();
+
+    // Inject “phantom” cells for rows that are shorter than the delimiter row
+    // (which has as many cells as the head row).
+    for _ in context.gfm_table_column..columns {
+        on_enter_gfm_table_cell(context);
+        on_exit_gfm_table_cell(context);
+    }
+
+    context.gfm_table_column = 0;
+    context.line_ending_if_needed();
+    context.push("</tr>");
+}
+
/// Handle [`Exit`][Kind::Exit]:[`GfmTaskListItemCheck`][Name::GfmTaskListItemCheck].
fn on_exit_gfm_task_list_item_check(context: &mut CompileContext) {
if !context.image_alt_inside {
diff --git a/src/construct/document.rs b/src/construct/document.rs
index 9c76e46..e31e58d 100644
--- a/src/construct/document.rs
+++ b/src/construct/document.rs
@@ -269,6 +269,14 @@ pub fn container_new_after(tokenizer: &mut Tokenizer) -> State {
exit_containers(tokenizer, &Phase::Prefix);
}
+ // We are “piercing” into the flow with a new container.
+ tokenizer
+ .tokenize_state
+ .document_child
+ .as_mut()
+ .unwrap()
+ .pierce = true;
+
tokenizer
.tokenize_state
.document_container_stack
@@ -398,12 +406,11 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State {
let mut stack_index = child.stack.len();
// Use two algo’s: one for when we’re suspended or in multiline things
- // like definitions, another (b) for when we fed the line ending and closed
- // a)
+ // like definitions, another for when we fed the line ending and closed.
while !document_lazy_continuation_current && stack_index > 0 {
stack_index -= 1;
let name = &child.stack[stack_index];
- if name == &Name::Paragraph || name == &Name::Definition {
+ if name == &Name::Paragraph || name == &Name::Definition || name == &Name::GfmTableHead {
document_lazy_continuation_current = true;
}
}
@@ -418,6 +425,9 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State {
}
}
+ // Reset “piercing”.
+ child.pierce = false;
+
if child.lazy
&& tokenizer.tokenize_state.document_lazy_accepting_before
&& document_lazy_continuation_current
diff --git a/src/construct/flow.rs b/src/construct/flow.rs
index 3f1cd77..3f7bc9c 100644
--- a/src/construct/flow.rs
+++ b/src/construct/flow.rs
@@ -65,29 +65,10 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
);
State::Retry(StateName::HtmlFlowStart)
}
- // Note: `-` is also used in thematic breaks so it’s not included here.
- Some(b'=') => {
- tokenizer.attempt(
- State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeParagraph),
- );
- State::Retry(StateName::HeadingSetextStart)
- }
- Some(b'[') => {
- tokenizer.attempt(
- State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeParagraph),
- );
- State::Retry(StateName::DefinitionStart)
- }
// Actual parsing: blank line? Indented code? Indented anything?
- // Also includes `-` which can be a setext heading underline or thematic break.
- None | Some(b'\t' | b'\n' | b' ' | b'-') => State::Retry(StateName::FlowBlankLineBefore),
- // Must be a paragraph.
- Some(_) => {
- tokenizer.attempt(State::Next(StateName::FlowAfter), State::Nok);
- State::Retry(StateName::ParagraphStart)
- }
+ // Tables, setext heading underlines, definitions, and paragraphs are
+ // particularly weird.
+ _ => State::Retry(StateName::FlowBlankLineBefore),
}
}
@@ -185,11 +166,25 @@ pub fn before_heading_setext(tokenizer: &mut Tokenizer) -> State {
pub fn before_thematic_break(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(
State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeDefinition),
+ State::Next(StateName::FlowBeforeGfmTable),
);
State::Retry(StateName::ThematicBreakStart)
}
+/// At a potential GFM table.
+///
+/// Tries the table construct; when that fails, continues with definitions.
+///
+/// ```markdown
+/// > | | a |
+///     ^
+/// ```
+pub fn before_gfm_table(tokenizer: &mut Tokenizer) -> State {
+    let on_ok = State::Next(StateName::FlowAfter);
+    let on_nok = State::Next(StateName::FlowBeforeDefinition);
+    tokenizer.attempt(on_ok, on_nok);
+    State::Retry(StateName::GfmTableStart)
+}
+
/// At definition.
///
/// ```markdown
diff --git a/src/construct/gfm_autolink_literal.rs b/src/construct/gfm_autolink_literal.rs
index 7fdeb01..704c536 100644
--- a/src/construct/gfm_autolink_literal.rs
+++ b/src/construct/gfm_autolink_literal.rs
@@ -5,7 +5,6 @@ use crate::tokenizer::Tokenizer;
use crate::util::classify_character::{classify, Kind as CharacterKind};
use crate::util::slice::{Position, Slice};
use alloc::vec::Vec;
-extern crate std;
use core::str;
// To do: doc al functions.
diff --git a/src/construct/gfm_footnote_definition.rs b/src/construct/gfm_footnote_definition.rs
index 3715044..cbe816f 100644
--- a/src/construct/gfm_footnote_definition.rs
+++ b/src/construct/gfm_footnote_definition.rs
@@ -141,7 +141,7 @@
//!
//! ## References
//!
-//! * [`micromark-extension-gfm-task-list-item`](https://github.com/micromark/micromark-extension-gfm-footnote)
+//! * [`micromark-extension-gfm-footnote`](https://github.com/micromark/micromark-extension-gfm-footnote)
//!
//! > 👉 **Note**: Footnotes are not specified in GFM yet.
//! > See [`github/cmark-gfm#270`](https://github.com/github/cmark-gfm/issues/270)
diff --git a/src/construct/gfm_table.rs b/src/construct/gfm_table.rs
new file mode 100644
index 0000000..d7c2b69
--- /dev/null
+++ b/src/construct/gfm_table.rs
@@ -0,0 +1,1042 @@
+//! GFM: table occurs in the [flow][] content type.
+//!
+//! ## Grammar
+//!
+//! Tables form with the following BNF
+//! (<small>see [construct][crate::construct] for character groups</small>):
+//!
+//! ```bnf
+//! gfm_table ::= gfm_table_head 0*(eol gfm_table_body_row)
+//!
+//! ; Restriction: both rows must have the same number of cells.
+//! gfm_table_head ::= gfm_table_row eol gfm_table_delimiter_row
+//!
+//! gfm_table_row ::= ['|'] gfm_table_cell 0*('|' gfm_table_cell) ['|'] *space_or_tab
+//! gfm_table_cell ::= *space_or_tab gfm_table_text *space_or_tab
+//! gfm_table_text ::= 0*(line - '\\' - '|' | '\\' ['\\' | '|'])
+//!
+//! gfm_table_delimiter_row ::= ['|'] gfm_table_delimiter_cell 0*('|' gfm_table_delimiter_cell) ['|'] *space_or_tab
+//! gfm_table_delimiter_cell ::= *space_or_tab gfm_table_delimiter_value *space_or_tab
+//! gfm_table_delimiter_value ::= [':'] 1*'-' [':']
+//! ```
+//!
+//! As this construct occurs in flow, like all flow constructs, it must be
+//! followed by an eol (line ending) or eof (end of file).
+//!
+//! The above grammar shows that basically anything can be a cell or a row.
+//! The main thing that makes something a row, is that it occurs directly before
+//! or after a delimiter row, or after another row.
+//!
+//! It is not required for a table to have a body: it can end right after the
+//! delimiter row.
+//!
+//! Each column can be marked with an alignment.
+//! The alignment marker is a colon (`:`) used before and/or after delimiter row
+//! filler.
+//! To illustrate:
+//!
+//! ```markdown
+//! | none | left | right | center |
+//! | ---- | :--- | ----: | :----: |
+//! ```
+//!
+//! The number of cells in the delimiter row, is the number of columns of the
+//! table.
+//! Only the head row is required to have the same number of cells.
+//! Body rows are not required to have a certain number of cells.
+//! For body rows that have fewer cells than the number of columns of the table,
+//! empty cells are injected.
+//! When a row has more cells than the number of columns of the table, the
+//! superfluous cells are dropped.
+//! To illustrate:
+//!
+//! ```markdown
+//! | a | b |
+//! | - | - |
+//! | c |
+//! | d | e | f |
+//! ```
+//!
+//! Yields:
+//!
+//! ```html
+//! <table>
+//! <thead>
+//! <tr>
+//! <th>a</th>
+//! <th>b</th>
+//! </tr>
+//! </thead>
+//! <tbody>
+//! <tr>
+//! <td>c</td>
+//! <td></td>
+//! </tr>
+//! <tr>
+//! <td>d</td>
+//! <td>e</td>
+//! </tr>
+//! </tbody>
+//! </table>
+//! ```
+//!
+//! Each cell’s text is interpreted as the [text][] content type.
+//! That means that it can include constructs such as [attention][attention].
+//!
+//! The grammar for cells prohibits the use of `|` in them.
+//! To use pipes in cells, encode them as a character reference or character
+//! escape: `&vert;` (or `&VerticalLine;`, `&verbar;`, `&#124;`, `&#x7c;`) or
+//! `\|`.
+//!
+//! Escapes will typically work, but they are not supported in
+//! [code (text)][raw_text] (and the math (text) extension).
+//! To work around this, GitHub came up with a rather weird “trick”.
+//! When inside a table cell *and* inside code, escaped pipes *are* decoded.
+//! To illustrate:
+//!
+//! ```markdown
+//! | Name | Character |
+//! | - | - |
+//! | Left curly brace | `{` |
+//! | Pipe | `\|` |
+//! | Right curly brace | `}` |
+//! ```
+//!
+//! Yields:
+//!
+//! ```html
+//! <table>
+//! <thead>
+//! <tr>
+//! <th>Name</th>
+//! <th>Character</th>
+//! </tr>
+//! </thead>
+//! <tbody>
+//! <tr>
+//! <td>Left curly brace</td>
+//! <td><code>{</code></td>
+//! </tr>
+//! <tr>
+//! <td>Pipe</td>
+//! <td><code>|</code></td>
+//! </tr>
+//! <tr>
+//! <td>Right curly brace</td>
+//! <td><code>}</code></td>
+//! </tr>
+//! </tbody>
+//! </table>
+//! ```
+//!
+//! > 👉 **Note**: no other character can be escaped like this.
+//! > Escaping pipes in code does not work when not inside a table, either.
+//!
+//! ## HTML
+//!
+//! GFM tables relate to several HTML elements: `<table>`, `<tbody>`, `<td>`,
+//! `<th>`, `<thead>`, and `<tr>`.
+//! See
+//! [*§ 4.9.1 The `table` element*][html_table],
+//! [*§ 4.9.5 The `tbody` element*][html_tbody],
+//! [*§ 4.9.9 The `td` element*][html_td],
+//! [*§ 4.9.10 The `th` element*][html_th],
+//! [*§ 4.9.6 The `thead` element*][html_thead], and
+//! [*§ 4.9.8 The `tr` element*][html_tr]
+//! in the HTML spec for more info.
+//!
+//! If the alignment of a column is left, right, or center, a deprecated
+//! `align` attribute is added to each `<th>` and `<td>` element belonging to
+//! that column.
+//! That attribute is interpreted by browsers as if a CSS `text-align` property
+//! was included, with its value set to that same keyword.
+//!
+//! ## Recommendation
+//!
+//! When authoring markdown with GFM tables, it’s recommended to *always* put
+//! pipes around cells.
+//! Without them, it can be hard to infer whether the table will work, how many
+//! columns there are, and which column you are currently editing.
+//!
+//! It is recommended to not use many columns, as it results in very long lines,
+//! making it hard to infer which column you are currently editing.
+//!
+//! For larger tables, particularly when cells vary in size, it is recommended
+//! *not* to manually “pad” cell text.
+//! While it can look better, it results in a lot of time spent realigning
+//! everything when a new, longer cell is added or the longest cell removed, as
+//! every row then must be changed.
+//! Other than costing time, it also causes large diffs in Git.
+//!
+//! To illustrate, when authoring large tables, it is discouraged to pad cells
+//! like this:
+//!
+//! ```markdown
+//! | Alpha bravo charlie | delta |
+//! | ------------------- | -----------------: |
+//! | Echo | Foxtrot golf hotel |
+//! ```
+//!
+//! Instead, use single spaces (and single filler dashes):
+//!
+//! ```markdown
+//! | Alpha bravo charlie | delta |
+//! | - | -: |
+//! | Echo | Foxtrot golf hotel |
+//! ```
+//!
+//! ## Bugs
+//!
+//! GitHub’s own algorithm to parse tables contains a bug.
+//! This bug is not present in this project.
+//! The issue relating to tables is:
+//!
+//! * [GFM tables: escaped escapes are incorrectly treated as escapes](https://github.com/github/cmark-gfm/issues/277)
+//!
+//! ## Tokens
+//!
+//! * [`GfmTable`][Name::GfmTable]
+//! * [`GfmTableBody`][Name::GfmTableBody]
+//! * [`GfmTableCell`][Name::GfmTableCell]
+//! * [`GfmTableCellDivider`][Name::GfmTableCellDivider]
+//! * [`GfmTableCellText`][Name::GfmTableCellText]
+//! * [`GfmTableDelimiterCell`][Name::GfmTableDelimiterCell]
+//! * [`GfmTableDelimiterCellValue`][Name::GfmTableDelimiterCellValue]
+//! * [`GfmTableDelimiterFiller`][Name::GfmTableDelimiterFiller]
+//! * [`GfmTableDelimiterMarker`][Name::GfmTableDelimiterMarker]
+//! * [`GfmTableDelimiterRow`][Name::GfmTableDelimiterRow]
+//! * [`GfmTableHead`][Name::GfmTableHead]
+//! * [`GfmTableRow`][Name::GfmTableRow]
+//! * [`LineEnding`][Name::LineEnding]
+//!
+//! ## References
+//!
+//! * [`micromark-extension-gfm-table`](https://github.com/micromark/micromark-extension-gfm-table)
+//! * [*§ 4.10 Tables (extension)* in `GFM`](https://github.github.com/gfm/#tables-extension-)
+//!
+//! [flow]: crate::construct::flow
+//! [text]: crate::construct::text
+//! [attention]: crate::construct::attention
+//! [raw_text]: crate::construct::raw_text
+//! [html_table]: https://html.spec.whatwg.org/multipage/tables.html#the-table-element
+//! [html_tbody]: https://html.spec.whatwg.org/multipage/tables.html#the-tbody-element
+//! [html_td]: https://html.spec.whatwg.org/multipage/tables.html#the-td-element
+//! [html_th]: https://html.spec.whatwg.org/multipage/tables.html#the-th-element
+//! [html_thead]: https://html.spec.whatwg.org/multipage/tables.html#the-thead-element
+//! [html_tr]: https://html.spec.whatwg.org/multipage/tables.html#the-tr-element
+
+use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
+use crate::event::{Content, Event, Kind, Link, Name};
+use crate::resolve::Name as ResolveName;
+use crate::state::{Name as StateName, State};
+use crate::tokenizer::Tokenizer;
+use crate::util::{constant::TAB_SIZE, skip::opt_back as skip_opt_back};
+use alloc::vec;
+
+/// Start of a GFM table.
+///
+/// When the previous event (ignoring line endings and whitespace) closes a
+/// table head or a table row, and we are not piercing into the flow with a
+/// new container, another body row is attempted.
+/// In all other cases a new table head is attempted.
+///
+/// ```markdown
+/// > | | a |
+///     ^
+///   | | - |
+/// > | | b |
+///     ^
+/// ```
+pub fn start(tokenizer: &mut Tokenizer) -> State {
+    if !tokenizer.parse_state.options.constructs.gfm_table {
+        return State::Nok;
+    }
+
+    let after_head_or_row = !tokenizer.pierce
+        && !tokenizer.events.is_empty()
+        && matches!(
+            tokenizer.events[skip_opt_back(
+                &tokenizer.events,
+                tokenizer.events.len() - 1,
+                &[Name::LineEnding, Name::SpaceOrTab],
+            )]
+            .name,
+            Name::GfmTableHead | Name::GfmTableRow
+        );
+
+    State::Retry(if after_head_or_row {
+        StateName::GfmTableBodyRowStart
+    } else {
+        StateName::GfmTableHeadRowBefore
+    })
+}
+
+/// Before the table head row.
+///
+/// Opens the head and its row, and eats optional initial whitespace (limited
+/// to less than a tab size when indented code is turned on).
+///
+/// ```markdown
+/// > | | a |
+///     ^
+///   | | - |
+///   | | b |
+/// ```
+pub fn head_row_before(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.enter(Name::GfmTableHead);
+    tokenizer.enter(Name::GfmTableRow);
+
+    match tokenizer.current {
+        Some(b'\t' | b' ') => {
+            let max = if tokenizer.parse_state.options.constructs.code_indented {
+                TAB_SIZE - 1
+            } else {
+                usize::MAX
+            };
+            tokenizer.attempt(State::Next(StateName::GfmTableHeadRowStart), State::Nok);
+            State::Retry(space_or_tab_min_max(tokenizer, 0, max))
+        }
+        _ => State::Retry(StateName::GfmTableHeadRowStart),
+    }
+}
+
+/// Before the table head row, after optional whitespace.
+///
+/// A row that does not start with a pipe is handled as if a divider was
+/// already seen, so that its first cell is counted.
+///
+/// ```markdown
+/// > | | a |
+///     ^
+///   | | - |
+///   | | b |
+/// ```
+pub fn head_row_start(tokenizer: &mut Tokenizer) -> State {
+    // Whitespace here means there were 4+ spaces.
+    if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+        return State::Nok;
+    }
+
+    if tokenizer.current != Some(b'|') {
+        tokenizer.tokenize_state.seen = true;
+    }
+
+    State::Retry(StateName::GfmTableHeadRowBreak)
+}
+
+/// At a break in the table head row: at a divider, at the start of cell
+/// data, at whitespace, or at the end of the line.
+///
+/// `tokenize_state.seen` tracks whether a divider was seen (so that the next
+/// cell must be counted) and `tokenize_state.size` is the header cell count.
+///
+/// ```markdown
+/// > | | a |
+///     ^
+///       ^
+///         ^
+///   | | - |
+///   | | b |
+/// ```
+pub fn head_row_break(tokenizer: &mut Tokenizer) -> State {
+    match tokenizer.current {
+        None => {
+            // A head row at the end of the input cannot be followed by a
+            // delimiter row, so this is not a table.
+            // Reset both the divider flag and the header cell count, so that
+            // no stale state is left behind.
+            tokenizer.tokenize_state.seen = false;
+            tokenizer.tokenize_state.size = 0;
+            State::Nok
+        }
+        Some(b'\n') => {
+            // Feel free to interrupt:
+            tokenizer.interrupt = true;
+            tokenizer.exit(Name::GfmTableRow);
+            tokenizer.enter(Name::LineEnding);
+            tokenizer.consume();
+            tokenizer.exit(Name::LineEnding);
+            State::Next(StateName::GfmTableHeadDelimiterStart)
+        }
+        Some(b'\t' | b' ') => {
+            tokenizer.attempt(State::Next(StateName::GfmTableHeadRowBreak), State::Nok);
+            State::Retry(space_or_tab(tokenizer))
+        }
+        _ => {
+            // Whether a delimiter was seen.
+            if tokenizer.tokenize_state.seen {
+                tokenizer.tokenize_state.seen = false;
+                // Header cell count.
+                tokenizer.tokenize_state.size += 1;
+            }
+
+            if tokenizer.current == Some(b'|') {
+                tokenizer.enter(Name::GfmTableCellDivider);
+                tokenizer.consume();
+                tokenizer.exit(Name::GfmTableCellDivider);
+                // Whether a delimiter was seen.
+                tokenizer.tokenize_state.seen = true;
+                State::Next(StateName::GfmTableHeadRowBreak)
+            } else {
+                // Anything else is cell data.
+                tokenizer.enter(Name::Data);
+                State::Retry(StateName::GfmTableHeadRowData)
+            }
+        }
+    }
+}
+
+/// In data of a table head row cell.
+///
+/// Data runs until the end of the input, whitespace, a line ending, or a
+/// pipe; a backslash switches to the escape state.
+///
+/// ```markdown
+/// > | | a |
+///       ^
+///   | | - |
+///   | | b |
+/// ```
+pub fn head_row_data(tokenizer: &mut Tokenizer) -> State {
+    if matches!(
+        tokenizer.current,
+        None | Some(b'\t' | b'\n' | b' ' | b'|')
+    ) {
+        tokenizer.exit(Name::Data);
+        return State::Retry(StateName::GfmTableHeadRowBreak);
+    }
+
+    let at_backslash = tokenizer.current == Some(b'\\');
+    tokenizer.consume();
+
+    State::Next(if at_backslash {
+        StateName::GfmTableHeadRowEscape
+    } else {
+        StateName::GfmTableHeadRowData
+    })
+}
+
+/// In a table head row, after a backslash.
+///
+/// Only a backslash or a pipe is eaten as part of the escape; anything else
+/// is handed back to the data state.
+///
+/// ```markdown
+/// > | | a\-b |
+///         ^
+///   | | ---- |
+///   | | c    |
+/// ```
+pub fn head_row_escape(tokenizer: &mut Tokenizer) -> State {
+    if matches!(tokenizer.current, Some(b'\\' | b'|')) {
+        tokenizer.consume();
+        State::Next(StateName::GfmTableHeadRowData)
+    } else {
+        State::Retry(StateName::GfmTableHeadRowData)
+    }
+}
+
+/// Before the delimiter row.
+///
+/// The delimiter row cannot be a lazy line and cannot start while piercing
+/// into the flow with a new container: in both cases this is not a table.
+/// Otherwise the delimiter row is opened and optional initial whitespace is
+/// eaten (limited to less than a tab size when indented code is turned on).
+///
+/// ```markdown
+///   | | a |
+/// > | | - |
+///     ^
+///   | | b |
+/// ```
+pub fn head_delimiter_start(tokenizer: &mut Tokenizer) -> State {
+    // Reset `interrupt`.
+    tokenizer.interrupt = false;
+
+    if tokenizer.lazy || tokenizer.pierce {
+        // Bail through the shared “nok” state, so that the header cell count
+        // (`size`) and a `seen` flag that is still set from a trailing `|` in
+        // the head row are reset and cannot leak into the next attempt.
+        State::Retry(StateName::GfmTableHeadDelimiterNok)
+    } else {
+        tokenizer.enter(Name::GfmTableDelimiterRow);
+        // Track if we’ve seen a `:` or `|`.
+        tokenizer.tokenize_state.seen = false;
+
+        match tokenizer.current {
+            Some(b'\t' | b' ') => {
+                tokenizer.attempt(
+                    State::Next(StateName::GfmTableHeadDelimiterBefore),
+                    State::Next(StateName::GfmTableHeadDelimiterNok),
+                );
+
+                State::Retry(space_or_tab_min_max(
+                    tokenizer,
+                    0,
+                    if tokenizer.parse_state.options.constructs.code_indented {
+                        TAB_SIZE - 1
+                    } else {
+                        usize::MAX
+                    },
+                ))
+            }
+            _ => State::Retry(StateName::GfmTableHeadDelimiterBefore),
+        }
+    }
+}
+
+/// Before the delimiter row, after optional whitespace.
+///
+/// Also used when a `|` is found later in the row, to parse another cell.
+///
+/// ```markdown
+///   | | a |
+/// > | | - |
+///     ^
+///   | | b |
+/// ```
+pub fn head_delimiter_before(tokenizer: &mut Tokenizer) -> State {
+    if tokenizer.current == Some(b'|') {
+        // A pipe counts as “seen” and is emitted as a cell divider.
+        tokenizer.tokenize_state.seen = true;
+        tokenizer.enter(Name::GfmTableCellDivider);
+        tokenizer.consume();
+        tokenizer.exit(Name::GfmTableCellDivider);
+        return State::Next(StateName::GfmTableHeadDelimiterCellBefore);
+    }
+
+    // A value may start directly; more whitespace or an empty row is not
+    // allowed at the start.
+    let next = if matches!(tokenizer.current, Some(b'-' | b':')) {
+        StateName::GfmTableHeadDelimiterValueBefore
+    } else {
+        StateName::GfmTableHeadDelimiterNok
+    };
+
+    State::Retry(next)
+}
+
+/// After a `|`, before a delimiter cell.
+///
+/// Eats optional whitespace before the cell value.
+///
+/// ```markdown
+///   | | a |
+/// > | | - |
+///      ^
+/// ```
+pub fn head_delimiter_cell_before(tokenizer: &mut Tokenizer) -> State {
+    if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+        tokenizer.attempt(
+            State::Next(StateName::GfmTableHeadDelimiterValueBefore),
+            State::Nok,
+        );
+        let whitespace = space_or_tab(tokenizer);
+        State::Retry(whitespace)
+    } else {
+        State::Retry(StateName::GfmTableHeadDelimiterValueBefore)
+    }
+}
+
+/// Before a delimiter cell value.
+///
+/// A value starts with an optional `:` (left alignment marker) or directly
+/// with a `-`; each value that starts is counted in `tokenize_state.size_b`.
+///
+/// ```markdown
+///   | | a |
+/// > | | - |
+///       ^
+/// ```
+pub fn head_delimiter_value_before(tokenizer: &mut Tokenizer) -> State {
+    let current = tokenizer.current;
+
+    // The row ended (for example after a trailing pipe).
+    if matches!(current, None | Some(b'\n')) {
+        return State::Retry(StateName::GfmTableHeadDelimiterCellAfter);
+    }
+
+    if !matches!(current, Some(b'-' | b':')) {
+        return State::Retry(StateName::GfmTableHeadDelimiterNok);
+    }
+
+    // A delimiter cell value starts here: count it.
+    tokenizer.tokenize_state.size_b += 1;
+
+    if current == Some(b':') {
+        // Align: left.
+        tokenizer.tokenize_state.seen = true;
+        tokenizer.enter(Name::GfmTableDelimiterMarker);
+        tokenizer.consume();
+        tokenizer.exit(Name::GfmTableDelimiterMarker);
+        State::Next(StateName::GfmTableHeadDelimiterLeftAlignmentAfter)
+    } else {
+        // Align: none.
+        State::Retry(StateName::GfmTableHeadDelimiterLeftAlignmentAfter)
+    }
+}
+
+/// After the optional left alignment marker of a delimiter cell.
+///
+/// Filler (`-`) must follow; anything else means this is not a delimiter
+/// row.
+///
+/// ```markdown
+///   | | a  |
+/// > | | :- |
+///        ^
+/// ```
+pub fn head_delimiter_left_alignment_after(tokenizer: &mut Tokenizer) -> State {
+    if tokenizer.current == Some(b'-') {
+        tokenizer.enter(Name::GfmTableDelimiterFiller);
+        State::Retry(StateName::GfmTableHeadDelimiterFiller)
+    } else {
+        State::Retry(StateName::GfmTableHeadDelimiterNok)
+    }
+}
+
+/// In the filler of a delimiter cell.
+///
+/// Eats dashes; a `:` after them is the right alignment marker, anything
+/// else ends the filler.
+///
+/// ```markdown
+///   | | a |
+/// > | | - |
+///       ^
+/// ```
+pub fn head_delimiter_filler(tokenizer: &mut Tokenizer) -> State {
+    let current = tokenizer.current;
+
+    if current == Some(b'-') {
+        tokenizer.consume();
+        return State::Next(StateName::GfmTableHeadDelimiterFiller);
+    }
+
+    let at_marker = current == Some(b':');
+
+    if at_marker {
+        // Align is `center` if it was `left`, `right` otherwise.
+        tokenizer.tokenize_state.seen = true;
+    }
+
+    tokenizer.exit(Name::GfmTableDelimiterFiller);
+
+    if at_marker {
+        tokenizer.enter(Name::GfmTableDelimiterMarker);
+        tokenizer.consume();
+        tokenizer.exit(Name::GfmTableDelimiterMarker);
+        State::Next(StateName::GfmTableHeadDelimiterRightAlignmentAfter)
+    } else {
+        State::Retry(StateName::GfmTableHeadDelimiterRightAlignmentAfter)
+    }
+}
+
+/// After the optional right alignment marker of a delimiter cell.
+///
+/// Eats optional whitespace after the cell value.
+///
+/// ```markdown
+///   | | a  |
+/// > | | -: |
+///         ^
+/// ```
+pub fn head_delimiter_right_alignment_after(tokenizer: &mut Tokenizer) -> State {
+    if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+        tokenizer.attempt(
+            State::Next(StateName::GfmTableHeadDelimiterCellAfter),
+            State::Nok,
+        );
+        let whitespace = space_or_tab(tokenizer);
+        State::Retry(whitespace)
+    } else {
+        State::Retry(StateName::GfmTableHeadDelimiterCellAfter)
+    }
+}
+
+/// After a delimiter cell.
+///
+/// At the end of the row, the head is accepted only when a `:` or `|` was
+/// seen and the delimiter row has as many cells as the head row.
+/// A `|` starts another cell; anything else is not allowed.
+///
+/// ```markdown
+///   | | a  |
+/// > | | -: |
+///          ^
+/// ```
+pub fn head_delimiter_cell_after(tokenizer: &mut Tokenizer) -> State {
+    if !matches!(tokenizer.current, None | Some(b'\n')) {
+        return State::Retry(if tokenizer.current == Some(b'|') {
+            StateName::GfmTableHeadDelimiterBefore
+        } else {
+            StateName::GfmTableHeadDelimiterNok
+        });
+    }
+
+    // Not a table when:
+    // * there was no `:` or `|` at all (it’s a thematic break or setext
+    //   underline instead)
+    // * the header cell count is not the delimiter cell count
+    let state = &tokenizer.tokenize_state;
+    let is_table = state.seen && state.size == state.size_b;
+
+    if !is_table {
+        return State::Retry(StateName::GfmTableHeadDelimiterNok);
+    }
+
+    // Reset.
+    tokenizer.tokenize_state.seen = false;
+    tokenizer.tokenize_state.size = 0;
+    tokenizer.tokenize_state.size_b = 0;
+    tokenizer.exit(Name::GfmTableDelimiterRow);
+    tokenizer.exit(Name::GfmTableHead);
+    tokenizer.register_resolver(ResolveName::GfmTable);
+    State::Ok
+}
+
+/// In delimiter row, at a disallowed byte (or at a row end that failed its checks).
+///
+/// ```markdown
+/// | | a |
+/// > | | x |
+/// ^
+/// ```
+pub fn head_delimiter_nok(tokenizer: &mut Tokenizer) -> State {
+ // Reset the `seen` flag and both cell counters, then fail.
+ tokenizer.tokenize_state.seen = false;
+ tokenizer.tokenize_state.size = 0;
+ tokenizer.tokenize_state.size_b = 0;
+ State::Nok
+}
+
+/// Before table body row: refuse lazy lines, open the row, and skip initial whitespace.
+///
+/// ```markdown
+/// | | a |
+/// | | - |
+/// > | | b |
+/// ^
+/// ```
+pub fn body_row_start(tokenizer: &mut Tokenizer) -> State {
+ if tokenizer.lazy { // A lazy line is never a body row.
+ State::Nok
+ } else {
+ tokenizer.enter(Name::GfmTableRow);
+
+ match tokenizer.current {
+ Some(b'\t' | b' ') => {
+ tokenizer.attempt(State::Next(StateName::GfmTableBodyRowBefore), State::Nok);
+
+ State::Retry(space_or_tab_min_max(
+ tokenizer,
+ 0,
+ if tokenizer.parse_state.options.constructs.code_indented {
+ TAB_SIZE - 1 // NOTE(review): presumably keeps the row below indented-code depth; confirm.
+ } else {
+ usize::MAX // No cap when indented code is turned off.
+ },
+ ))
+ }
+ _ => State::Retry(StateName::GfmTableBodyRowBefore),
+ }
+ }
+}
+
+/// Before table body row, after optional whitespace: fail if whitespace is still left.
+///
+/// ```markdown
+/// | | a |
+/// | | - |
+/// > | | b |
+/// ^
+/// ```
+pub fn body_row_before(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ Some(b'\t' | b' ') => State::Nok, // Whitespace remains after the capped run in `body_row_start`.
+ _ => State::Retry(StateName::GfmTableBodyRowBreak),
+ }
+}
+
+/// At break in table body row: at the row end, whitespace, a cell divider, or cell data.
+///
+/// ```markdown
+/// | | a |
+/// | | - |
+/// > | | b |
+/// ^
+/// ^
+/// ^
+/// ```
+pub fn body_row_break(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ None | Some(b'\n') => {
+ tokenizer.exit(Name::GfmTableRow); // Closes the row entered in `body_row_start`.
+ State::Ok
+ }
+ Some(b'\t' | b' ') => {
+ tokenizer.attempt(State::Next(StateName::GfmTableBodyRowBreak), State::Nok);
+ State::Retry(space_or_tab(tokenizer))
+ }
+ Some(b'|') => {
+ tokenizer.enter(Name::GfmTableCellDivider);
+ tokenizer.consume();
+ tokenizer.exit(Name::GfmTableCellDivider);
+ State::Next(StateName::GfmTableBodyRowBreak)
+ }
+ // Anything else is cell content; the data is exited again in `body_row_data`.
+ _ => {
+ tokenizer.enter(Name::Data);
+ State::Retry(StateName::GfmTableBodyRowData)
+ }
+ }
+}
+
+/// In table body row data: consume bytes until whitespace, `|`, eol, or eof.
+///
+/// ```markdown
+/// | | a |
+/// | | - |
+/// > | | b |
+/// ^
+/// ```
+pub fn body_row_data(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ None | Some(b'\t' | b'\n' | b' ' | b'|') => {
+ tokenizer.exit(Name::Data); // Closes the data entered in `body_row_break`.
+ State::Retry(StateName::GfmTableBodyRowBreak)
+ }
+ _ => {
+ let name = if tokenizer.current == Some(b'\\') {
+ StateName::GfmTableBodyRowEscape // The byte after a backslash is checked there.
+ } else {
+ StateName::GfmTableBodyRowData
+ };
+ tokenizer.consume();
+ State::Next(name)
+ }
+ }
+}
+
+/// In table body row escape: a `\` or `|` right after a backslash is consumed as data.
+///
+/// ```markdown
+/// | | a |
+/// | | ---- |
+/// > | | b\-c |
+/// ^
+/// ```
+pub fn body_row_escape(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ Some(b'\\' | b'|') => {
+ tokenizer.consume(); // Consumed here, so this `|` does not end the data.
+ State::Next(StateName::GfmTableBodyRowData)
+ }
+ _ => State::Retry(StateName::GfmTableBodyRowData), // Other bytes get the normal data handling.
+ }
+}
+
+/// Resolve GFM table: queue table, table body, and cell events around the parsed head and rows.
+pub fn resolve(tokenizer: &mut Tokenizer) {
+ let mut index = 0;
+ // Cell tuples hold event indices: (previous cell exit, cell enter, first value enter, last value exit).
+ let mut in_first_cell_awaiting_pipe = true; // A divider seen while this is set starts no new cell.
+ let mut in_row = false;
+ let mut in_delimiter_row = false;
+ let mut last_cell = (0, 0, 0, 0); // Previous cell that is not flushed yet; `.1 == 0` means none.
+ let mut cell = (0, 0, 0, 0);
+
+ let mut after_head_awaiting_first_body_row = false;
+ let mut last_table_end = 0; // Index of the last head/row exit seen; `0` means none.
+ let mut last_table_has_body = false;
+
+ while index < tokenizer.events.len() {
+ let event = &tokenizer.events[index];
+
+ if event.kind == Kind::Enter {
+ // Start of head.
+ if event.name == Name::GfmTableHead {
+ after_head_awaiting_first_body_row = false;
+
+ // Inject previous (body end and) table end.
+ if last_table_end != 0 {
+ flush_table_end(tokenizer, last_table_end, last_table_has_body);
+ last_table_has_body = false;
+ last_table_end = 0;
+ }
+
+ // Inject table start.
+ tokenizer.map.add(
+ index,
+ 0,
+ vec![Event {
+ kind: Kind::Enter,
+ name: Name::GfmTable,
+ point: tokenizer.events[index].point.clone(),
+ link: None,
+ }],
+ );
+ } else if event.name == Name::GfmTableRow || event.name == Name::GfmTableDelimiterRow {
+ in_delimiter_row = event.name == Name::GfmTableDelimiterRow;
+ in_row = true;
+ in_first_cell_awaiting_pipe = true;
+ last_cell = (0, 0, 0, 0);
+ cell = (0, index + 1, 0, 0); // The first cell is entered right after the row enter.
+
+ // Inject table body start.
+ if after_head_awaiting_first_body_row {
+ after_head_awaiting_first_body_row = false;
+ last_table_has_body = true;
+ tokenizer.map.add(
+ index,
+ 0,
+ vec![Event {
+ kind: Kind::Enter,
+ name: Name::GfmTableBody,
+ point: tokenizer.events[index].point.clone(),
+ link: None,
+ }],
+ );
+ }
+ }
+ // Cell data.
+ else if in_row
+ && (event.name == Name::Data
+ || event.name == Name::GfmTableDelimiterMarker
+ || event.name == Name::GfmTableDelimiterFiller)
+ {
+ in_first_cell_awaiting_pipe = false;
+
+ // First value in cell.
+ if cell.2 == 0 {
+ if last_cell.1 != 0 {
+ cell.0 = cell.1;
+ flush_cell(tokenizer, last_cell, in_delimiter_row, None);
+ last_cell = (0, 0, 0, 0);
+ }
+
+ cell.2 = index;
+ }
+ } else if event.name == Name::GfmTableCellDivider {
+ if in_first_cell_awaiting_pipe {
+ in_first_cell_awaiting_pipe = false;
+ } else {
+ if last_cell.1 != 0 {
+ cell.0 = cell.1;
+ flush_cell(tokenizer, last_cell, in_delimiter_row, None);
+ }
+
+ last_cell = cell;
+ cell = (last_cell.1, index, 0, 0); // The next cell is entered at this divider.
+ }
+ }
+ // Exit events.
+ } else if event.name == Name::GfmTableHead {
+ after_head_awaiting_first_body_row = true;
+ last_table_end = index;
+ } else if event.name == Name::GfmTableRow || event.name == Name::GfmTableDelimiterRow {
+ in_row = false;
+ last_table_end = index;
+ if last_cell.1 != 0 {
+ cell.0 = cell.1;
+ flush_cell(tokenizer, last_cell, in_delimiter_row, Some(index));
+ } else if cell.1 != 0 {
+ flush_cell(tokenizer, cell, in_delimiter_row, Some(index));
+ }
+ } else if in_row
+ && (event.name == Name::Data
+ || event.name == Name::GfmTableDelimiterMarker
+ || event.name == Name::GfmTableDelimiterFiller)
+ {
+ cell.3 = index; // Updated on every value exit, so it ends up at the last one in the cell.
+ }
+
+ index += 1;
+ }
+
+ if last_table_end != 0 { // Close the final table, which no later head enter will do.
+ flush_table_end(tokenizer, last_table_end, last_table_has_body);
+ }
+}
+
+/// Generate a cell: queue enter/exit events for the cell and its value in `tokenizer.map`.
+fn flush_cell(
+ tokenizer: &mut Tokenizer,
+ range: (usize, usize, usize, usize), // Event indices; `0` in slot 0 or 2 means "not present".
+ in_delimiter_row: bool,
+ row_end: Option<usize>, // Index of the row exit, when this is the last cell in the row.
+) {
+ let group_name = if in_delimiter_row {
+ Name::GfmTableDelimiterCell
+ } else {
+ Name::GfmTableCell
+ };
+ let value_name = if in_delimiter_row {
+ Name::GfmTableDelimiterCellValue
+ } else {
+ Name::GfmTableCellText
+ };
+
+ // Insert an exit for the previous cell, if there is one.
+ //
+ // ```markdown
+ // > | | aa | bb | cc |
+ // ^-- exit
+ // ^^^^-- this cell
+ // ```
+ if range.0 != 0 {
+ tokenizer.map.add(
+ range.0,
+ 0,
+ vec![Event {
+ kind: Kind::Exit,
+ name: group_name.clone(),
+ point: tokenizer.events[range.0].point.clone(),
+ link: None,
+ }],
+ );
+ }
+
+ // Insert enter of this cell.
+ //
+ // ```markdown
+ // > | | aa | bb | cc |
+ // ^-- enter
+ // ^^^^-- this cell
+ // ```
+ tokenizer.map.add(
+ range.1,
+ 0,
+ vec![Event {
+ kind: Kind::Enter,
+ name: group_name.clone(),
+ point: tokenizer.events[range.1].point.clone(),
+ link: None,
+ }],
+ );
+
+ // Insert text start at first data start and end at last data end, and
+ // remove events between.
+ //
+ // ```markdown
+ // > | | aa | bb | cc |
+ // ^-- enter
+ // ^-- exit
+ // ^^^^-- this cell
+ // ```
+ if range.2 != 0 {
+ tokenizer.map.add(
+ range.2,
+ 0,
+ vec![Event {
+ kind: Kind::Enter,
+ name: value_name.clone(),
+ point: tokenizer.events[range.2].point.clone(),
+ link: None,
+ }],
+ );
+ debug_assert_ne!(range.3, 0); // A value that was entered must also have been exited.
+
+ if !in_delimiter_row {
+ tokenizer.events[range.2].link = Some(Link { // Mark the first data event as text content.
+ previous: None,
+ next: None,
+ content: Content::Text,
+ });
+
+ if !in_delimiter_row && range.3 > range.2 + 1 { // NOTE(review): the first test repeats the enclosing `if`.
+ let a = range.2 + 1;
+ let b = range.3 - range.2 - 1;
+ tokenizer.map.add(a, b, vec![]); // Drop the events between the first enter and the last exit.
+ }
+ }
+
+ tokenizer.map.add(
+ range.3 + 1,
+ 0,
+ vec![Event {
+ kind: Kind::Exit,
+ name: value_name,
+ point: tokenizer.events[range.3].point.clone(),
+ link: None,
+ }],
+ );
+ }
+
+ // Insert an exit for the last cell, if at the row end.
+ //
+ // ```markdown
+ // > | | aa | bb | cc |
+ // ^-- exit
+ // ^^^^^^-- this cell (the last one contains two “between” parts)
+ // ```
+ if let Some(row_end) = row_end {
+ tokenizer.map.add(
+ row_end,
+ 0,
+ vec![Event {
+ kind: Kind::Exit,
+ name: group_name,
+ point: tokenizer.events[row_end].point.clone(),
+ link: None,
+ }],
+ );
+ }
+}
+
+/// Generate table end (and table body end), queued right after the event at `index`.
+fn flush_table_end(tokenizer: &mut Tokenizer, index: usize, body: bool) {
+ let mut exits = vec![];
+
+ if body { // Pushed first, so the body exit comes before the table exit.
+ exits.push(Event {
+ kind: Kind::Exit,
+ name: Name::GfmTableBody,
+ point: tokenizer.events[index].point.clone(),
+ link: None,
+ });
+ }
+
+ exits.push(Event {
+ kind: Kind::Exit,
+ name: Name::GfmTable,
+ point: tokenizer.events[index].point.clone(),
+ link: None,
+ });
+
+ tokenizer.map.add(index + 1, 0, exits); // Nothing is removed (`0`); the exits are only inserted.
+}
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index df1d4fb..e9cc759 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -87,6 +87,7 @@ use alloc::vec;
pub fn start(tokenizer: &mut Tokenizer) -> State {
if tokenizer.parse_state.options.constructs.heading_setext
&& !tokenizer.lazy
+ && !tokenizer.pierce
// Require a paragraph before.
&& (!tokenizer.events.is_empty()
&& tokenizer.events[skip_opt_back(
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index b5a6013..8a9edfb 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -191,7 +191,6 @@ use crate::util::{
slice::{Position, Slice},
};
use alloc::{string::String, vec};
-extern crate std;
/// Start of label end.
///
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index 9add015..de88174 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -60,6 +60,7 @@
//! * [frontmatter][]
//! * [gfm autolink literal][gfm_autolink_literal]
//! * [gfm footnote definition][gfm_footnote_definition]
+//! * [gfm table][gfm_table]
//! * [gfm task list item check][gfm_task_list_item_check]
//! * [gfm label start footnote][gfm_label_start_footnote]
//! * math (text) (in `raw_text`)
@@ -151,6 +152,7 @@ pub mod frontmatter;
pub mod gfm_autolink_literal;
pub mod gfm_footnote_definition;
pub mod gfm_label_start_footnote;
+pub mod gfm_table;
pub mod gfm_task_list_item_check;
pub mod hard_break_escape;
pub mod heading_atx;
diff --git a/src/event.rs b/src/event.rs
index 3e540c0..ba266b4 100644
--- a/src/event.rs
+++ b/src/event.rs
@@ -1240,6 +1240,265 @@ pub enum Name {
/// ^
/// ```
GfmStrikethroughText,
+ /// GFM extension: Table.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [flow content][crate::construct::flow]
+ /// * **Content model**:
+ /// [`GfmTableBody`][Name::GfmTableBody],
+ /// [`GfmTableHead`][Name::GfmTableHead],
+ /// [`LineEnding`][Name::LineEnding]
+ /// * **Construct**:
+ /// [`gfm_table`][crate::construct::gfm_table]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// > | | a |
+ /// ^^^^^
+ /// > | | - |
+ /// ^^^^^
+ /// > | | b |
+ /// ^^^^^
+ /// ```
+ GfmTable,
+ /// GFM extension: Table body.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`GfmTable`][Name::GfmTable]
+ /// * **Content model**:
+ /// [`GfmTableRow`][Name::GfmTableRow],
+ /// [`LineEnding`][Name::LineEnding]
+ /// * **Construct**:
+ /// [`gfm_table`][crate::construct::gfm_table]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// | | a |
+ /// | | - |
+ /// > | | b |
+ /// ^^^^^
+ /// ```
+ GfmTableBody,
+ /// GFM extension: Table cell.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`GfmTableRow`][Name::GfmTableRow]
+ /// * **Content model**:
+ /// [`GfmTableCellDivider`][Name::GfmTableCellDivider],
+ /// [`GfmTableCellText`][Name::GfmTableCellText],
+ /// [`SpaceOrTab`][Name::SpaceOrTab]
+ /// * **Construct**:
+ /// [`gfm_table`][crate::construct::gfm_table]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// > | | a |
+ /// ^^^^^
+ /// | | - |
+ /// > | | b |
+ /// ^^^^^
+ /// ```
+ GfmTableCell,
+ /// GFM extension: Table cell text.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`GfmTableCell`][Name::GfmTableCell]
+ /// * **Content model**:
+ /// [text content][crate::construct::text]
+ /// * **Construct**:
+ /// [`gfm_table`][crate::construct::gfm_table]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// > | | a |
+ /// ^
+ /// | | - |
+ /// > | | b |
+ /// ^
+ /// ```
+ GfmTableCellText,
+ /// GFM extension: Table cell divider.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`GfmTableCell`][Name::GfmTableCell]
+ /// * **Content model**:
+ /// void
+ /// * **Construct**:
+ /// [`gfm_table`][crate::construct::gfm_table]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// > | | a |
+ /// ^ ^
+ /// > | | - |
+ /// ^ ^
+ /// > | | b |
+ /// ^ ^
+ /// ```
+ GfmTableCellDivider,
+ /// GFM extension: Table delimiter row.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`GfmTableHead`][Name::GfmTableHead]
+ /// * **Content model**:
+ /// [`GfmTableDelimiterCell`][Name::GfmTableDelimiterCell]
+ /// * **Construct**:
+ /// [`gfm_table`][crate::construct::gfm_table]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// | | a |
+ /// > | | - |
+ /// ^^^^^
+ /// | | b |
+ /// ```
+ GfmTableDelimiterRow,
+ /// GFM extension: Table delimiter alignment marker.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`GfmTableDelimiterCellValue`][Name::GfmTableDelimiterCellValue]
+ /// * **Content model**:
+ /// void
+ /// * **Construct**:
+ /// [`gfm_table`][crate::construct::gfm_table]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// | | a |
+ /// > | | :- |
+ /// ^
+ /// | | b |
+ /// ```
+ GfmTableDelimiterMarker,
+ /// GFM extension: Table delimiter cell.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`GfmTableDelimiterRow`][Name::GfmTableDelimiterRow]
+ /// * **Content model**:
+ /// [`GfmTableCellDivider`][Name::GfmTableCellDivider],
+ /// [`GfmTableDelimiterCellValue`][Name::GfmTableDelimiterCellValue],
+ /// [`SpaceOrTab`][Name::SpaceOrTab]
+ /// * **Construct**:
+ /// [`gfm_table`][crate::construct::gfm_table]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// | | a |
+ /// > | | - |
+ /// ^^^^^
+ /// | | b |
+ /// ```
+ GfmTableDelimiterCell,
+ /// GFM extension: Table delimiter cell alignment.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`GfmTableDelimiterCell`][Name::GfmTableDelimiterCell]
+ /// * **Content model**:
+ /// [`GfmTableDelimiterMarker`][Name::GfmTableDelimiterMarker],
+ /// [`GfmTableDelimiterFiller`][Name::GfmTableDelimiterFiller]
+ /// * **Construct**:
+ /// [`gfm_table`][crate::construct::gfm_table]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// | | a |
+ /// > | | - |
+ /// ^
+ /// | | b |
+ /// ```
+ GfmTableDelimiterCellValue,
+ /// GFM extension: Table delimiter filler.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`GfmTableDelimiterCellValue`][Name::GfmTableDelimiterCellValue]
+ /// * **Content model**:
+ /// void
+ /// * **Construct**:
+ /// [`gfm_table`][crate::construct::gfm_table]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// | | a |
+ /// > | | - |
+ /// ^
+ /// | | b |
+ /// ```
+ GfmTableDelimiterFiller,
+ /// GFM extension: Table head.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`GfmTable`][Name::GfmTable]
+ /// * **Content model**:
+ /// [`GfmTableRow`][Name::GfmTableRow],
+ /// [`GfmTableDelimiterRow`][Name::GfmTableDelimiterRow],
+ /// [`LineEnding`][Name::LineEnding]
+ /// * **Construct**:
+ /// [`gfm_table`][crate::construct::gfm_table]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// > | | a |
+ /// ^^^^^
+ /// > | | - |
+ /// ^^^^^
+ /// | | b |
+ /// ```
+ GfmTableHead,
+ /// GFM extension: Table row.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`GfmTableBody`][Name::GfmTableBody],
+ /// [`GfmTableHead`][Name::GfmTableHead]
+ /// * **Content model**:
+ /// [`GfmTableCell`][Name::GfmTableCell]
+ /// * **Construct**:
+ /// [`gfm_table`][crate::construct::gfm_table]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// > | | a |
+ /// ^^^^^
+ /// | | - |
+ /// > | | b |
+ /// ^^^^^
+ /// ```
+ GfmTableRow,
/// GFM extension: task list item check.
///
/// ## Info
@@ -2436,7 +2695,7 @@ pub enum Name {
}
/// List of void events, used to make sure everything is working well.
-pub const VOID_EVENTS: [Name; 57] = [
+pub const VOID_EVENTS: [Name; 60] = [
Name::AttentionSequence,
Name::AutolinkEmail,
Name::AutolinkMarker,
@@ -2469,6 +2728,9 @@ pub const VOID_EVENTS: [Name; 57] = [
Name::GfmFootnoteDefinitionLabelMarker,
Name::GfmFootnoteDefinitionMarker,
Name::GfmStrikethroughSequence,
+ Name::GfmTableCellDivider,
+ Name::GfmTableDelimiterMarker,
+ Name::GfmTableDelimiterFiller,
Name::GfmTaskListItemMarker,
Name::GfmTaskListItemValueChecked,
Name::GfmTaskListItemValueUnchecked,
diff --git a/src/lib.rs b/src/lib.rs
index 4d1b762..0cf4f49 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -191,6 +191,17 @@ pub struct Constructs {
/// ^^^
/// ```
pub gfm_strikethrough: bool,
+ /// GFM: table.
+ ///
+ /// ```markdown
+ /// > | | a |
+ /// ^^^^^
+ /// > | | - |
+ /// ^^^^^
+ /// > | | b |
+ /// ^^^^^
+ /// ```
+ pub gfm_table: bool,
/// GFM: task list item.
///
/// ```markdown
@@ -317,6 +328,7 @@ impl Default for Constructs {
gfm_label_start_footnote: false,
gfm_footnote_definition: false,
gfm_strikethrough: false,
+ gfm_table: false,
gfm_task_list_item: false,
hard_break_escape: true,
hard_break_trailing: true,
@@ -346,6 +358,7 @@ impl Constructs {
gfm_footnote_definition: true,
gfm_label_start_footnote: true,
gfm_strikethrough: true,
+ gfm_table: true,
gfm_task_list_item: true,
..Self::default()
}
diff --git a/src/resolve.rs b/src/resolve.rs
index a62d382..d015213 100644
--- a/src/resolve.rs
+++ b/src/resolve.rs
@@ -19,6 +19,11 @@ pub enum Name {
/// and what occurs before and after each sequence.
/// Otherwise they are turned into data.
Attention,
+ /// Resolve GFM tables.
+ ///
+ /// The table head, and later each row, are all parsed separately.
+ /// Resolving groups everything together, and groups cells.
+ GfmTable,
/// Resolve heading (atx).
///
/// Heading (atx) contains further sequences and data.
@@ -60,6 +65,7 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) {
let func = match name {
Name::Label => construct::label_end::resolve,
Name::Attention => construct::attention::resolve,
+ Name::GfmTable => construct::gfm_table::resolve,
Name::HeadingAtx => construct::heading_atx::resolve,
Name::HeadingSetext => construct::heading_setext::resolve,
Name::ListItem => construct::list_item::resolve,
diff --git a/src/state.rs b/src/state.rs
index a42e802..5013ec8 100644
--- a/src/state.rs
+++ b/src/state.rs
@@ -123,6 +123,7 @@ pub enum Name {
DocumentFlowEnd,
FlowStart,
+ FlowBeforeGfmTable,
FlowBeforeCodeIndented,
FlowBeforeRaw,
FlowBeforeHtml,
@@ -309,6 +310,28 @@ pub enum Name {
StringBefore,
StringBeforeData,
+ GfmTableStart,
+ GfmTableHeadRowBefore,
+ GfmTableHeadRowStart,
+ GfmTableHeadRowBreak,
+ GfmTableHeadRowData,
+ GfmTableHeadRowEscape,
+ GfmTableHeadDelimiterStart,
+ GfmTableHeadDelimiterBefore,
+ GfmTableHeadDelimiterCellBefore,
+ GfmTableHeadDelimiterValueBefore,
+ GfmTableHeadDelimiterLeftAlignmentAfter,
+ GfmTableHeadDelimiterFiller,
+ GfmTableHeadDelimiterRightAlignmentAfter,
+ GfmTableHeadDelimiterCellAfter,
+ GfmTableHeadDelimiterNok,
+
+ GfmTableBodyRowBefore,
+ GfmTableBodyRowStart,
+ GfmTableBodyRowBreak,
+ GfmTableBodyRowData,
+ GfmTableBodyRowEscape,
+
TextStart,
TextBefore,
TextBeforeHtml,
@@ -445,6 +468,7 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
Name::DocumentFlowInside => construct::document::flow_inside,
Name::FlowStart => construct::flow::start,
+ Name::FlowBeforeGfmTable => construct::flow::before_gfm_table,
Name::FlowBeforeCodeIndented => construct::flow::before_code_indented,
Name::FlowBeforeRaw => construct::flow::before_raw,
Name::FlowBeforeHtml => construct::flow::before_html,
@@ -662,6 +686,33 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
Name::StringBefore => construct::string::before,
Name::StringBeforeData => construct::string::before_data,
+ Name::GfmTableStart => construct::gfm_table::start,
+ Name::GfmTableHeadRowBefore => construct::gfm_table::head_row_before,
+ Name::GfmTableHeadRowStart => construct::gfm_table::head_row_start,
+ Name::GfmTableHeadRowBreak => construct::gfm_table::head_row_break,
+ Name::GfmTableHeadRowData => construct::gfm_table::head_row_data,
+ Name::GfmTableHeadRowEscape => construct::gfm_table::head_row_escape,
+
+ Name::GfmTableHeadDelimiterStart => construct::gfm_table::head_delimiter_start,
+ Name::GfmTableHeadDelimiterBefore => construct::gfm_table::head_delimiter_before,
+ Name::GfmTableHeadDelimiterCellBefore => construct::gfm_table::head_delimiter_cell_before,
+ Name::GfmTableHeadDelimiterValueBefore => construct::gfm_table::head_delimiter_value_before,
+ Name::GfmTableHeadDelimiterLeftAlignmentAfter => {
+ construct::gfm_table::head_delimiter_left_alignment_after
+ }
+ Name::GfmTableHeadDelimiterFiller => construct::gfm_table::head_delimiter_filler,
+ Name::GfmTableHeadDelimiterRightAlignmentAfter => {
+ construct::gfm_table::head_delimiter_right_alignment_after
+ }
+ Name::GfmTableHeadDelimiterCellAfter => construct::gfm_table::head_delimiter_cell_after,
+ Name::GfmTableHeadDelimiterNok => construct::gfm_table::head_delimiter_nok,
+
+ Name::GfmTableBodyRowBefore => construct::gfm_table::body_row_before,
+ Name::GfmTableBodyRowStart => construct::gfm_table::body_row_start,
+ Name::GfmTableBodyRowBreak => construct::gfm_table::body_row_break,
+ Name::GfmTableBodyRowData => construct::gfm_table::body_row_data,
+ Name::GfmTableBodyRowEscape => construct::gfm_table::body_row_escape,
+
Name::TextStart => construct::text::start,
Name::TextBefore => construct::text::before,
Name::TextBeforeHtml => construct::text::before_html,
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 9b73836..0ab8784 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -305,10 +305,12 @@ pub struct Tokenizer<'a> {
///
/// Used when tokenizing [document content][crate::construct::document].
pub concrete: bool,
- /// Whether this line is lazy.
+ /// Whether this row is piercing into the current construct with more
+ /// containers.
///
- /// The previous line was a paragraph, and this line’s containers did not
- /// match.
+ /// Used when tokenizing [document content][crate::construct::document].
+ pub pierce: bool,
+ /// Whether this line is lazy: there are fewer containers than before.
pub lazy: bool,
}
@@ -370,6 +372,7 @@ impl<'a> Tokenizer<'a> {
},
map: EditMap::new(),
interrupt: false,
+ pierce: true,
concrete: false,
lazy: false,
resolvers: vec![],