diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-08-31 16:50:20 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-08-31 16:50:20 +0200 |
commit | b1590a4fb0c28fdb6af866ea79c186ea57284493 (patch) | |
tree | 61264dc36135e7dae34a04992a99b9f3f71e7b8e /src | |
parent | 670f1d82e01ea2394b21d7d1857f41bdc67b3fce (diff) | |
download | markdown-rs-b1590a4fb0c28fdb6af866ea79c186ea57284493.tar.gz markdown-rs-b1590a4fb0c28fdb6af866ea79c186ea57284493.tar.bz2 markdown-rs-b1590a4fb0c28fdb6af866ea79c186ea57284493.zip |
Add support for GFM tables
Diffstat (limited to 'src')
-rw-r--r-- | src/compiler.rs | 235 | ||||
-rw-r--r-- | src/construct/document.rs | 16 | ||||
-rw-r--r-- | src/construct/flow.rs | 41 | ||||
-rw-r--r-- | src/construct/gfm_autolink_literal.rs | 1 | ||||
-rw-r--r-- | src/construct/gfm_footnote_definition.rs | 2 | ||||
-rw-r--r-- | src/construct/gfm_table.rs | 1042 | ||||
-rw-r--r-- | src/construct/heading_setext.rs | 1 | ||||
-rw-r--r-- | src/construct/label_end.rs | 1 | ||||
-rw-r--r-- | src/construct/mod.rs | 2 | ||||
-rw-r--r-- | src/event.rs | 264 | ||||
-rw-r--r-- | src/lib.rs | 13 | ||||
-rw-r--r-- | src/resolve.rs | 6 | ||||
-rw-r--r-- | src/state.rs | 51 | ||||
-rw-r--r-- | src/tokenizer.rs | 9 |
14 files changed, 1648 insertions, 36 deletions
diff --git a/src/compiler.rs b/src/compiler.rs index 9057505..5626f8a 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -68,6 +68,43 @@ struct Definition { title: Option<String>, } +/// GFM table: column alignment. +#[derive(Debug, PartialEq, Eq, Copy, Clone)] +enum GfmTableAlign { + /// No alignment. + /// + /// ```markdown + /// | | aaa | + /// > | | --- | + /// ^^^ + /// ``` + None, + /// Left alignment. + /// + /// ```markdown + /// | | aaa | + /// > | | :-- | + /// ^^^ + /// ``` + Left, + /// Center alignment. + /// + /// ```markdown + /// | | aaa | + /// > | | :-: | + /// ^^^ + /// ``` + Center, + /// Right alignment. + /// + /// ```markdown + /// | | aaa | + /// > | | --: | + /// ^^^ + /// ``` + Right, +} + /// Context used to compile markdown. #[allow(clippy::struct_excessive_bools)] #[derive(Debug)] @@ -107,6 +144,12 @@ struct CompileContext<'a> { pub gfm_footnote_definitions: Vec<(String, String)>, pub gfm_footnote_definition_calls: Vec<(String, usize)>, pub gfm_footnote_definition_stack: Vec<(usize, usize)>, + /// Whether we are in a GFM table head. + pub gfm_table_in_head: bool, + /// Current GFM table alignment. + pub gfm_table_align: Option<Vec<GfmTableAlign>>, + /// Current GFM table column. + pub gfm_table_column: usize, // Fields used to influance the current compilation. /// Ignore the next line ending. 
pub slurp_one_line_ending: bool, @@ -153,6 +196,9 @@ impl<'a> CompileContext<'a> { gfm_footnote_definitions: vec![], gfm_footnote_definition_calls: vec![], gfm_footnote_definition_stack: vec![], + gfm_table_in_head: false, + gfm_table_align: None, + gfm_table_column: 0, tight_stack: vec![], slurp_one_line_ending: false, image_alt_inside: false, @@ -350,6 +396,11 @@ fn enter(context: &mut CompileContext) { Name::GfmFootnoteDefinition => on_enter_gfm_footnote_definition(context), Name::GfmFootnoteCall => on_enter_gfm_footnote_call(context), Name::GfmStrikethrough => on_enter_gfm_strikethrough(context), + Name::GfmTable => on_enter_gfm_table(context), + Name::GfmTableBody => on_enter_gfm_table_body(context), + Name::GfmTableCell => on_enter_gfm_table_cell(context), + Name::GfmTableHead => on_enter_gfm_table_head(context), + Name::GfmTableRow => on_enter_gfm_table_row(context), Name::GfmTaskListItemCheck => on_enter_gfm_task_list_item_check(context), Name::HtmlFlow => on_enter_html_flow(context), Name::HtmlText => on_enter_html_text(context), @@ -407,6 +458,11 @@ fn exit(context: &mut CompileContext) { Name::GfmFootnoteDefinitionPrefix => on_exit_gfm_footnote_definition_prefix(context), Name::GfmFootnoteDefinition => on_exit_gfm_footnote_definition(context), Name::GfmStrikethrough => on_exit_gfm_strikethrough(context), + Name::GfmTable => on_exit_gfm_table(context), + Name::GfmTableBody => on_exit_gfm_table_body(context), + Name::GfmTableCell => on_exit_gfm_table_cell(context), + Name::GfmTableHead => on_exit_gfm_table_head(context), + Name::GfmTableRow => on_exit_gfm_table_row(context), Name::GfmTaskListItemCheck => on_exit_gfm_task_list_item_check(context), Name::GfmTaskListItemValueChecked => on_exit_gfm_task_list_item_value_checked(context), Name::HardBreakEscape | Name::HardBreakTrailing => on_exit_break(context), @@ -536,6 +592,105 @@ fn on_enter_gfm_strikethrough(context: &mut CompileContext) { } } +/// Handle [`Enter`][Kind::Enter]:[`GfmTable`][Name::GfmTable]. 
+fn on_enter_gfm_table(context: &mut CompileContext) { + // Find the alignment. + let mut index = context.index; + let mut in_delimiter_row = false; + let mut align = vec![]; + + while index < context.events.len() { + let event = &context.events[index]; + + if in_delimiter_row { + if event.kind == Kind::Enter { + // Start of alignment value: set a new column. + if event.name == Name::GfmTableDelimiterCellValue { + align.push( + if context.events[index + 1].name == Name::GfmTableDelimiterMarker { + GfmTableAlign::Left + } else { + GfmTableAlign::None + }, + ); + } + } else { + // End of alignment value: change the column. + if event.name == Name::GfmTableDelimiterCellValue { + if context.events[index - 1].name == Name::GfmTableDelimiterMarker { + let align_index = align.len() - 1; + align[align_index] = if align[align_index] == GfmTableAlign::Left { + GfmTableAlign::Center + } else { + GfmTableAlign::Right + } + } + } + // Done! + else if event.name == Name::GfmTableDelimiterRow { + break; + } + } + } else if event.kind == Kind::Enter && event.name == Name::GfmTableDelimiterRow { + in_delimiter_row = true; + } + + index += 1; + } + + // Generate. + context.gfm_table_align = Some(align); + context.line_ending_if_needed(); + context.push("<table>"); +} + +/// Handle [`Enter`][Kind::Enter]:[`GfmTableBody`][Name::GfmTableBody]. +fn on_enter_gfm_table_body(context: &mut CompileContext) { + context.push("<tbody>"); +} + +/// Handle [`Enter`][Kind::Enter]:[`GfmTableCell`][Name::GfmTableCell]. +fn on_enter_gfm_table_cell(context: &mut CompileContext) { + let column = context.gfm_table_column; + let align = context.gfm_table_align.as_ref().unwrap(); + + if column >= align.len() { + // Capture cell to ignore it. 
+ context.buffer(); + } else { + let value = align[column]; + context.line_ending_if_needed(); + + if context.gfm_table_in_head { + context.push("<th"); + } else { + context.push("<td"); + } + + match value { + GfmTableAlign::Left => context.push(" align=\"left\""), + GfmTableAlign::Right => context.push(" align=\"right\""), + GfmTableAlign::Center => context.push(" align=\"center\""), + GfmTableAlign::None => {} + } + + context.push(">"); + } +} + +/// Handle [`Enter`][Kind::Enter]:[`GfmTableHead`][Name::GfmTableHead]. +fn on_enter_gfm_table_head(context: &mut CompileContext) { + context.line_ending_if_needed(); + context.push("<thead>"); + context.gfm_table_in_head = true; +} + +/// Handle [`Enter`][Kind::Enter]:[`GfmTableRow`][Name::GfmTableRow]. +fn on_enter_gfm_table_row(context: &mut CompileContext) { + context.line_ending_if_needed(); + context.push("<tr>"); +} + /// Handle [`Enter`][Kind::Enter]:[`GfmTaskListItemCheck`][Name::GfmTaskListItemCheck]. fn on_enter_gfm_task_list_item_check(context: &mut CompileContext) { if !context.image_alt_inside { @@ -892,7 +1047,24 @@ fn on_exit_raw_flow(context: &mut CompileContext) { /// Handle [`Exit`][Kind::Exit]:{[`CodeText`][Name::CodeText],[`MathText`][Name::MathText]}. fn on_exit_raw_text(context: &mut CompileContext) { let result = context.resume(); - let mut bytes = result.as_bytes(); + let mut bytes = result.as_bytes().to_vec(); + + // If we are in a GFM table, we need to decode escaped pipes. + // This is a rather weird GFM feature. 
+ if context.gfm_table_align.is_some() { + let mut index = 0; + let mut len = bytes.len(); + + while index < len { + if index + 1 < len && bytes[index] == b'\\' && bytes[index + 1] == b'|' { + bytes.remove(index); + len -= 1; + } + + index += 1; + } + } + let mut trim = false; let mut index = 0; let mut end = bytes.len(); @@ -910,11 +1082,12 @@ fn on_exit_raw_text(context: &mut CompileContext) { } if trim { - bytes = &bytes[1..end]; + bytes.remove(0); + bytes.pop(); } context.raw_text_inside = false; - context.push(str::from_utf8(bytes).unwrap()); + context.push(str::from_utf8(&bytes).unwrap()); if !context.image_alt_inside { context.push("</code>"); @@ -1113,6 +1286,62 @@ fn on_exit_gfm_strikethrough(context: &mut CompileContext) { } } +/// Handle [`Exit`][Kind::Exit]:[`GfmTable`][Name::GfmTable]. +fn on_exit_gfm_table(context: &mut CompileContext) { + context.gfm_table_align = None; + context.line_ending_if_needed(); + context.push("</table>"); +} + +/// Handle [`Exit`][Kind::Exit]:[`GfmTableBody`][Name::GfmTableBody]. +fn on_exit_gfm_table_body(context: &mut CompileContext) { + context.line_ending_if_needed(); + context.push("</tbody>"); +} + +/// Handle [`Exit`][Kind::Exit]:[`GfmTableCell`][Name::GfmTableCell]. +fn on_exit_gfm_table_cell(context: &mut CompileContext) { + let align = context.gfm_table_align.as_ref().unwrap(); + + if context.gfm_table_column < align.len() { + if context.gfm_table_in_head { + context.push("</th>"); + } else { + context.push("</td>"); + } + } else { + // Stop capturing. + context.resume(); + } + + context.gfm_table_column += 1; +} + +/// Handle [`Exit`][Kind::Exit]:[`GfmTableHead`][Name::GfmTableHead]. +fn on_exit_gfm_table_head(context: &mut CompileContext) { + context.gfm_table_in_head = false; + context.line_ending_if_needed(); + context.push("</thead>"); +} + +/// Handle [`Exit`][Kind::Exit]:[`GfmTableRow`][Name::GfmTableRow]. 
+fn on_exit_gfm_table_row(context: &mut CompileContext) { + let mut column = context.gfm_table_column; + let len = context.gfm_table_align.as_ref().unwrap().len(); + + // Add “phantom” cells, for body rows that are shorter than the delimiter + // row (which is equal to the head row). + while column < len { + on_enter_gfm_table_cell(context); + on_exit_gfm_table_cell(context); + column += 1; + } + + context.gfm_table_column = 0; + context.line_ending_if_needed(); + context.push("</tr>"); +} + /// Handle [`Exit`][Kind::Exit]:[`GfmTaskListItemCheck`][Name::GfmTaskListItemCheck]. fn on_exit_gfm_task_list_item_check(context: &mut CompileContext) { if !context.image_alt_inside { diff --git a/src/construct/document.rs b/src/construct/document.rs index 9c76e46..e31e58d 100644 --- a/src/construct/document.rs +++ b/src/construct/document.rs @@ -269,6 +269,14 @@ pub fn container_new_after(tokenizer: &mut Tokenizer) -> State { exit_containers(tokenizer, &Phase::Prefix); } + // We are “piercing” into the flow with a new container. + tokenizer + .tokenize_state + .document_child + .as_mut() + .unwrap() + .pierce = true; + tokenizer .tokenize_state .document_container_stack @@ -398,12 +406,11 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State { let mut stack_index = child.stack.len(); // Use two algo’s: one for when we’re suspended or in multiline things - // like definitions, another (b) for when we fed the line ending and closed - // a) + // like definitions, another for when we fed the line ending and closed. while !document_lazy_continuation_current && stack_index > 0 { stack_index -= 1; let name = &child.stack[stack_index]; - if name == &Name::Paragraph || name == &Name::Definition { + if name == &Name::Paragraph || name == &Name::Definition || name == &Name::GfmTableHead { document_lazy_continuation_current = true; } } @@ -418,6 +425,9 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State { } } + // Reset “piercing”. 
+ child.pierce = false; + if child.lazy && tokenizer.tokenize_state.document_lazy_accepting_before && document_lazy_continuation_current diff --git a/src/construct/flow.rs b/src/construct/flow.rs index 3f1cd77..3f7bc9c 100644 --- a/src/construct/flow.rs +++ b/src/construct/flow.rs @@ -65,29 +65,10 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { ); State::Retry(StateName::HtmlFlowStart) } - // Note: `-` is also used in thematic breaks so it’s not included here. - Some(b'=') => { - tokenizer.attempt( - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), - ); - State::Retry(StateName::HeadingSetextStart) - } - Some(b'[') => { - tokenizer.attempt( - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), - ); - State::Retry(StateName::DefinitionStart) - } // Actual parsing: blank line? Indented code? Indented anything? - // Also includes `-` which can be a setext heading underline or thematic break. - None | Some(b'\t' | b'\n' | b' ' | b'-') => State::Retry(StateName::FlowBlankLineBefore), - // Must be a paragraph. - Some(_) => { - tokenizer.attempt(State::Next(StateName::FlowAfter), State::Nok); - State::Retry(StateName::ParagraphStart) - } + // Tables, setext heading underlines, definitions, and paragraphs are + // particularly weird. + _ => State::Retry(StateName::FlowBlankLineBefore), } } @@ -185,11 +166,25 @@ pub fn before_heading_setext(tokenizer: &mut Tokenizer) -> State { pub fn before_thematic_break(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeDefinition), + State::Next(StateName::FlowBeforeGfmTable), ); State::Retry(StateName::ThematicBreakStart) } +/// At GFM table. 
+/// +/// ```markdown +/// > | | a | +/// ^ +/// ``` +pub fn before_gfm_table(tokenizer: &mut Tokenizer) -> State { + tokenizer.attempt( + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeDefinition), + ); + State::Retry(StateName::GfmTableStart) +} + /// At definition. /// /// ```markdown diff --git a/src/construct/gfm_autolink_literal.rs b/src/construct/gfm_autolink_literal.rs index 7fdeb01..704c536 100644 --- a/src/construct/gfm_autolink_literal.rs +++ b/src/construct/gfm_autolink_literal.rs @@ -5,7 +5,6 @@ use crate::tokenizer::Tokenizer; use crate::util::classify_character::{classify, Kind as CharacterKind}; use crate::util::slice::{Position, Slice}; use alloc::vec::Vec; -extern crate std; use core::str; // To do: doc al functions. diff --git a/src/construct/gfm_footnote_definition.rs b/src/construct/gfm_footnote_definition.rs index 3715044..cbe816f 100644 --- a/src/construct/gfm_footnote_definition.rs +++ b/src/construct/gfm_footnote_definition.rs @@ -141,7 +141,7 @@ //! //! ## References //! -//! * [`micromark-extension-gfm-task-list-item`](https://github.com/micromark/micromark-extension-gfm-footnote) +//! * [`micromark-extension-gfm-footnote`](https://github.com/micromark/micromark-extension-gfm-footnote) //! //! > 👉 **Note**: Footnotes are not specified in GFM yet. //! > See [`github/cmark-gfm#270`](https://github.com/github/cmark-gfm/issues/270) diff --git a/src/construct/gfm_table.rs b/src/construct/gfm_table.rs new file mode 100644 index 0000000..d7c2b69 --- /dev/null +++ b/src/construct/gfm_table.rs @@ -0,0 +1,1042 @@ +//! GFM: table occurs in the [flow][] content type. +//! +//! ## Grammar +//! +//! Tables form with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>): +//! +//! ```bnf +//! gfm_table ::= gfm_table_head 0*(eol gfm_table_body_row) +//! +//! ; Restriction: both rows must have the same number of cells. +//! gfm_table_head ::= gfm_table_row eol gfm_table_delimiter_row +//! 
+//! gfm_table_row ::= ['|'] gfm_table_cell 0*('|' gfm_table_cell) ['|'] *space_or_tab +//! gfm_table_cell ::= *space_or_tab gfm_table_text *space_or_tab +//! gfm_table_text ::= 0*(line - '\\' - '|' | '\\' ['\\' | '|']) +// +//! gfm_table_delimiter_row ::= ['|'] gfm_table_delimiter_cell 0*('|' gfm_table_delimiter_cell) ['|'] *space_or_tab +//! gfm_table_delimiter_cell ::= *space_or_tab gfm_table_delimiter_value *space_or_tab +//! gfm_table_delimiter_value ::= [':'] 1*'-' [':'] +//! ``` +//! +//! As this construct occurs in flow, like all flow constructs, it must be +//! followed by an eol (line ending) or eof (end of file). +//! +//! The above grammar shows that basically anything can be a cell or a row. +//! The main thing that makes something a row, is that it occurs directly before +//! or after a delimiter row, or after another row. +//! +//! It is not required for a table to have a body: it can end right after the +//! delimiter row. +//! +//! Each column can be marked with an alignment. +//! The alignment marker is a colon (`:`) used before and/or after delimiter row +//! filler. +//! To illustrate: +//! +//! ```markdown +//! | none | left | right | center | +//! | ---- | :--- | ----: | :----: | +//! ``` +//! +//! The number of cells in the delimiter row, is the number of columns of the +//! table. +//! Only the head row is required to have the same number of cells. +//! Body rows are not required to have a certain number of cells. +//! For body rows that have less cells than the number of columns of the table, +//! empty cells are injected. +//! When a row has more cells than the number of columns of the table, the +//! superfluous cells are dropped. +//! To illustrate: +//! +//! ```markdown +//! | a | b | +//! | - | - | +//! | c | +//! | d | e | f | +//! ``` +//! +//! Yields: +//! +//! ```html +//! <table> +//! <thead> +//! <tr> +//! <th>a</th> +//! <th>b</th> +//! </tr> +//! </thead> +//! <tbody> +//! <tr> +//! <td>c</td> +//! <td></td> +//! </tr> +//! 
<tr> +//! <td>d</td> +//! <td>e</td> +//! </tr> +//! </tbody> +//! </table> +//! ``` +//! +//! Each cell’s text is interpreted as the [text][] content type. +//! That means that it can include constructs such as [attention][attention]. +//! +//! The grammar for cells prohibits the use of `|` in them. +//! To use pipes in cells, encode them as a character reference or character +//! escape: `&vert;` (or `&VerticalLine;`, `&verbar;`, `&#124;`, `&#x7c;`) or +//! `\|`. +//! +//! Escapes will typically work, but they are not supported in +//! [code (text)][raw_text] (and the math (text) extension). +//! To work around this, GitHub came up with a rather weird “trick”. +//! When inside a table cell *and* inside code, escaped pipes *are* decoded. +//! To illustrate: +//! +//! ```markdown +//! | Name | Character | +//! | - | - | +//! | Left curly brace | `{` | +//! | Pipe | `\|` | +//! | Right curly brace | `}` | +//! ``` +//! +//! Yields: +//! +//! ```html +//! <table> +//! <thead> +//! <tr> +//! <th>Name</th> +//! <th>Character</th> +//! </tr> +//! </thead> +//! <tbody> +//! <tr> +//! <td>Left curly brace</td> +//! <td><code>{</code></td> +//! </tr> +//! <tr> +//! <td>Pipe</td> +//! <td><code>|</code></td> +//! </tr> +//! <tr> +//! <td>Right curly brace</td> +//! <td><code>}</code></td> +//! </tr> +//! </tbody> +//! </table> +//! ``` +//! +//! > 👉 **Note**: no other character can be escaped like this. +//! > Escaping pipes in code does not work when not inside a table, either. +//! +//! ## HTML +//! +//! GFM tables relate to several HTML elements: `<table>`, `<tbody>`, `<td>`, +//! `<th>`, `<thead>`, and `<tr>`. +//! See +//! [*§ 4.9.1 The `table` element*][html_table], +//! [*§ 4.9.5 The `tbody` element*][html_tbody], +//! [*§ 4.9.9 The `td` element*][html_td], +//! [*§ 4.9.10 The `th` element*][html_th], +//! [*§ 4.9.6 The `thead` element*][html_thead], and +//! [*§ 4.9.8 The `tr` element*][html_tr] +//! in the HTML spec for more info. +//! +//!
If the alignment of a column is left, right, or center, a deprecated +//! `align` attribute is added to each `<th>` and `<td>` element belonging to +//! that column. +//! That attribute is interpreted by browsers as if a CSS `text-align` property +//! was included, with its value set to that same keyword. +//! +//! ## Recommendation +//! +//! When authoring markdown with GFM tables, it’s recommended to *always* put +//! pipes around cells. +//! Without them, it can be hard to infer whether the table will work, how many +//! columns there are, and which column you are currently editing. +//! +//! It is recommended to not use many columns, as it results in very long lines, +//! making it hard to infer which column you are currently editing. +//! +//! For larger tables, particularly when cells vary in size, it is recommended +//! *not* to manually “pad” cell text. +//! While it can look better, it results in a lot of time spent realigning +//! everything when a new, longer cell is added or the longest cell removed, as +//! every row then must be changed. +//! Other than costing time, it also causes large diffs in Git. +//! +//! To illustrate, when authoring large tables, it is discouraged to pad cells +//! like this: +//! +//! ```markdown +//! | Alpha bravo charlie | delta | +//! | ------------------- | -----------------: | +//! | Echo | Foxtrot golf hotel | +//! ``` +//! +//! Instead, use single spaces (and single filler dashes): +//! +//! ```markdown +//! | Alpha bravo charlie | delta | +//! | - | -: | +//! | Echo | Foxtrot golf hotel | +//! ``` +//! +//! ## Bugs +//! +//! GitHub’s own algorithm to parse tables contains a bug. +//! This bug is not present in this project. +//! The issue relating to tables is: +//! +//! * [GFM tables: escaped escapes are incorrectly treated as escapes](https://github.com/github/cmark-gfm/issues/277) +//! +//! ## Tokens +//! +//! * [`GfmTable`][Name::GfmTable] +//! * [`GfmTableBody`][Name::GfmTableBody] +//!
* [`GfmTableCell`][Name::GfmTableCell] +//! * [`GfmTableCellDivider`][Name::GfmTableCellDivider] +//! * [`GfmTableCellText`][Name::GfmTableCellText] +//! * [`GfmTableDelimiterCell`][Name::GfmTableDelimiterCell] +//! * [`GfmTableDelimiterCellValue`][Name::GfmTableDelimiterCellValue] +//! * [`GfmTableDelimiterFiller`][Name::GfmTableDelimiterFiller] +//! * [`GfmTableDelimiterMarker`][Name::GfmTableDelimiterMarker] +//! * [`GfmTableDelimiterRow`][Name::GfmTableDelimiterRow] +//! * [`GfmTableHead`][Name::GfmTableHead] +//! * [`GfmTableRow`][Name::GfmTableRow] +//! * [`LineEnding`][Name::LineEnding] +//! +//! ## References +//! +//! * [`micromark-extension-gfm-table`](https://github.com/micromark/micromark-extension-gfm-table) +//! * [*§ 4.10 Tables (extension)* in `GFM`](https://github.github.com/gfm/#tables-extension-) +//! +//! [flow]: crate::construct::flow +//! [text]: crate::construct::text +//! [attention]: crate::construct::attention +//! [raw_text]: crate::construct::raw_text +//! [html_table]: https://html.spec.whatwg.org/multipage/tables.html#the-table-element +//! [html_tbody]: https://html.spec.whatwg.org/multipage/tables.html#the-tbody-element +//! [html_td]: https://html.spec.whatwg.org/multipage/tables.html#the-td-element +//! [html_th]: https://html.spec.whatwg.org/multipage/tables.html#the-th-element +//! [html_thead]: https://html.spec.whatwg.org/multipage/tables.html#the-thead-element +//! [html_tr]: https://html.spec.whatwg.org/multipage/tables.html#the-tr-element + +use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}; +use crate::event::{Content, Event, Kind, Link, Name}; +use crate::resolve::Name as ResolveName; +use crate::state::{Name as StateName, State}; +use crate::tokenizer::Tokenizer; +use crate::util::{constant::TAB_SIZE, skip::opt_back as skip_opt_back}; +use alloc::vec; + +/// Start of a GFM table. +/// +/// If there is a valid table row or table head before, then we try to parse +/// another row. 
+/// Otherwise, we try to parse a head. +/// +/// ```markdown +/// > | | a | +/// ^ +/// | | - | +/// > | | b | +/// ^ +/// ``` +pub fn start(tokenizer: &mut Tokenizer) -> State { + if tokenizer.parse_state.options.constructs.gfm_table { + if !tokenizer.pierce + && !tokenizer.events.is_empty() + && matches!( + tokenizer.events[skip_opt_back( + &tokenizer.events, + tokenizer.events.len() - 1, + &[Name::LineEnding, Name::SpaceOrTab], + )] + .name, + Name::GfmTableHead | Name::GfmTableRow + ) + { + State::Retry(StateName::GfmTableBodyRowStart) + } else { + State::Retry(StateName::GfmTableHeadRowBefore) + } + } else { + State::Nok + } +} + +/// Before table head row. +/// +/// ```markdown +/// > | | a | +/// ^ +/// | | - | +/// | | b | +/// ``` +pub fn head_row_before(tokenizer: &mut Tokenizer) -> State { + tokenizer.enter(Name::GfmTableHead); + tokenizer.enter(Name::GfmTableRow); + if matches!(tokenizer.current, Some(b'\t' | b' ')) { + tokenizer.attempt(State::Next(StateName::GfmTableHeadRowStart), State::Nok); + State::Retry(space_or_tab_min_max( + tokenizer, + 0, + if tokenizer.parse_state.options.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }, + )) + } else { + State::Retry(StateName::GfmTableHeadRowStart) + } +} + +/// Before table head row, after whitespace. +/// +/// ```markdown +/// > | | a | +/// ^ +/// | | - | +/// | | b | +/// ``` +pub fn head_row_start(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + // 4+ spaces. + Some(b'\t' | b' ') => State::Nok, + Some(b'|') => State::Retry(StateName::GfmTableHeadRowBreak), + _ => { + tokenizer.tokenize_state.seen = true; + State::Retry(StateName::GfmTableHeadRowBreak) + } + } +} + +/// At break in table head row. 
+/// +/// ```markdown +/// > | | a | +/// ^ +/// ^ +/// ^ +/// | | - | +/// | | b | +/// ``` +pub fn head_row_break(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + None => { + tokenizer.tokenize_state.seen = false; + State::Nok + } + Some(b'\n') => { + // Feel free to interrupt: + tokenizer.interrupt = true; + tokenizer.exit(Name::GfmTableRow); + tokenizer.enter(Name::LineEnding); + tokenizer.consume(); + tokenizer.exit(Name::LineEnding); + State::Next(StateName::GfmTableHeadDelimiterStart) + } + Some(b'\t' | b' ') => { + tokenizer.attempt(State::Next(StateName::GfmTableHeadRowBreak), State::Nok); + State::Retry(space_or_tab(tokenizer)) + } + _ => { + // Whether a delimiter was seen. + if tokenizer.tokenize_state.seen { + tokenizer.tokenize_state.seen = false; + // Header cell count. + tokenizer.tokenize_state.size += 1; + } + + if tokenizer.current == Some(b'|') { + tokenizer.enter(Name::GfmTableCellDivider); + tokenizer.consume(); + tokenizer.exit(Name::GfmTableCellDivider); + // Whether a delimiter was seen. + tokenizer.tokenize_state.seen = true; + State::Next(StateName::GfmTableHeadRowBreak) + } else { + // Anything else is cell data. + tokenizer.enter(Name::Data); + State::Retry(StateName::GfmTableHeadRowData) + } + } + } +} + +/// In table head row data. +/// +/// ```markdown +/// > | | a | +/// ^ +/// | | - | +/// | | b | +/// ``` +pub fn head_row_data(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + None | Some(b'\t' | b'\n' | b' ' | b'|') => { + tokenizer.exit(Name::Data); + State::Retry(StateName::GfmTableHeadRowBreak) + } + _ => { + let name = if tokenizer.current == Some(b'\\') { + StateName::GfmTableHeadRowEscape + } else { + StateName::GfmTableHeadRowData + }; + tokenizer.consume(); + State::Next(name) + } + } +} + +/// In table head row escape. 
+/// +/// ```markdown +/// > | | a\-b | +/// ^ +/// | | ---- | +/// | | c | +/// ``` +pub fn head_row_escape(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b'\\' | b'|') => { + tokenizer.consume(); + State::Next(StateName::GfmTableHeadRowData) + } + _ => State::Retry(StateName::GfmTableHeadRowData), + } +} + +/// Before delimiter row. +/// +/// ```markdown +/// | | a | +/// > | | - | +/// ^ +/// | | b | +/// ``` +pub fn head_delimiter_start(tokenizer: &mut Tokenizer) -> State { + // Reset `interrupt`. + tokenizer.interrupt = false; + + if tokenizer.lazy || tokenizer.pierce { + State::Nok + } else { + tokenizer.enter(Name::GfmTableDelimiterRow); + // Track if we’ve seen a `:` or `|`. + tokenizer.tokenize_state.seen = false; + + match tokenizer.current { + Some(b'\t' | b' ') => { + tokenizer.attempt( + State::Next(StateName::GfmTableHeadDelimiterBefore), + State::Next(StateName::GfmTableHeadDelimiterNok), + ); + + State::Retry(space_or_tab_min_max( + tokenizer, + 0, + if tokenizer.parse_state.options.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }, + )) + } + _ => State::Retry(StateName::GfmTableHeadDelimiterBefore), + } + } +} + +/// Before delimiter row, after optional whitespace. +/// +/// Reused when a `|` is found later, to parse another cell. +/// +/// ```markdown +/// | | a | +/// > | | - | +/// ^ +/// | | b | +/// ``` +pub fn head_delimiter_before(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b'-' | b':') => State::Retry(StateName::GfmTableHeadDelimiterValueBefore), + Some(b'|') => { + tokenizer.tokenize_state.seen = true; + // If we start with a pipe, we open a cell marker. + tokenizer.enter(Name::GfmTableCellDivider); + tokenizer.consume(); + tokenizer.exit(Name::GfmTableCellDivider); + State::Next(StateName::GfmTableHeadDelimiterCellBefore) + } + // More whitespace / empty row not allowed at start. 
+ _ => State::Retry(StateName::GfmTableHeadDelimiterNok), + } +} + +/// After `|`, before delimiter cell. +/// +/// ```markdown +/// | | a | +/// > | | - | +/// ^ +/// ``` +pub fn head_delimiter_cell_before(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b'\t' | b' ') => { + tokenizer.attempt( + State::Next(StateName::GfmTableHeadDelimiterValueBefore), + State::Nok, + ); + State::Retry(space_or_tab(tokenizer)) + } + _ => State::Retry(StateName::GfmTableHeadDelimiterValueBefore), + } +} + +/// Before delimiter cell value. +/// +/// ```markdown +/// | | a | +/// > | | - | +/// ^ +/// ``` +pub fn head_delimiter_value_before(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + None | Some(b'\n') => State::Retry(StateName::GfmTableHeadDelimiterCellAfter), + Some(b':') => { + // Align: left. + tokenizer.tokenize_state.size_b += 1; + tokenizer.tokenize_state.seen = true; + tokenizer.enter(Name::GfmTableDelimiterMarker); + tokenizer.consume(); + tokenizer.exit(Name::GfmTableDelimiterMarker); + State::Next(StateName::GfmTableHeadDelimiterLeftAlignmentAfter) + } + Some(b'-') => { + // Align: none. + tokenizer.tokenize_state.size_b += 1; + State::Retry(StateName::GfmTableHeadDelimiterLeftAlignmentAfter) + } + _ => State::Retry(StateName::GfmTableHeadDelimiterNok), + } +} + +/// After delimiter cell left alignment marker. +/// +/// ```markdown +/// | | a | +/// > | | :- | +/// ^ +/// ``` +pub fn head_delimiter_left_alignment_after(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b'-') => { + tokenizer.enter(Name::GfmTableDelimiterFiller); + State::Retry(StateName::GfmTableHeadDelimiterFiller) + } + // Anything else is not ok after the left-align colon. + _ => State::Retry(StateName::GfmTableHeadDelimiterNok), + } +} + +/// In delimiter cell filler. 
+/// +/// ```markdown +/// | | a | +/// > | | - | +/// ^ +/// ``` +pub fn head_delimiter_filler(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b'-') => { + tokenizer.consume(); + State::Next(StateName::GfmTableHeadDelimiterFiller) + } + Some(b':') => { + // Align is `center` if it was `left`, `right` otherwise. + tokenizer.tokenize_state.seen = true; + tokenizer.exit(Name::GfmTableDelimiterFiller); + tokenizer.enter(Name::GfmTableDelimiterMarker); + tokenizer.consume(); + tokenizer.exit(Name::GfmTableDelimiterMarker); + State::Next(StateName::GfmTableHeadDelimiterRightAlignmentAfter) + } + _ => { + tokenizer.exit(Name::GfmTableDelimiterFiller); + State::Retry(StateName::GfmTableHeadDelimiterRightAlignmentAfter) + } + } +} + +/// After delimiter cell right alignment marker. +/// +/// ```markdown +/// | | a | +/// > | | -: | +/// ^ +/// ``` +pub fn head_delimiter_right_alignment_after(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b'\t' | b' ') => { + tokenizer.attempt( + State::Next(StateName::GfmTableHeadDelimiterCellAfter), + State::Nok, + ); + State::Retry(space_or_tab(tokenizer)) + } + _ => State::Retry(StateName::GfmTableHeadDelimiterCellAfter), + } +} + +/// After delimiter cell. +/// +/// ```markdown +/// | | a | +/// > | | -: | +/// ^ +/// ``` +pub fn head_delimiter_cell_after(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + None | Some(b'\n') => { + // Exit when: + // * there was no `:` or `|` at all (it’s a thematic break or setext + // underline instead) + // * the header cell count is not the delimiter cell count + if !tokenizer.tokenize_state.seen + || tokenizer.tokenize_state.size != tokenizer.tokenize_state.size_b + { + State::Retry(StateName::GfmTableHeadDelimiterNok) + } else { + // Reset. 
+ tokenizer.tokenize_state.seen = false; + tokenizer.tokenize_state.size = 0; + tokenizer.tokenize_state.size_b = 0; + tokenizer.exit(Name::GfmTableDelimiterRow); + tokenizer.exit(Name::GfmTableHead); + tokenizer.register_resolver(ResolveName::GfmTable); + State::Ok + } + } + Some(b'|') => State::Retry(StateName::GfmTableHeadDelimiterBefore), + _ => State::Retry(StateName::GfmTableHeadDelimiterNok), + } +} + +/// In delimiter row, at a disallowed byte. +/// +/// ```markdown +/// | | a | +/// > | | x | +/// ^ +/// ``` +pub fn head_delimiter_nok(tokenizer: &mut Tokenizer) -> State { + // Reset. + tokenizer.tokenize_state.seen = false; + tokenizer.tokenize_state.size = 0; + tokenizer.tokenize_state.size_b = 0; + State::Nok +} + +/// Before table body row. +/// +/// ```markdown +/// | | a | +/// | | - | +/// > | | b | +/// ^ +/// ``` +pub fn body_row_start(tokenizer: &mut Tokenizer) -> State { + if tokenizer.lazy { + State::Nok + } else { + tokenizer.enter(Name::GfmTableRow); + + match tokenizer.current { + Some(b'\t' | b' ') => { + tokenizer.attempt(State::Next(StateName::GfmTableBodyRowBefore), State::Nok); + + State::Retry(space_or_tab_min_max( + tokenizer, + 0, + if tokenizer.parse_state.options.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }, + )) + } + _ => State::Retry(StateName::GfmTableBodyRowBefore), + } + } +} + +/// Before table body row, after optional whitespace. +/// +/// ```markdown +/// | | a | +/// | | - | +/// > | | b | +/// ^ +/// ``` +pub fn body_row_before(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b'\t' | b' ') => State::Nok, + _ => State::Retry(StateName::GfmTableBodyRowBreak), + } +} + +/// At break in table body row. 
+/// +/// ```markdown +/// | | a | +/// | | - | +/// > | | b | +/// ^ +/// ^ +/// ^ +/// ``` +pub fn body_row_break(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + None | Some(b'\n') => { + tokenizer.exit(Name::GfmTableRow); + State::Ok + } + Some(b'\t' | b' ') => { + tokenizer.attempt(State::Next(StateName::GfmTableBodyRowBreak), State::Nok); + State::Retry(space_or_tab(tokenizer)) + } + Some(b'|') => { + tokenizer.enter(Name::GfmTableCellDivider); + tokenizer.consume(); + tokenizer.exit(Name::GfmTableCellDivider); + State::Next(StateName::GfmTableBodyRowBreak) + } + // Anything else is cell content. + _ => { + tokenizer.enter(Name::Data); + State::Retry(StateName::GfmTableBodyRowData) + } + } +} + +/// In table body row data. +/// +/// ```markdown +/// | | a | +/// | | - | +/// > | | b | +/// ^ +/// ``` +pub fn body_row_data(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + None | Some(b'\t' | b'\n' | b' ' | b'|') => { + tokenizer.exit(Name::Data); + State::Retry(StateName::GfmTableBodyRowBreak) + } + _ => { + let name = if tokenizer.current == Some(b'\\') { + StateName::GfmTableBodyRowEscape + } else { + StateName::GfmTableBodyRowData + }; + tokenizer.consume(); + State::Next(name) + } + } +} + +/// In table body row escape. +/// +/// ```markdown +/// | | a | +/// | | ---- | +/// > | | b\-c | +/// ^ +/// ``` +pub fn body_row_escape(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b'\\' | b'|') => { + tokenizer.consume(); + State::Next(StateName::GfmTableBodyRowData) + } + _ => State::Retry(StateName::GfmTableBodyRowData), + } +} + +/// Resolve GFM table. 
+pub fn resolve(tokenizer: &mut Tokenizer) { + let mut index = 0; + // let mut tables = vec![]; + let mut in_first_cell_awaiting_pipe = true; + let mut in_row = false; + let mut in_delimiter_row = false; + let mut last_cell = (0, 0, 0, 0); + let mut cell = (0, 0, 0, 0); + + let mut after_head_awaiting_first_body_row = false; + let mut last_table_end = 0; + let mut last_table_has_body = false; + + while index < tokenizer.events.len() { + let event = &tokenizer.events[index]; + + if event.kind == Kind::Enter { + // Start of head. + if event.name == Name::GfmTableHead { + after_head_awaiting_first_body_row = false; + + // Inject previous (body end and) table end. + if last_table_end != 0 { + flush_table_end(tokenizer, last_table_end, last_table_has_body); + last_table_has_body = false; + last_table_end = 0; + } + + // Inject table start. + tokenizer.map.add( + index, + 0, + vec![Event { + kind: Kind::Enter, + name: Name::GfmTable, + point: tokenizer.events[index].point.clone(), + link: None, + }], + ); + } else if event.name == Name::GfmTableRow || event.name == Name::GfmTableDelimiterRow { + in_delimiter_row = event.name == Name::GfmTableDelimiterRow; + in_row = true; + in_first_cell_awaiting_pipe = true; + last_cell = (0, 0, 0, 0); + cell = (0, index + 1, 0, 0); + + // Inject table body start. + if after_head_awaiting_first_body_row { + after_head_awaiting_first_body_row = false; + last_table_has_body = true; + tokenizer.map.add( + index, + 0, + vec![Event { + kind: Kind::Enter, + name: Name::GfmTableBody, + point: tokenizer.events[index].point.clone(), + link: None, + }], + ); + } + } + // Cell data. + else if in_row + && (event.name == Name::Data + || event.name == Name::GfmTableDelimiterMarker + || event.name == Name::GfmTableDelimiterFiller) + { + in_first_cell_awaiting_pipe = false; + + // First value in cell. 
+ if cell.2 == 0 { + if last_cell.1 != 0 { + cell.0 = cell.1; + flush_cell(tokenizer, last_cell, in_delimiter_row, None); + last_cell = (0, 0, 0, 0); + } + + cell.2 = index; + } + } else if event.name == Name::GfmTableCellDivider { + if in_first_cell_awaiting_pipe { + in_first_cell_awaiting_pipe = false; + } else { + if last_cell.1 != 0 { + cell.0 = cell.1; + flush_cell(tokenizer, last_cell, in_delimiter_row, None); + } + + last_cell = cell; + cell = (last_cell.1, index, 0, 0); + } + } + // Exit events. + } else if event.name == Name::GfmTableHead { + after_head_awaiting_first_body_row = true; + last_table_end = index; + } else if event.name == Name::GfmTableRow || event.name == Name::GfmTableDelimiterRow { + in_row = false; + last_table_end = index; + if last_cell.1 != 0 { + cell.0 = cell.1; + flush_cell(tokenizer, last_cell, in_delimiter_row, Some(index)); + } else if cell.1 != 0 { + flush_cell(tokenizer, cell, in_delimiter_row, Some(index)); + } + } else if in_row + && (event.name == Name::Data + || event.name == Name::GfmTableDelimiterMarker + || event.name == Name::GfmTableDelimiterFiller) + { + cell.3 = index; + } + + index += 1; + } + + if last_table_end != 0 { + flush_table_end(tokenizer, last_table_end, last_table_has_body); + } +} + +/// Generate a cell. +fn flush_cell( + tokenizer: &mut Tokenizer, + range: (usize, usize, usize, usize), + in_delimiter_row: bool, + row_end: Option<usize>, +) { + let group_name = if in_delimiter_row { + Name::GfmTableDelimiterCell + } else { + Name::GfmTableCell + }; + let value_name = if in_delimiter_row { + Name::GfmTableDelimiterCellValue + } else { + Name::GfmTableCellText + }; + + // Insert an exit for the previous cell, if there is one. 
+ // + // ```markdown + // > | | aa | bb | cc | + // ^-- exit + // ^^^^-- this cell + // ``` + if range.0 != 0 { + tokenizer.map.add( + range.0, + 0, + vec![Event { + kind: Kind::Exit, + name: group_name.clone(), + point: tokenizer.events[range.0].point.clone(), + link: None, + }], + ); + } + + // Insert enter of this cell. + // + // ```markdown + // > | | aa | bb | cc | + // ^-- enter + // ^^^^-- this cell + // ``` + tokenizer.map.add( + range.1, + 0, + vec![Event { + kind: Kind::Enter, + name: group_name.clone(), + point: tokenizer.events[range.1].point.clone(), + link: None, + }], + ); + + // Insert text start at first data start and end at last data end, and + // remove events between. + // + // ```markdown + // > | | aa | bb | cc | + // ^-- enter + // ^-- exit + // ^^^^-- this cell + // ``` + if range.2 != 0 { + tokenizer.map.add( + range.2, + 0, + vec![Event { + kind: Kind::Enter, + name: value_name.clone(), + point: tokenizer.events[range.2].point.clone(), + link: None, + }], + ); + debug_assert_ne!(range.3, 0); + + if !in_delimiter_row { + tokenizer.events[range.2].link = Some(Link { + previous: None, + next: None, + content: Content::Text, + }); + + if !in_delimiter_row && range.3 > range.2 + 1 { + let a = range.2 + 1; + let b = range.3 - range.2 - 1; + tokenizer.map.add(a, b, vec![]); + } + } + + tokenizer.map.add( + range.3 + 1, + 0, + vec![Event { + kind: Kind::Exit, + name: value_name, + point: tokenizer.events[range.3].point.clone(), + link: None, + }], + ); + } + + // Insert an exit for the last cell, if at the row end. + // + // ```markdown + // > | | aa | bb | cc | + // ^-- exit + // ^^^^^^-- this cell (the last one contains two “between” parts) + // ``` + if let Some(row_end) = row_end { + tokenizer.map.add( + row_end, + 0, + vec![Event { + kind: Kind::Exit, + name: group_name, + point: tokenizer.events[row_end].point.clone(), + link: None, + }], + ); + } +} + +/// Generate table end (and table body end). 
+fn flush_table_end(tokenizer: &mut Tokenizer, index: usize, body: bool) { + let mut exits = vec![]; + + if body { + exits.push(Event { + kind: Kind::Exit, + name: Name::GfmTableBody, + point: tokenizer.events[index].point.clone(), + link: None, + }); + } + + exits.push(Event { + kind: Kind::Exit, + name: Name::GfmTable, + point: tokenizer.events[index].point.clone(), + link: None, + }); + + tokenizer.map.add(index + 1, 0, exits); +} diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs index df1d4fb..e9cc759 100644 --- a/src/construct/heading_setext.rs +++ b/src/construct/heading_setext.rs @@ -87,6 +87,7 @@ use alloc::vec; pub fn start(tokenizer: &mut Tokenizer) -> State { if tokenizer.parse_state.options.constructs.heading_setext && !tokenizer.lazy + && !tokenizer.pierce // Require a paragraph before. && (!tokenizer.events.is_empty() && tokenizer.events[skip_opt_back( diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs index b5a6013..8a9edfb 100644 --- a/src/construct/label_end.rs +++ b/src/construct/label_end.rs @@ -191,7 +191,6 @@ use crate::util::{ slice::{Position, Slice}, }; use alloc::{string::String, vec}; -extern crate std; /// Start of label end. /// diff --git a/src/construct/mod.rs b/src/construct/mod.rs index 9add015..de88174 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -60,6 +60,7 @@ //! * [frontmatter][] //! * [gfm autolink literal][gfm_autolink_literal] //! * [gfm footnote definition][gfm_footnote_definition] +//! * [gfm table][gfm_table] //! * [gfm task list item check][gfm_task_list_item_check] //! * [gfm label start footnote][gfm_label_start_footnote] //! 
* math (text) (in `raw_text`) @@ -151,6 +152,7 @@ pub mod frontmatter; pub mod gfm_autolink_literal; pub mod gfm_footnote_definition; pub mod gfm_label_start_footnote; +pub mod gfm_table; pub mod gfm_task_list_item_check; pub mod hard_break_escape; pub mod heading_atx; diff --git a/src/event.rs b/src/event.rs index 3e540c0..ba266b4 100644 --- a/src/event.rs +++ b/src/event.rs @@ -1240,6 +1240,265 @@ pub enum Name { /// ^ /// ``` GfmStrikethroughText, + /// GFM extension: Table. + /// + /// ## Info + /// + /// * **Context**: + /// [flow content][crate::construct::flow] + /// * **Content model**: + /// [`GfmTableBody`][Name::GfmTableBody], + /// [`GfmTableHead`][Name::GfmTableHead], + /// [`LineEnding`][Name::LineEnding] + /// * **Construct**: + /// [`gfm_table`][crate::construct::gfm_table] + /// + /// ## Example + /// + /// ```markdown + /// > | | a | + /// ^^^^^ + /// > | | - | + /// ^^^^^ + /// > | | b | + /// ^^^^^ + /// ``` + GfmTable, + /// GFM extension: Table body. + /// + /// ## Info + /// + /// * **Context**: + /// [`GfmTable`][Name::GfmTable] + /// * **Content model**: + /// [`GfmTableRow`][Name::GfmTableRow], + /// [`LineEnding`][Name::LineEnding] + /// * **Construct**: + /// [`gfm_table`][crate::construct::gfm_table] + /// + /// ## Example + /// + /// ```markdown + /// | | a | + /// | | - | + /// > | | b | + /// ^^^^^ + /// ``` + GfmTableBody, + /// GFM extension: Table cell. + /// + /// ## Info + /// + /// * **Context**: + /// [`GfmTableRow`][Name::GfmTableRow] + /// * **Content model**: + /// [`GfmTableCellDivider`][Name::GfmTableCellDivider], + /// [`GfmTableCellText`][Name::GfmTableCellText], + /// [`SpaceOrTab`][Name::SpaceOrTab] + /// * **Construct**: + /// [`gfm_table`][crate::construct::gfm_table] + /// + /// ## Example + /// + /// ```markdown + /// > | | a | + /// ^^^^^ + /// | | - | + /// > | | b | + /// ^^^^^ + /// ``` + GfmTableCell, + /// GFM extension: Table cell text. 
+ /// + /// ## Info + /// + /// * **Context**: + /// [`GfmTableCell`][Name::GfmTableCell] + /// * **Content model**: + /// [text content][crate::construct::text] + /// * **Construct**: + /// [`gfm_table`][crate::construct::gfm_table] + /// + /// ## Example + /// + /// ```markdown + /// > | | a | + /// ^ + /// | | - | + /// > | | b | + /// ^ + /// ``` + GfmTableCellText, + /// GFM extension: Table cell divider. + /// + /// ## Info + /// + /// * **Context**: + /// [`GfmTableCell`][Name::GfmTableCell] + /// * **Content model**: + /// void + /// * **Construct**: + /// [`gfm_table`][crate::construct::gfm_table] + /// + /// ## Example + /// + /// ```markdown + /// > | | a | + /// ^ ^ + /// > | | - | + /// ^ ^ + /// > | | b | + /// ^ ^ + /// ``` + GfmTableCellDivider, + /// GFM extension: Table delimiter row. + /// + /// ## Info + /// + /// * **Context**: + /// [`GfmTableHead`][Name::GfmTableHead] + /// * **Content model**: + /// [`GfmTableDelimiterCell`][Name::GfmTableDelimiterCell] + /// * **Construct**: + /// [`gfm_table`][crate::construct::gfm_table] + /// + /// ## Example + /// + /// ```markdown + /// | | a | + /// > | | - | + /// ^^^^^ + /// | | b | + /// ``` + GfmTableDelimiterRow, + /// GFM extension: Table delimiter alignment marker. + /// + /// ## Info + /// + /// * **Context**: + /// [`GfmTableDelimiterCellValue`][Name::GfmTableDelimiterCellValue] + /// * **Content model**: + /// void + /// * **Construct**: + /// [`gfm_table`][crate::construct::gfm_table] + /// + /// ## Example + /// + /// ```markdown + /// | | a | + /// > | | :- | + /// ^ + /// | | b | + /// ``` + GfmTableDelimiterMarker, + /// GFM extension: Table delimiter cell. 
+ /// + /// ## Info + /// + /// * **Context**: + /// [`GfmTableDelimiterRow`][Name::GfmTableDelimiterRow] + /// * **Content model**: + /// [`GfmTableCellDivider`][Name::GfmTableCellDivider], + /// [`GfmTableDelimiterCellValue`][Name::GfmTableDelimiterCellValue], + /// [`SpaceOrTab`][Name::SpaceOrTab] + /// * **Construct**: + /// [`gfm_table`][crate::construct::gfm_table] + /// + /// ## Example + /// + /// ```markdown + /// | | a | + /// > | | - | + /// ^^^^^ + /// | | b | + /// ``` + GfmTableDelimiterCell, + /// GFM extension: Table delimiter cell alignment. + /// + /// ## Info + /// + /// * **Context**: + /// [`GfmTableDelimiterCell`][Name::GfmTableDelimiterCell] + /// * **Content model**: + /// [`GfmTableDelimiterMarker`][Name::GfmTableDelimiterMarker], + /// [`GfmTableDelimiterFiller`][Name::GfmTableDelimiterFiller] + /// * **Construct**: + /// [`gfm_table`][crate::construct::gfm_table] + /// + /// ## Example + /// + /// ```markdown + /// | | a | + /// > | | - | + /// ^ + /// | | b | + /// ``` + GfmTableDelimiterCellValue, + /// GFM extension: Table delimiter filler. + /// + /// ## Info + /// + /// * **Context**: + /// [`GfmTableDelimiterCellValue`][Name::GfmTableDelimiterCellValue] + /// * **Content model**: + /// void + /// * **Construct**: + /// [`gfm_table`][crate::construct::gfm_table] + /// + /// ## Example + /// + /// ```markdown + /// | | a | + /// > | | - | + /// ^ + /// | | b | + /// ``` + GfmTableDelimiterFiller, + /// GFM extension: Table head. + /// + /// ## Info + /// + /// * **Context**: + /// [`GfmTable`][Name::GfmTable] + /// * **Content model**: + /// [`GfmTableRow`][Name::GfmTableRow], + /// [`GfmTableDelimiterRow`][Name::GfmTableDelimiterRow], + /// [`LineEnding`][Name::LineEnding] + /// * **Construct**: + /// [`gfm_table`][crate::construct::gfm_table] + /// + /// ## Example + /// + /// ```markdown + /// > | | a | + /// ^^^^^ + /// > | | - | + /// ^^^^^ + /// | | b | + /// ``` + GfmTableHead, + /// GFM extension: Table row. 
+ /// + /// ## Info + /// + /// * **Context**: + /// [`GfmTableBody`][Name::GfmTableBody], + /// [`GfmTableHead`][Name::GfmTableHead] + /// * **Content model**: + /// [`GfmTableCell`][Name::GfmTableCell] + /// * **Construct**: + /// [`gfm_table`][crate::construct::gfm_table] + /// + /// ## Example + /// + /// ```markdown + /// > | | a | + /// ^^^^^ + /// | | - | + /// > | | b | + /// ^^^^^ + /// ``` + GfmTableRow, /// GFM extension: task list item check. /// /// ## Info @@ -2436,7 +2695,7 @@ pub enum Name { } /// List of void events, used to make sure everything is working well. -pub const VOID_EVENTS: [Name; 57] = [ +pub const VOID_EVENTS: [Name; 60] = [ Name::AttentionSequence, Name::AutolinkEmail, Name::AutolinkMarker, @@ -2469,6 +2728,9 @@ pub const VOID_EVENTS: [Name; 57] = [ Name::GfmFootnoteDefinitionLabelMarker, Name::GfmFootnoteDefinitionMarker, Name::GfmStrikethroughSequence, + Name::GfmTableCellDivider, + Name::GfmTableDelimiterMarker, + Name::GfmTableDelimiterFiller, Name::GfmTaskListItemMarker, Name::GfmTaskListItemValueChecked, Name::GfmTaskListItemValueUnchecked, @@ -191,6 +191,17 @@ pub struct Constructs { /// ^^^ /// ``` pub gfm_strikethrough: bool, + /// GFM: table. + /// + /// ```markdown + /// > | | a | + /// ^^^^^ + /// > | | - | + /// ^^^^^ + /// > | | b | + /// ^^^^^ + /// ``` + pub gfm_table: bool, /// GFM: task list item. 
/// /// ```markdown @@ -317,6 +328,7 @@ impl Default for Constructs { gfm_label_start_footnote: false, gfm_footnote_definition: false, gfm_strikethrough: false, + gfm_table: false, gfm_task_list_item: false, hard_break_escape: true, hard_break_trailing: true, @@ -346,6 +358,7 @@ impl Constructs { gfm_footnote_definition: true, gfm_label_start_footnote: true, gfm_strikethrough: true, + gfm_table: true, gfm_task_list_item: true, ..Self::default() } diff --git a/src/resolve.rs b/src/resolve.rs index a62d382..d015213 100644 --- a/src/resolve.rs +++ b/src/resolve.rs @@ -19,6 +19,11 @@ pub enum Name { /// and what occurs before and after each sequence. /// Otherwise they are turned into data. Attention, + /// Resolve GFM tables. + /// + /// The table head, and later each row, are all parsed separately. + /// Resolving groups everything together, and groups cells. + GfmTable, /// Resolve heading (atx). /// /// Heading (atx) contains further sequences and data. @@ -60,6 +65,7 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) { let func = match name { Name::Label => construct::label_end::resolve, Name::Attention => construct::attention::resolve, + Name::GfmTable => construct::gfm_table::resolve, Name::HeadingAtx => construct::heading_atx::resolve, Name::HeadingSetext => construct::heading_setext::resolve, Name::ListItem => construct::list_item::resolve, diff --git a/src/state.rs b/src/state.rs index a42e802..5013ec8 100644 --- a/src/state.rs +++ b/src/state.rs @@ -123,6 +123,7 @@ pub enum Name { DocumentFlowEnd, FlowStart, + FlowBeforeGfmTable, FlowBeforeCodeIndented, FlowBeforeRaw, FlowBeforeHtml, @@ -309,6 +310,28 @@ pub enum Name { StringBefore, StringBeforeData, + GfmTableStart, + GfmTableHeadRowBefore, + GfmTableHeadRowStart, + GfmTableHeadRowBreak, + GfmTableHeadRowData, + GfmTableHeadRowEscape, + GfmTableHeadDelimiterStart, + GfmTableHeadDelimiterBefore, + GfmTableHeadDelimiterCellBefore, + GfmTableHeadDelimiterValueBefore, + 
GfmTableHeadDelimiterLeftAlignmentAfter, + GfmTableHeadDelimiterFiller, + GfmTableHeadDelimiterRightAlignmentAfter, + GfmTableHeadDelimiterCellAfter, + GfmTableHeadDelimiterNok, + + GfmTableBodyRowBefore, + GfmTableBodyRowStart, + GfmTableBodyRowBreak, + GfmTableBodyRowData, + GfmTableBodyRowEscape, + TextStart, TextBefore, TextBeforeHtml, @@ -445,6 +468,7 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { Name::DocumentFlowInside => construct::document::flow_inside, Name::FlowStart => construct::flow::start, + Name::FlowBeforeGfmTable => construct::flow::before_gfm_table, Name::FlowBeforeCodeIndented => construct::flow::before_code_indented, Name::FlowBeforeRaw => construct::flow::before_raw, Name::FlowBeforeHtml => construct::flow::before_html, @@ -662,6 +686,33 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { Name::StringBefore => construct::string::before, Name::StringBeforeData => construct::string::before_data, + Name::GfmTableStart => construct::gfm_table::start, + Name::GfmTableHeadRowBefore => construct::gfm_table::head_row_before, + Name::GfmTableHeadRowStart => construct::gfm_table::head_row_start, + Name::GfmTableHeadRowBreak => construct::gfm_table::head_row_break, + Name::GfmTableHeadRowData => construct::gfm_table::head_row_data, + Name::GfmTableHeadRowEscape => construct::gfm_table::head_row_escape, + + Name::GfmTableHeadDelimiterStart => construct::gfm_table::head_delimiter_start, + Name::GfmTableHeadDelimiterBefore => construct::gfm_table::head_delimiter_before, + Name::GfmTableHeadDelimiterCellBefore => construct::gfm_table::head_delimiter_cell_before, + Name::GfmTableHeadDelimiterValueBefore => construct::gfm_table::head_delimiter_value_before, + Name::GfmTableHeadDelimiterLeftAlignmentAfter => { + construct::gfm_table::head_delimiter_left_alignment_after + } + Name::GfmTableHeadDelimiterFiller => construct::gfm_table::head_delimiter_filler, + Name::GfmTableHeadDelimiterRightAlignmentAfter => { + 
construct::gfm_table::head_delimiter_right_alignment_after + } + Name::GfmTableHeadDelimiterCellAfter => construct::gfm_table::head_delimiter_cell_after, + Name::GfmTableHeadDelimiterNok => construct::gfm_table::head_delimiter_nok, + + Name::GfmTableBodyRowBefore => construct::gfm_table::body_row_before, + Name::GfmTableBodyRowStart => construct::gfm_table::body_row_start, + Name::GfmTableBodyRowBreak => construct::gfm_table::body_row_break, + Name::GfmTableBodyRowData => construct::gfm_table::body_row_data, + Name::GfmTableBodyRowEscape => construct::gfm_table::body_row_escape, + Name::TextStart => construct::text::start, Name::TextBefore => construct::text::before, Name::TextBeforeHtml => construct::text::before_html, diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 9b73836..0ab8784 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -305,10 +305,12 @@ pub struct Tokenizer<'a> { /// /// Used when tokenizing [document content][crate::construct::document]. pub concrete: bool, - /// Whether this line is lazy. + /// Whether this row is piercing into the current construct with more + /// containers. /// - /// The previous line was a paragraph, and this line’s containers did not - /// match. + /// Used when tokenizing [document content][crate::construct::document]. + pub pierce: bool, + /// Whether this line is lazy: there are less containers than before. pub lazy: bool, } @@ -370,6 +372,7 @@ impl<'a> Tokenizer<'a> { }, map: EditMap::new(), interrupt: false, + pierce: true, concrete: false, lazy: false, resolvers: vec![], |