diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-08-31 16:50:20 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-08-31 16:50:20 +0200 |
commit | b1590a4fb0c28fdb6af866ea79c186ea57284493 (patch) | |
tree | 61264dc36135e7dae34a04992a99b9f3f71e7b8e /src/construct/gfm_table.rs | |
parent | 670f1d82e01ea2394b21d7d1857f41bdc67b3fce (diff) | |
download | markdown-rs-b1590a4fb0c28fdb6af866ea79c186ea57284493.tar.gz markdown-rs-b1590a4fb0c28fdb6af866ea79c186ea57284493.tar.bz2 markdown-rs-b1590a4fb0c28fdb6af866ea79c186ea57284493.zip |
Add support for GFM tables
Diffstat (limited to '')
-rw-r--r-- | src/construct/gfm_table.rs | 1042 |
1 files changed, 1042 insertions, 0 deletions
diff --git a/src/construct/gfm_table.rs b/src/construct/gfm_table.rs new file mode 100644 index 0000000..d7c2b69 --- /dev/null +++ b/src/construct/gfm_table.rs @@ -0,0 +1,1042 @@ +//! GFM: table occurs in the [flow][] content type. +//! +//! ## Grammar +//! +//! Tables form with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>): +//! +//! ```bnf +//! gfm_table ::= gfm_table_head 0*(eol gfm_table_body_row) +//! +//! ; Restriction: both rows must have the same number of cells. +//! gfm_table_head ::= gfm_table_row eol gfm_table_delimiter_row +//! +//! gfm_table_row ::= ['|'] gfm_table_cell 0*('|' gfm_table_cell) ['|'] *space_or_tab +//! gfm_table_cell ::= *space_or_tab gfm_table_text *space_or_tab +//! gfm_table_text ::= 0*(line - '\\' - '|' | '\\' ['\\' | '|']) +//! +//! gfm_table_delimiter_row ::= ['|'] gfm_table_delimiter_cell 0*('|' gfm_table_delimiter_cell) ['|'] *space_or_tab +//! gfm_table_delimiter_cell ::= *space_or_tab gfm_table_delimiter_value *space_or_tab +//! gfm_table_delimiter_value ::= [':'] 1*'-' [':'] +//! ``` +//! +//! As this construct occurs in flow, like all flow constructs, it must be +//! followed by an eol (line ending) or eof (end of file). +//! +//! The above grammar shows that basically anything can be a cell or a row. +//! The main thing that makes something a row, is that it occurs directly before +//! or after a delimiter row, or after another row. +//! +//! It is not required for a table to have a body: it can end right after the +//! delimiter row. +//! +//! Each column can be marked with an alignment. +//! The alignment marker is a colon (`:`) used before and/or after delimiter row +//! filler. +//! To illustrate: +//! +//! ```markdown +//! | none | left | right | center | +//! | ---- | :--- | ----: | :----: | +//! ``` +//! +//! The number of cells in the delimiter row, is the number of columns of the +//! table. +//! Only the head row is required to have the same number of cells. +//! 
Body rows are not required to have a certain number of cells. +//! For body rows that have fewer cells than the number of columns of the table, +//! empty cells are injected. +//! When a row has more cells than the number of columns of the table, the +//! superfluous cells are dropped. +//! To illustrate: +//! +//! ```markdown +//! | a | b | +//! | - | - | +//! | c | +//! | d | e | f | +//! ``` +//! +//! Yields: +//! +//! ```html +//! <table> +//! <thead> +//! <tr> +//! <th>a</th> +//! <th>b</th> +//! </tr> +//! </thead> +//! <tbody> +//! <tr> +//! <td>c</td> +//! <td></td> +//! </tr> +//! <tr> +//! <td>d</td> +//! <td>e</td> +//! </tr> +//! </tbody> +//! </table> +//! ``` +//! +//! Each cell’s text is interpreted as the [text][] content type. +//! That means that it can include constructs such as [attention][attention]. +//! +//! The grammar for cells prohibits the use of `|` in them. +//! To use pipes in cells, encode them as a character reference or character +//! escape: `&vert;` (or `&VerticalLine;`, `&verbar;`, `&#124;`, `&#x7c;`) or +//! `\|`. +//! +//! Escapes will typically work, but they are not supported in +//! [code (text)][raw_text] (and the math (text) extension). +//! To work around this, GitHub came up with a rather weird “trick”. +//! When inside a table cell *and* inside code, escaped pipes *are* decoded. +//! To illustrate: +//! +//! ```markdown +//! | Name | Character | +//! | - | - | +//! | Left curly brace | `{` | +//! | Pipe | `\|` | +//! | Right curly brace | `}` | +//! ``` +//! +//! Yields: +//! +//! ```html +//! <table> +//! <thead> +//! <tr> +//! <th>Name</th> +//! <th>Character</th> +//! </tr> +//! </thead> +//! <tbody> +//! <tr> +//! <td>Left curly brace</td> +//! <td><code>{</code></td> +//! </tr> +//! <tr> +//! <td>Pipe</td> +//! <td><code>|</code></td> +//! </tr> +//! <tr> +//! <td>Right curly brace</td> +//! <td><code>}</code></td> +//! </tr> +//! </tbody> +//! </table> +//! ``` +//! +//! > 👉 **Note**: no other character can be escaped like this. +//! 
> Escaping pipes in code does not work when not inside a table, either. +//! +//! ## HTML +//! +//! GFM tables relate to several HTML elements: `<table>`, `<tbody>`, `<td>`, +//! `<th>`, `<thead>`, and `<tr>`. +//! See +//! [*§ 4.9.1 The `table` element*][html_table], +//! [*§ 4.9.5 The `tbody` element*][html_tbody], +//! [*§ 4.9.9 The `td` element*][html_td], +//! [*§ 4.9.10 The `th` element*][html_th], +//! [*§ 4.9.6 The `thead` element*][html_thead], and +//! [*§ 4.9.8 The `tr` element*][html_tr] +//! in the HTML spec for more info. +//! +//! If the alignment of a column is left, right, or center, a deprecated +//! `align` attribute is added to each `<th>` and `<td>` element belonging to +//! that column. +//! That attribute is interpreted by browsers as if a CSS `text-align` property +//! was included, with its value set to that same keyword. +//! +//! ## Recommendation +//! +//! When authoring markdown with GFM tables, it’s recommended to *always* put +//! pipes around cells. +//! Without them, it can be hard to infer whether the table will work, how many +//! columns there are, and which column you are currently editing. +//! +//! It is recommended to not use many columns, as it results in very long lines, +//! making it hard to infer which column you are currently editing. +//! +//! For larger tables, particularly when cells vary in size, it is recommended +//! *not* to manually “pad” cell text. +//! While it can look better, it results in a lot of time spent realigning +//! everything when a new, longer cell is added or the longest cell removed, as +//! every row then must be changed. +//! Other than costing time, it also causes large diffs in Git. +//! +//! To illustrate, when authoring large tables, it is discouraged to pad cells +//! like this: +//! +//! ```markdown +//! | Alpha bravo charlie | delta | +//! | ------------------- | -----------------: | +//! | Echo | Foxtrot golf hotel | +//! ``` +//! +//! 
Instead, use single spaces (and single filler dashes): +//! +//! ```markdown +//! | Alpha bravo charlie | delta | +//! | - | -: | +//! | Echo | Foxtrot golf hotel | +//! ``` +//! +//! ## Bugs +//! +//! GitHub’s own algorithm to parse tables contains a bug. +//! This bug is not present in this project. +//! The issue relating to tables is: +//! +//! * [GFM tables: escaped escapes are incorrectly treated as escapes](https://github.com/github/cmark-gfm/issues/277) +//! +//! ## Tokens +//! +//! * [`GfmTable`][Name::GfmTable] +//! * [`GfmTableBody`][Name::GfmTableBody] +//! * [`GfmTableCell`][Name::GfmTableCell] +//! * [`GfmTableCellDivider`][Name::GfmTableCellDivider] +//! * [`GfmTableCellText`][Name::GfmTableCellText] +//! * [`GfmTableDelimiterCell`][Name::GfmTableDelimiterCell] +//! * [`GfmTableDelimiterCellValue`][Name::GfmTableDelimiterCellValue] +//! * [`GfmTableDelimiterFiller`][Name::GfmTableDelimiterFiller] +//! * [`GfmTableDelimiterMarker`][Name::GfmTableDelimiterMarker] +//! * [`GfmTableDelimiterRow`][Name::GfmTableDelimiterRow] +//! * [`GfmTableHead`][Name::GfmTableHead] +//! * [`GfmTableRow`][Name::GfmTableRow] +//! * [`LineEnding`][Name::LineEnding] +//! +//! ## References +//! +//! * [`micromark-extension-gfm-table`](https://github.com/micromark/micromark-extension-gfm-table) +//! * [*§ 4.10 Tables (extension)* in `GFM`](https://github.github.com/gfm/#tables-extension-) +//! +//! [flow]: crate::construct::flow +//! [text]: crate::construct::text +//! [attention]: crate::construct::attention +//! [raw_text]: crate::construct::raw_text +//! [html_table]: https://html.spec.whatwg.org/multipage/tables.html#the-table-element +//! [html_tbody]: https://html.spec.whatwg.org/multipage/tables.html#the-tbody-element +//! [html_td]: https://html.spec.whatwg.org/multipage/tables.html#the-td-element +//! [html_th]: https://html.spec.whatwg.org/multipage/tables.html#the-th-element +//! 
[html_thead]: https://html.spec.whatwg.org/multipage/tables.html#the-thead-element +//! [html_tr]: https://html.spec.whatwg.org/multipage/tables.html#the-tr-element + +use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}; +use crate::event::{Content, Event, Kind, Link, Name}; +use crate::resolve::Name as ResolveName; +use crate::state::{Name as StateName, State}; +use crate::tokenizer::Tokenizer; +use crate::util::{constant::TAB_SIZE, skip::opt_back as skip_opt_back}; +use alloc::vec; + +/// Start of a GFM table. +/// +/// If there is a valid table row or table head before, then we try to parse +/// another row. +/// Otherwise, we try to parse a head. +/// +/// ```markdown +/// > | | a | +/// ^ +/// | | - | +/// > | | b | +/// ^ +/// ``` +pub fn start(tokenizer: &mut Tokenizer) -> State { + if tokenizer.parse_state.options.constructs.gfm_table { + if !tokenizer.pierce + && !tokenizer.events.is_empty() + && matches!( + tokenizer.events[skip_opt_back( + &tokenizer.events, + tokenizer.events.len() - 1, + &[Name::LineEnding, Name::SpaceOrTab], + )] + .name, + Name::GfmTableHead | Name::GfmTableRow + ) + { + State::Retry(StateName::GfmTableBodyRowStart) + } else { + State::Retry(StateName::GfmTableHeadRowBefore) + } + } else { + State::Nok + } +} + +/// Before table head row. +/// +/// ```markdown +/// > | | a | +/// ^ +/// | | - | +/// | | b | +/// ``` +pub fn head_row_before(tokenizer: &mut Tokenizer) -> State { + tokenizer.enter(Name::GfmTableHead); + tokenizer.enter(Name::GfmTableRow); + if matches!(tokenizer.current, Some(b'\t' | b' ')) { + tokenizer.attempt(State::Next(StateName::GfmTableHeadRowStart), State::Nok); + State::Retry(space_or_tab_min_max( + tokenizer, + 0, + if tokenizer.parse_state.options.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }, + )) + } else { + State::Retry(StateName::GfmTableHeadRowStart) + } +} + +/// Before table head row, after whitespace. 
+/// +/// ```markdown +/// > | | a | +/// ^ +/// | | - | +/// | | b | +/// ``` +pub fn head_row_start(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + // 4+ spaces. + Some(b'\t' | b' ') => State::Nok, + Some(b'|') => State::Retry(StateName::GfmTableHeadRowBreak), + _ => { + tokenizer.tokenize_state.seen = true; + State::Retry(StateName::GfmTableHeadRowBreak) + } + } +} + +/// At break in table head row. +/// +/// ```markdown +/// > | | a | +/// ^ +/// ^ +/// ^ +/// | | - | +/// | | b | +/// ``` +pub fn head_row_break(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + None => { + tokenizer.tokenize_state.seen = false; + State::Nok + } + Some(b'\n') => { + // Feel free to interrupt: + tokenizer.interrupt = true; + tokenizer.exit(Name::GfmTableRow); + tokenizer.enter(Name::LineEnding); + tokenizer.consume(); + tokenizer.exit(Name::LineEnding); + State::Next(StateName::GfmTableHeadDelimiterStart) + } + Some(b'\t' | b' ') => { + tokenizer.attempt(State::Next(StateName::GfmTableHeadRowBreak), State::Nok); + State::Retry(space_or_tab(tokenizer)) + } + _ => { + // Whether a delimiter was seen. + if tokenizer.tokenize_state.seen { + tokenizer.tokenize_state.seen = false; + // Header cell count. + tokenizer.tokenize_state.size += 1; + } + + if tokenizer.current == Some(b'|') { + tokenizer.enter(Name::GfmTableCellDivider); + tokenizer.consume(); + tokenizer.exit(Name::GfmTableCellDivider); + // Whether a delimiter was seen. + tokenizer.tokenize_state.seen = true; + State::Next(StateName::GfmTableHeadRowBreak) + } else { + // Anything else is cell data. + tokenizer.enter(Name::Data); + State::Retry(StateName::GfmTableHeadRowData) + } + } + } +} + +/// In table head row data. 
+/// +/// ```markdown +/// > | | a | +/// ^ +/// | | - | +/// | | b | +/// ``` +pub fn head_row_data(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + None | Some(b'\t' | b'\n' | b' ' | b'|') => { + tokenizer.exit(Name::Data); + State::Retry(StateName::GfmTableHeadRowBreak) + } + _ => { + let name = if tokenizer.current == Some(b'\\') { + StateName::GfmTableHeadRowEscape + } else { + StateName::GfmTableHeadRowData + }; + tokenizer.consume(); + State::Next(name) + } + } +} + +/// In table head row escape. +/// +/// ```markdown +/// > | | a\-b | +/// ^ +/// | | ---- | +/// | | c | +/// ``` +pub fn head_row_escape(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b'\\' | b'|') => { + tokenizer.consume(); + State::Next(StateName::GfmTableHeadRowData) + } + _ => State::Retry(StateName::GfmTableHeadRowData), + } +} + +/// Before delimiter row. +/// +/// ```markdown +/// | | a | +/// > | | - | +/// ^ +/// | | b | +/// ``` +pub fn head_delimiter_start(tokenizer: &mut Tokenizer) -> State { + // Reset `interrupt`. + tokenizer.interrupt = false; + + if tokenizer.lazy || tokenizer.pierce { + State::Nok + } else { + tokenizer.enter(Name::GfmTableDelimiterRow); + // Track if we’ve seen a `:` or `|`. + tokenizer.tokenize_state.seen = false; + + match tokenizer.current { + Some(b'\t' | b' ') => { + tokenizer.attempt( + State::Next(StateName::GfmTableHeadDelimiterBefore), + State::Next(StateName::GfmTableHeadDelimiterNok), + ); + + State::Retry(space_or_tab_min_max( + tokenizer, + 0, + if tokenizer.parse_state.options.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }, + )) + } + _ => State::Retry(StateName::GfmTableHeadDelimiterBefore), + } + } +} + +/// Before delimiter row, after optional whitespace. +/// +/// Reused when a `|` is found later, to parse another cell. 
+/// +/// ```markdown +/// | | a | +/// > | | - | +/// ^ +/// | | b | +/// ``` +pub fn head_delimiter_before(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b'-' | b':') => State::Retry(StateName::GfmTableHeadDelimiterValueBefore), + Some(b'|') => { + tokenizer.tokenize_state.seen = true; + // If we start with a pipe, we open a cell marker. + tokenizer.enter(Name::GfmTableCellDivider); + tokenizer.consume(); + tokenizer.exit(Name::GfmTableCellDivider); + State::Next(StateName::GfmTableHeadDelimiterCellBefore) + } + // More whitespace / empty row not allowed at start. + _ => State::Retry(StateName::GfmTableHeadDelimiterNok), + } +} + +/// After `|`, before delimiter cell. +/// +/// ```markdown +/// | | a | +/// > | | - | +/// ^ +/// ``` +pub fn head_delimiter_cell_before(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b'\t' | b' ') => { + tokenizer.attempt( + State::Next(StateName::GfmTableHeadDelimiterValueBefore), + State::Nok, + ); + State::Retry(space_or_tab(tokenizer)) + } + _ => State::Retry(StateName::GfmTableHeadDelimiterValueBefore), + } +} + +/// Before delimiter cell value. +/// +/// ```markdown +/// | | a | +/// > | | - | +/// ^ +/// ``` +pub fn head_delimiter_value_before(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + None | Some(b'\n') => State::Retry(StateName::GfmTableHeadDelimiterCellAfter), + Some(b':') => { + // Align: left. + tokenizer.tokenize_state.size_b += 1; + tokenizer.tokenize_state.seen = true; + tokenizer.enter(Name::GfmTableDelimiterMarker); + tokenizer.consume(); + tokenizer.exit(Name::GfmTableDelimiterMarker); + State::Next(StateName::GfmTableHeadDelimiterLeftAlignmentAfter) + } + Some(b'-') => { + // Align: none. + tokenizer.tokenize_state.size_b += 1; + State::Retry(StateName::GfmTableHeadDelimiterLeftAlignmentAfter) + } + _ => State::Retry(StateName::GfmTableHeadDelimiterNok), + } +} + +/// After delimiter cell left alignment marker. 
+/// +/// ```markdown +/// | | a | +/// > | | :- | +/// ^ +/// ``` +pub fn head_delimiter_left_alignment_after(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b'-') => { + tokenizer.enter(Name::GfmTableDelimiterFiller); + State::Retry(StateName::GfmTableHeadDelimiterFiller) + } + // Anything else is not ok after the left-align colon. + _ => State::Retry(StateName::GfmTableHeadDelimiterNok), + } +} + +/// In delimiter cell filler. +/// +/// ```markdown +/// | | a | +/// > | | - | +/// ^ +/// ``` +pub fn head_delimiter_filler(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b'-') => { + tokenizer.consume(); + State::Next(StateName::GfmTableHeadDelimiterFiller) + } + Some(b':') => { + // Align is `center` if it was `left`, `right` otherwise. + tokenizer.tokenize_state.seen = true; + tokenizer.exit(Name::GfmTableDelimiterFiller); + tokenizer.enter(Name::GfmTableDelimiterMarker); + tokenizer.consume(); + tokenizer.exit(Name::GfmTableDelimiterMarker); + State::Next(StateName::GfmTableHeadDelimiterRightAlignmentAfter) + } + _ => { + tokenizer.exit(Name::GfmTableDelimiterFiller); + State::Retry(StateName::GfmTableHeadDelimiterRightAlignmentAfter) + } + } +} + +/// After delimiter cell right alignment marker. +/// +/// ```markdown +/// | | a | +/// > | | -: | +/// ^ +/// ``` +pub fn head_delimiter_right_alignment_after(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b'\t' | b' ') => { + tokenizer.attempt( + State::Next(StateName::GfmTableHeadDelimiterCellAfter), + State::Nok, + ); + State::Retry(space_or_tab(tokenizer)) + } + _ => State::Retry(StateName::GfmTableHeadDelimiterCellAfter), + } +} + +/// After delimiter cell. 
+/// +/// ```markdown +/// | | a | +/// > | | -: | +/// ^ +/// ``` +pub fn head_delimiter_cell_after(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + None | Some(b'\n') => { + // Exit when: + // * there was no `:` or `|` at all (it’s a thematic break or setext + // underline instead) + // * the header cell count is not the delimiter cell count + if !tokenizer.tokenize_state.seen + || tokenizer.tokenize_state.size != tokenizer.tokenize_state.size_b + { + State::Retry(StateName::GfmTableHeadDelimiterNok) + } else { + // Reset. + tokenizer.tokenize_state.seen = false; + tokenizer.tokenize_state.size = 0; + tokenizer.tokenize_state.size_b = 0; + tokenizer.exit(Name::GfmTableDelimiterRow); + tokenizer.exit(Name::GfmTableHead); + tokenizer.register_resolver(ResolveName::GfmTable); + State::Ok + } + } + Some(b'|') => State::Retry(StateName::GfmTableHeadDelimiterBefore), + _ => State::Retry(StateName::GfmTableHeadDelimiterNok), + } +} + +/// In delimiter row, at a disallowed byte. +/// +/// ```markdown +/// | | a | +/// > | | x | +/// ^ +/// ``` +pub fn head_delimiter_nok(tokenizer: &mut Tokenizer) -> State { + // Reset. + tokenizer.tokenize_state.seen = false; + tokenizer.tokenize_state.size = 0; + tokenizer.tokenize_state.size_b = 0; + State::Nok +} + +/// Before table body row. +/// +/// ```markdown +/// | | a | +/// | | - | +/// > | | b | +/// ^ +/// ``` +pub fn body_row_start(tokenizer: &mut Tokenizer) -> State { + if tokenizer.lazy { + State::Nok + } else { + tokenizer.enter(Name::GfmTableRow); + + match tokenizer.current { + Some(b'\t' | b' ') => { + tokenizer.attempt(State::Next(StateName::GfmTableBodyRowBefore), State::Nok); + + State::Retry(space_or_tab_min_max( + tokenizer, + 0, + if tokenizer.parse_state.options.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }, + )) + } + _ => State::Retry(StateName::GfmTableBodyRowBefore), + } + } +} + +/// Before table body row, after optional whitespace. 
+/// +/// ```markdown +/// | | a | +/// | | - | +/// > | | b | +/// ^ +/// ``` +pub fn body_row_before(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b'\t' | b' ') => State::Nok, + _ => State::Retry(StateName::GfmTableBodyRowBreak), + } +} + +/// At break in table body row. +/// +/// ```markdown +/// | | a | +/// | | - | +/// > | | b | +/// ^ +/// ^ +/// ^ +/// ``` +pub fn body_row_break(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + None | Some(b'\n') => { + tokenizer.exit(Name::GfmTableRow); + State::Ok + } + Some(b'\t' | b' ') => { + tokenizer.attempt(State::Next(StateName::GfmTableBodyRowBreak), State::Nok); + State::Retry(space_or_tab(tokenizer)) + } + Some(b'|') => { + tokenizer.enter(Name::GfmTableCellDivider); + tokenizer.consume(); + tokenizer.exit(Name::GfmTableCellDivider); + State::Next(StateName::GfmTableBodyRowBreak) + } + // Anything else is cell content. + _ => { + tokenizer.enter(Name::Data); + State::Retry(StateName::GfmTableBodyRowData) + } + } +} + +/// In table body row data. +/// +/// ```markdown +/// | | a | +/// | | - | +/// > | | b | +/// ^ +/// ``` +pub fn body_row_data(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + None | Some(b'\t' | b'\n' | b' ' | b'|') => { + tokenizer.exit(Name::Data); + State::Retry(StateName::GfmTableBodyRowBreak) + } + _ => { + let name = if tokenizer.current == Some(b'\\') { + StateName::GfmTableBodyRowEscape + } else { + StateName::GfmTableBodyRowData + }; + tokenizer.consume(); + State::Next(name) + } + } +} + +/// In table body row escape. +/// +/// ```markdown +/// | | a | +/// | | ---- | +/// > | | b\-c | +/// ^ +/// ``` +pub fn body_row_escape(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b'\\' | b'|') => { + tokenizer.consume(); + State::Next(StateName::GfmTableBodyRowData) + } + _ => State::Retry(StateName::GfmTableBodyRowData), + } +} + +/// Resolve GFM table. 
///
/// Walks all events once and schedules (through `tokenizer.map`) the events
/// that the state machine did not emit itself:
///
/// * an enter of `GfmTable` at every enter of `GfmTableHead`
/// * an enter of `GfmTableBody` at the first row that follows a head
/// * cell and cell value enters/exits for every row (see `flush_cell`)
/// * the exits of `GfmTableBody` (if any) and `GfmTable` after the last row of
///   each table (see `flush_table_end`)
///
/// Cells are tracked as `(usize, usize, usize, usize)` tuples of event
/// indices, in the layout that `flush_cell` consumes; `0` is used as “not
/// set” in every slot.
pub fn resolve(tokenizer: &mut Tokenizer) {
    let mut index = 0;
    // Whether a pipe seen now would still be the optional pipe that opens the
    // row, rather than a divider between two cells.
    let mut in_first_cell_awaiting_pipe = true;
    let mut in_row = false;
    let mut in_delimiter_row = false;
    // The previous cell of the current row, which is not flushed yet.
    let mut last_cell = (0, 0, 0, 0);
    // The cell that is currently being collected.
    let mut cell = (0, 0, 0, 0);

    let mut after_head_awaiting_first_body_row = false;
    // Index of the most recent head/row exit, `0` if no table is open.
    let mut last_table_end = 0;
    let mut last_table_has_body = false;

    while index < tokenizer.events.len() {
        let event = &tokenizer.events[index];

        if event.kind == Kind::Enter {
            // Start of head.
            if event.name == Name::GfmTableHead {
                after_head_awaiting_first_body_row = false;

                // Inject previous (body end and) table end.
                if last_table_end != 0 {
                    flush_table_end(tokenizer, last_table_end, last_table_has_body);
                    last_table_has_body = false;
                    last_table_end = 0;
                }

                // Inject table start.
                tokenizer.map.add(
                    index,
                    0,
                    vec![Event {
                        kind: Kind::Enter,
                        name: Name::GfmTable,
                        point: tokenizer.events[index].point.clone(),
                        link: None,
                    }],
                );
            } else if event.name == Name::GfmTableRow || event.name == Name::GfmTableDelimiterRow {
                in_delimiter_row = event.name == Name::GfmTableDelimiterRow;
                in_row = true;
                in_first_cell_awaiting_pipe = true;
                last_cell = (0, 0, 0, 0);
                // The first cell is entered right after the row enter.
                cell = (0, index + 1, 0, 0);

                // Inject table body start.
                if after_head_awaiting_first_body_row {
                    after_head_awaiting_first_body_row = false;
                    last_table_has_body = true;
                    tokenizer.map.add(
                        index,
                        0,
                        vec![Event {
                            kind: Kind::Enter,
                            name: Name::GfmTableBody,
                            point: tokenizer.events[index].point.clone(),
                            link: None,
                        }],
                    );
                }
            }
            // Cell data.
            else if in_row
                && (event.name == Name::Data
                    || event.name == Name::GfmTableDelimiterMarker
                    || event.name == Name::GfmTableDelimiterFiller)
            {
                in_first_cell_awaiting_pipe = false;

                // First value in cell.
                if cell.2 == 0 {
                    // This cell has content, so the previous cell is complete
                    // and can be flushed; its exit goes where this cell
                    // starts.
                    if last_cell.1 != 0 {
                        cell.0 = cell.1;
                        flush_cell(tokenizer, last_cell, in_delimiter_row, None);
                        last_cell = (0, 0, 0, 0);
                    }

                    cell.2 = index;
                }
            } else if event.name == Name::GfmTableCellDivider {
                if in_first_cell_awaiting_pipe {
                    // The pipe that opens the row does not separate cells.
                    in_first_cell_awaiting_pipe = false;
                } else {
                    if last_cell.1 != 0 {
                        cell.0 = cell.1;
                        flush_cell(tokenizer, last_cell, in_delimiter_row, None);
                    }

                    // The current cell becomes the pending one; a new cell
                    // starts at this divider.
                    last_cell = cell;
                    cell = (last_cell.1, index, 0, 0);
                }
            }
        // Exit events.
        } else if event.name == Name::GfmTableHead {
            after_head_awaiting_first_body_row = true;
            last_table_end = index;
        } else if event.name == Name::GfmTableRow || event.name == Name::GfmTableDelimiterRow {
            in_row = false;
            last_table_end = index;
            // Flush whichever cell is still pending, closing it at the row
            // exit.
            if last_cell.1 != 0 {
                cell.0 = cell.1;
                flush_cell(tokenizer, last_cell, in_delimiter_row, Some(index));
            } else if cell.1 != 0 {
                flush_cell(tokenizer, cell, in_delimiter_row, Some(index));
            }
        } else if in_row
            && (event.name == Name::Data
                || event.name == Name::GfmTableDelimiterMarker
                || event.name == Name::GfmTableDelimiterFiller)
        {
            // Keep track of the last value exit in the cell.
            cell.3 = index;
        }

        index += 1;
    }

    if last_table_end != 0 {
        flush_table_end(tokenizer, last_table_end, last_table_has_body);
    }
}

/// Generate a cell.
///
/// Schedules the enter/exit events of one cell through `tokenizer.map`.
///
/// * `range` holds event indices, with `0` meaning “not set”:
///   `range.0` is where the exit of the previous cell is inserted,
///   `range.1` is where the enter of this cell is inserted,
///   `range.2` is the enter of the first value event in this cell, and
///   `range.3` is the exit of the last value event in this cell
/// * `in_delimiter_row` selects the delimiter cell names
///   (`GfmTableDelimiterCell`, `GfmTableDelimiterCellValue`) instead of the
///   regular ones (`GfmTableCell`, `GfmTableCellText`)
/// * `row_end` is the index of the row exit, given when this is the last
///   cell of the row, in which case the cell is also closed there
fn flush_cell(
    tokenizer: &mut Tokenizer,
    range: (usize, usize, usize, usize),
    in_delimiter_row: bool,
    row_end: Option<usize>,
) {
    let group_name = if in_delimiter_row {
        Name::GfmTableDelimiterCell
    } else {
        Name::GfmTableCell
    };
    let value_name = if in_delimiter_row {
        Name::GfmTableDelimiterCellValue
    } else {
        Name::GfmTableCellText
    };

    // Insert an exit for the previous cell, if there is one.
    //
    // ```markdown
    // > | | aa | bb | cc |
    //          ^-- exit
    //           ^^^^-- this cell
    // ```
    if range.0 != 0 {
        tokenizer.map.add(
            range.0,
            0,
            vec![Event {
                kind: Kind::Exit,
                name: group_name.clone(),
                point: tokenizer.events[range.0].point.clone(),
                link: None,
            }],
        );
    }

    // Insert enter of this cell.
    //
    // ```markdown
    // > | | aa | bb | cc |
    //           ^-- enter
    //           ^^^^-- this cell
    // ```
    tokenizer.map.add(
        range.1,
        0,
        vec![Event {
            kind: Kind::Enter,
            name: group_name.clone(),
            point: tokenizer.events[range.1].point.clone(),
            link: None,
        }],
    );

    // Insert text start at first data start and end at last data end, and
    // remove events between.
    //
    // ```markdown
    // > | | aa | bb | cc |
    //            ^-- enter
    //              ^-- exit
    //           ^^^^-- this cell
    // ```
    if range.2 != 0 {
        tokenizer.map.add(
            range.2,
            0,
            vec![Event {
                kind: Kind::Enter,
                name: value_name.clone(),
                point: tokenizer.events[range.2].point.clone(),
                link: None,
            }],
        );
        // A value that was entered must also have been exited.
        debug_assert_ne!(range.3, 0);

        if !in_delimiter_row {
            // Mark the first data event as text content.
            tokenizer.events[range.2].link = Some(Link {
                previous: None,
                next: None,
                content: Content::Text,
            });

            // Drop the events strictly between the first data enter and the
            // last data exit.
            // NOTE(review): `!in_delimiter_row` is already guaranteed by the
            // enclosing `if`; the repeated check is redundant.
            if !in_delimiter_row && range.3 > range.2 + 1 {
                let a = range.2 + 1;
                let b = range.3 - range.2 - 1;
                tokenizer.map.add(a, b, vec![]);
            }
        }

        tokenizer.map.add(
            range.3 + 1,
            0,
            vec![Event {
                kind: Kind::Exit,
                name: value_name,
                point: tokenizer.events[range.3].point.clone(),
                link: None,
            }],
        );
    }

    // Insert an exit for the last cell, if at the row end.
    //
    // ```markdown
    // > | | aa | bb | cc |
    //                    ^-- exit
    //               ^^^^^^-- this cell (the last one contains two “between” parts)
    // ```
    if let Some(row_end) = row_end {
        tokenizer.map.add(
            row_end,
            0,
            vec![Event {
                kind: Kind::Exit,
                name: group_name,
                point: tokenizer.events[row_end].point.clone(),
                link: None,
            }],
        );
    }
}

/// Generate table end (and table body end).
+fn flush_table_end(tokenizer: &mut Tokenizer, index: usize, body: bool) { + let mut exits = vec![]; + + if body { + exits.push(Event { + kind: Kind::Exit, + name: Name::GfmTableBody, + point: tokenizer.events[index].point.clone(), + link: None, + }); + } + + exits.push(Event { + kind: Kind::Exit, + name: Name::GfmTable, + point: tokenizer.events[index].point.clone(), + link: None, + }); + + tokenizer.map.add(index + 1, 0, exits); +} |