From e485745c6924e41f2896f579b5454cfb800e13f6 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Fri, 28 Oct 2022 18:28:12 +0200 Subject: Fix GFM tables to require a non-pipe in header row Related-to: GH-20. --- src/construct/gfm_table.rs | 28 +++++++++++++++++++++------- tests/gfm_table.rs | 18 ++++++++++++++++++ 2 files changed, 39 insertions(+), 7 deletions(-) diff --git a/src/construct/gfm_table.rs b/src/construct/gfm_table.rs index e055e1d..3f88073 100644 --- a/src/construct/gfm_table.rs +++ b/src/construct/gfm_table.rs @@ -312,6 +312,8 @@ pub fn head_row_start(tokenizer: &mut Tokenizer) -> State { Some(b'|') => State::Retry(StateName::GfmTableHeadRowBreak), _ => { tokenizer.tokenize_state.seen = true; + // Count the first character, that isn’t a pipe, double. + tokenizer.tokenize_state.size_b += 1; State::Retry(StateName::GfmTableHeadRowBreak) } } @@ -332,22 +334,34 @@ pub fn head_row_break(tokenizer: &mut Tokenizer) -> State { None => { tokenizer.tokenize_state.seen = false; tokenizer.tokenize_state.size = 0; + tokenizer.tokenize_state.size_b = 0; State::Nok } Some(b'\n') => { - // Feel free to interrupt: - tokenizer.interrupt = true; - tokenizer.exit(Name::GfmTableRow); - tokenizer.enter(Name::LineEnding); - tokenizer.consume(); - tokenizer.exit(Name::LineEnding); - State::Next(StateName::GfmTableHeadDelimiterStart) + // If anything other than one pipe (ignoring whitespace) was used, it’s fine. + if tokenizer.tokenize_state.size_b > 1 { + tokenizer.tokenize_state.size_b = 0; + // Feel free to interrupt: + tokenizer.interrupt = true; + tokenizer.exit(Name::GfmTableRow); + tokenizer.enter(Name::LineEnding); + tokenizer.consume(); + tokenizer.exit(Name::LineEnding); + State::Next(StateName::GfmTableHeadDelimiterStart) + } else { + tokenizer.tokenize_state.seen = false; + tokenizer.tokenize_state.size = 0; + tokenizer.tokenize_state.size_b = 0; + State::Nok + } } Some(b'\t' | b' ') => { tokenizer.attempt(State::Next(StateName::GfmTableHeadRowBreak), State::Nok); State::Retry(space_or_tab(tokenizer)) } _ => { + tokenizer.tokenize_state.size_b += 1; + // Whether a delimiter was seen. if tokenizer.tokenize_state.seen { tokenizer.tokenize_state.seen = false; diff --git a/tests/gfm_table.rs b/tests/gfm_table.rs index db1c364..c8f20ef 100644 --- a/tests/gfm_table.rs +++ b/tests/gfm_table.rs @@ -129,6 +129,24 @@ fn gfm_table() -> Result<(), String> { "should support empty body cells" ); + assert_eq!( + to_html_with_options(":\n|-|\n|a|\n\nb\n|-|\n|c|\n\n|\n|-|\n|d|\n\n|\n|-|\n|e|\n\n|:\n|-|\n|f|\n\n||\n|-|\n|g|\n\n| |\n|-|\n|h|\n", &Options::gfm())?, + "\n\n\n\n\n\n\n\n\n\n\n
:
a
\n\n\n\n\n\n\n\n\n\n\n\n
b
c
\n

|\n|-|\n|d|

\n

|\n|-|\n|e|

\n\n\n\n\n\n\n\n\n\n\n\n
:
f
\n\n\n\n\n\n\n\n\n\n\n\n
g
\n\n\n\n\n\n\n\n\n\n\n\n
h
\n", + "should need any character other than a single pipe in the header row" + ); + + assert_eq!( + to_html_with_options("a\n|-\n\nb\n||\n\nc\n|-|\n\nd\n|:|\n\ne\n| |\n\nf\n| -|\n\ng\n|- |\n", &Options::gfm())?, + "\n\n\n\n\n\n
a
\n

b\n||

\n\n\n\n\n\n\n
c
\n

d\n|:|

\n

e\n| |

\n\n\n\n\n\n\n
f
\n\n\n\n\n\n\n
g
\n", + "should need a dash in the delimimter row" + ); + + assert_eq!( + to_html_with_options("|\n|", &Options::gfm())?, + "

|\n|

", + "should need something" + ); + assert_eq!( to_html_with_options("| a |\n| - |\n- b", &Options::gfm())?, "\n\n\n\n\n\n
a
\n", -- cgit