From 2011d2ea4d6ec9d1bd7409c22e4258aceaaa3afe Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Mon, 12 Sep 2022 13:50:48 +0200 Subject: Fix whitespace in GFM footnote definition identifiers * Fix a crash on line endings in footnote definitions * Fix to match spaces and tabs in identifiers to `cmark-gfm` * Fix order of one attribute --- src/compiler.rs | 6 +- src/construct/document.rs | 11 +- src/construct/gfm_footnote_definition.rs | 114 ++++++++++-- src/lib.rs | 20 +-- src/state.rs | 8 + tests/gfm_footnote.rs | 286 +++++++++++++++++++------------ 6 files changed, 303 insertions(+), 142 deletions(-) diff --git a/src/compiler.rs b/src/compiler.rs index 1f029f5..397e96f 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -1828,15 +1828,13 @@ fn generate_footnote_item(context: &mut CompileContext, index: usize) { backreferences.push('-'); backreferences.push_str(&(reference_index + 1).to_string()); } - backreferences.push_str( - "\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"", - ); + backreferences.push_str("\" data-footnote-backref=\"\" aria-label=\""); if let Some(ref value) = context.options.gfm_footnote_back_label { backreferences.push_str(&encode(value, context.encode_html)); } else { backreferences.push_str("Back to content"); } - backreferences.push_str("\">↩"); + backreferences.push_str("\" class=\"data-footnote-backref\">↩"); if reference_index != 0 { backreferences.push_str(""); backreferences.push_str(&(reference_index + 1).to_string()); diff --git a/src/construct/document.rs b/src/construct/document.rs index 57c5f3a..45a961d 100644 --- a/src/construct/document.rs +++ b/src/construct/document.rs @@ -567,11 +567,20 @@ fn resolve(tokenizer: &mut Tokenizer) { child.map.consume(&mut child.events); + let mut flow_index = skip::to(&tokenizer.events, 0, &[Name::Data]); + while flow_index < tokenizer.events.len() + // To do: use `!is_some_and()` when that’s stable. + && (tokenizer.events[flow_index].link.is_none() + || tokenizer.events[flow_index].link.as_ref().unwrap().content != Content::Flow) + { + flow_index = skip::to(&tokenizer.events, flow_index + 1, &[Name::Data]); + } + // Now, add all child events into our parent document tokenizer. divide_events( &mut tokenizer.map, &tokenizer.events, - skip::to(&tokenizer.events, 0, &[Name::Data]), + flow_index, &mut child.events, ); diff --git a/src/construct/gfm_footnote_definition.rs b/src/construct/gfm_footnote_definition.rs index cbe816f..d3f72d7 100644 --- a/src/construct/gfm_footnote_definition.rs +++ b/src/construct/gfm_footnote_definition.rs @@ -127,6 +127,8 @@ //! — while `CommonMark` prevents links in links, GitHub does not prevent footnotes (which turn into links) in links //! * [Footnote-like brackets around image, break that image](https://github.com/github/cmark-gfm/issues/275)\ //! — images can’t be used in what looks like a footnote call +//! * [GFM footnotes: line ending in footnote definition label causes text to disappear](https://github.com/github/cmark-gfm/issues/282)\ +//! — line endings in footnote definitions cause text to disappear //! //! ## Tokens //! @@ -164,11 +166,11 @@ //! [html_sup]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-sub-and-sup-elements use crate::construct::partial_space_or_tab::space_or_tab_min_max; -use crate::event::Name; +use crate::event::{Content, Link, Name}; use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; use crate::util::{ - constant::TAB_SIZE, + constant::{LINK_REFERENCE_SIZE_MAX, TAB_SIZE}, normalize_identifier::normalize_identifier, skip, slice::{Position, Slice}, @@ -220,22 +222,104 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { pub fn label_before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'[') => { - tokenizer.tokenize_state.token_1 = Name::GfmFootnoteDefinitionLabel; - tokenizer.tokenize_state.token_2 = Name::GfmFootnoteDefinitionLabelMarker; - tokenizer.tokenize_state.token_3 = Name::GfmFootnoteDefinitionLabelString; - tokenizer.tokenize_state.token_4 = Name::GfmFootnoteDefinitionMarker; - tokenizer.tokenize_state.marker = b'^'; tokenizer.enter(Name::GfmFootnoteDefinitionPrefix); - tokenizer.attempt( - State::Next(StateName::GfmFootnoteDefinitionLabelAfter), - State::Nok, - ); - State::Retry(StateName::LabelStart) + tokenizer.enter(Name::GfmFootnoteDefinitionLabel); + tokenizer.enter(Name::GfmFootnoteDefinitionLabelMarker); + tokenizer.consume(); + tokenizer.exit(Name::GfmFootnoteDefinitionLabelMarker); + State::Next(StateName::GfmFootnoteDefinitionLabelAtMarker) } _ => State::Nok, } } +/// In label, at caret. +/// +/// ```markdown +/// > | [^a]: b +/// ^ +/// ``` +pub fn label_at_marker(tokenizer: &mut Tokenizer) -> State { + if tokenizer.current == Some(b'^') { + tokenizer.enter(Name::GfmFootnoteDefinitionMarker); + tokenizer.consume(); + tokenizer.exit(Name::GfmFootnoteDefinitionMarker); + tokenizer.enter(Name::GfmFootnoteDefinitionLabelString); + tokenizer.enter_link( + Name::Data, + Link { + previous: None, + next: None, + content: Content::String, + }, + ); + State::Next(StateName::GfmFootnoteDefinitionLabelInside) + } else { + State::Nok + } +} + +/// In label. +/// +/// > 👉 **Note**: `cmark-gfm` prevents whitespace from occurring in footnote +/// > definition labels. +/// +/// ```markdown +/// > | [^a]: b +/// ^ +/// ``` +pub fn label_inside(tokenizer: &mut Tokenizer) -> State { + // Too long. + if tokenizer.tokenize_state.size > LINK_REFERENCE_SIZE_MAX + // Space or tab is not supported by GFM for some reason (`\n` and + // `[` make sense). + || matches!(tokenizer.current, None | Some(b'\t' | b'\n' | b' ' | b'[')) + // Closing brace with nothing. + || (matches!(tokenizer.current, Some(b']')) && tokenizer.tokenize_state.size == 0) + { + tokenizer.tokenize_state.size = 0; + State::Nok + } else if matches!(tokenizer.current, Some(b']')) { + tokenizer.tokenize_state.size = 0; + tokenizer.exit(Name::Data); + tokenizer.exit(Name::GfmFootnoteDefinitionLabelString); + tokenizer.enter(Name::GfmFootnoteDefinitionLabelMarker); + tokenizer.consume(); + tokenizer.exit(Name::GfmFootnoteDefinitionLabelMarker); + tokenizer.exit(Name::GfmFootnoteDefinitionLabel); + State::Next(StateName::GfmFootnoteDefinitionLabelAfter) + } else { + let next = if matches!(tokenizer.current.unwrap(), b'\\') { + StateName::GfmFootnoteDefinitionLabelEscape + } else { + StateName::GfmFootnoteDefinitionLabelInside + }; + tokenizer.consume(); + tokenizer.tokenize_state.size += 1; + State::Next(next) + } +} + +/// After `\`, at a special character. +/// +/// > 👉 **Note**: `cmark-gfm` currently does not support escaped brackets: +/// > +/// +/// ```markdown +/// > | [^a\*b]: c +/// ^ +/// ``` +pub fn label_escape(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b'[' | b'\\' | b']') => { + tokenizer.tokenize_state.size += 1; + tokenizer.consume(); + State::Next(StateName::GfmFootnoteDefinitionLabelInside) + } + _ => State::Retry(StateName::GfmFootnoteDefinitionLabelInside), + } +} + /// After definition label. /// /// ```markdown @@ -243,12 +327,6 @@ pub fn label_before(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn label_after(tokenizer: &mut Tokenizer) -> State { - tokenizer.tokenize_state.token_1 = Name::Data; - tokenizer.tokenize_state.token_2 = Name::Data; - tokenizer.tokenize_state.token_3 = Name::Data; - tokenizer.tokenize_state.token_4 = Name::Data; - tokenizer.tokenize_state.marker = 0; - match tokenizer.current { Some(b':') => { let end = skip::to_back( diff --git a/src/lib.rs b/src/lib.rs index 420b14d..47a125f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -584,7 +584,7 @@ pub struct Options { /// ..Options::default() /// } /// )?, - /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" + /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" /// ); /// /// // Pass `gfm_footnote_label` to use something else: @@ -597,7 +597,7 @@ pub struct Options { /// ..Options::default() /// } /// )?, - /// "

1

\n

Notes de bas de page

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" + /// "

1

\n

Notes de bas de page

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" /// ); /// # Ok(()) /// # } @@ -623,7 +623,7 @@ pub struct Options { /// ..Options::default() /// } /// )?, - /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" + /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" /// ); /// /// // Pass `gfm_footnote_label_tag_name` to use something else: @@ -636,7 +636,7 @@ pub struct Options { /// ..Options::default() /// } /// )?, - /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" + /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" /// ); /// # Ok(()) /// # } @@ -668,7 +668,7 @@ pub struct Options { /// ..Options::default() /// } /// )?, - /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" + /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" /// ); /// /// // Pass `gfm_footnote_label_attributes` to use something else: @@ -681,7 +681,7 @@ pub struct Options { /// ..Options::default() /// } /// )?, - /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" + /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" /// ); /// # Ok(()) /// # } @@ -708,7 +708,7 @@ pub struct Options { /// ..Options::default() /// } /// )?, - /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" + /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" /// ); /// /// // Pass `gfm_footnote_back_label` to use something else: @@ -721,7 +721,7 @@ pub struct Options { /// ..Options::default() /// } /// )?, - /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" + /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" /// ); /// # Ok(()) /// # } @@ -758,7 +758,7 @@ pub struct Options { /// ..Options::default() /// } /// )?, - /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" + /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" /// ); /// /// // Pass `gfm_footnote_clobber_prefix` to use something else: @@ -771,7 +771,7 @@ pub struct Options { /// ..Options::default() /// } /// )?, - /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" + /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" /// ); /// # Ok(()) /// # } diff --git a/src/state.rs b/src/state.rs index dcabbd7..1d15239 100644 --- a/src/state.rs +++ b/src/state.rs @@ -197,6 +197,9 @@ pub enum Name { GfmFootnoteDefinitionStart, GfmFootnoteDefinitionLabelBefore, + GfmFootnoteDefinitionLabelAtMarker, + GfmFootnoteDefinitionLabelInside, + GfmFootnoteDefinitionLabelEscape, GfmFootnoteDefinitionLabelAfter, GfmFootnoteDefinitionWhitespaceAfter, GfmFootnoteDefinitionContStart, @@ -652,6 +655,11 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { Name::GfmFootnoteDefinitionStart => construct::gfm_footnote_definition::start, Name::GfmFootnoteDefinitionLabelBefore => construct::gfm_footnote_definition::label_before, + Name::GfmFootnoteDefinitionLabelAtMarker => { + construct::gfm_footnote_definition::label_at_marker + } + Name::GfmFootnoteDefinitionLabelInside => construct::gfm_footnote_definition::label_inside, + Name::GfmFootnoteDefinitionLabelEscape => construct::gfm_footnote_definition::label_escape, Name::GfmFootnoteDefinitionLabelAfter => construct::gfm_footnote_definition::label_after, Name::GfmFootnoteDefinitionWhitespaceAfter => { construct::gfm_footnote_definition::whitespace_after diff --git a/tests/gfm_footnote.rs b/tests/gfm_footnote.rs index d371455..42c70d3 100644 --- a/tests/gfm_footnote.rs +++ b/tests/gfm_footnote.rs @@ -21,7 +21,7 @@ fn gfm_footnote() -> Result<(), String> {

Footnotes

  1. -

    whatevs

    +

    whatevs

@@ -43,7 +43,7 @@ fn gfm_footnote() -> Result<(), String> {

Voetnoten

  1. -

    dingen

    +

    dingen

@@ -64,7 +64,7 @@ fn gfm_footnote() -> Result<(), String> {

Footnotes

  1. -

    b

    +

    b

@@ -85,7 +85,7 @@ fn gfm_footnote() -> Result<(), String> {

Footnotes

  1. -

    b

    +

    b

@@ -106,7 +106,7 @@ fn gfm_footnote() -> Result<(), String> {

Footnotes

  1. -

    b

    +

    b

@@ -132,7 +132,7 @@ fn gfm_footnote() -> Result<(), String> {

Footnotes

  1. -

    c

    +

    c

@@ -146,7 +146,7 @@ fn gfm_footnote() -> Result<(), String> {

Footnotes

  1. -

    d 2

    +

    d 2

@@ -185,7 +185,7 @@ fn gfm_footnote() -> Result<(), String> {

Footnotes

  1. -

    y

    +

    y

@@ -199,7 +199,7 @@ fn gfm_footnote() -> Result<(), String> {

Footnotes

  1. -

    y

    +

    y

@@ -215,7 +215,7 @@ fn gfm_footnote() -> Result<(), String> {

Footnotes

  1. -

    y

    +

    y

@@ -229,7 +229,7 @@ fn gfm_footnote() -> Result<(), String> {

Footnotes

  1. -

    y

    +

    y

@@ -256,7 +256,7 @@ fn gfm_footnote() -> Result<(), String> {
  1. a -b

    +b

@@ -273,7 +273,7 @@ b
  • a -b

    +b

  • @@ -290,7 +290,7 @@ b
  • a -b

    +b

  • @@ -308,7 +308,7 @@ b

    b

    -
    + @@ -325,7 +325,7 @@ b

    Footnotes

    1. -

      y

      +

      y

    @@ -365,7 +365,7 @@ a!

    Footnotes

    1. -

      b 2

      +

      b 2

    @@ -391,7 +391,7 @@ a!

    Footnotes

    1. -

      b

      +

      b

    @@ -437,10 +437,10 @@ even another caret.

    Footnotes

    1. -

      numbers

      +

      numbers

    2. -

      caret

      +

      caret

    @@ -448,6 +448,74 @@ even another caret.

    "should match calls like GitHub" ); + // Note: + // * GH does not support line ending in call. + // See: + // Here line endings don’t make text disappear. + assert_eq!( + micromark_with_options( + r###"[^a]: # b + +[^c d]: # e + +[^f g]: # h + +[^i +j]: # k + +[^ l]: # l + +[^m ]: # m + +xxx[^a], [^c d], [^f g], [^i +j], [^ l], [^m ] + +--- + +Some calls.[^ w][^x ][^y][^z] + +[^w]: # w + +[^x]: # x + +[^ y]: # y + +[^x ]: # z +"###, + &gfm + )?, + r###"

    [^c d]: # e

    +

    [^f g]: # h

    +

    [^i +j]: # k

    +

    [^ l]: # l

    +

    [^m ]: # m

    +

    xxx1, [^c d], [^f g], [^i +j], [^ l], [^m ]

    +
    +

    Some calls.23[^y][^z]

    +

    [^ y]: # y

    +

    3: # z

    +

    Footnotes

    +
      +
    1. +

      b

      + +
    2. +
    3. +

      w

      + +
    4. +
    5. +

      x

      + 2 +
    6. +
    +
    +"###, + "should match whitespace in calls like GitHub (except for the bugs)" + ); + assert_eq!( micromark_with_options( r###"[^*emphasis*] @@ -504,22 +572,22 @@ even another caret.

    Footnotes

    1. -

      a

      +

      a

    2. -

      a

      +

      a

    3. -

      a

      +

      a

    4. -

      a

      +

      a

    5. -

      a

      +

      a

    6. -

      a

      +

      a

    @@ -555,16 +623,16 @@ even another caret.

    Footnotes

    1. -

      Defined in a block quote.

      +

      Defined in a block quote.

    2. -

      Directly after a block quote.

      +

      Directly after a block quote.

    3. -

      Defined in a list item.

      +

      Defined in a list item.

    4. -

      Directly after a list item.

      +

      Directly after a list item.

    @@ -612,21 +680,21 @@ even another caret.

    1. Paragraph -…continuation

      +…continuation

    2. Paragraph …continuation

      “code”, which is paragraphs…

      -

      …because of the indent!

      +

      …because of the indent!

    3. Paragraph -…continuation

      +…continuation

    4. Paragraph -…continuation

      +…continuation

    @@ -664,19 +732,19 @@ Lazy!

    1. - +
    2. -

      Paragraph.

      +

      Paragraph.

    3. - +
    4. -

      Another blank.

      +

      Another blank.

    5. - +
    @@ -717,46 +785,46 @@ Lazy!

    Footnotes

    1. -

      alpha

      +

      alpha

    2. -

      bravo

      +

      bravo

    3. charlie -indented delta

      +indented delta

    4. -

      echo

      +

      echo

    5. -

      foxtrot

      +

      foxtrot

    6. golf

      - +
    7. hotel

      - +
    8. india

      - +
    9. juliett

      - +

    10. - +
      • @@ -770,7 +838,7 @@ indented delta +
    @@ -790,7 +858,7 @@ indented delta

    Footnotes

    1. -

      Recursion11 2 3 4

      +

      Recursion11 2 3 4

    @@ -812,7 +880,7 @@ indented delta

    Footnotes

    1. -

      a

      +

      a

    @@ -858,16 +926,16 @@ indented delta

    Footnotes

    1. -

      a

      +

      a

    2. -

      b

      +

      b

    3. -

      d

      +

      d

    4. -

      e

      +

      e

    @@ -891,13 +959,13 @@ indented delta

    Footnotes

    1. -

      a

      +

      a

    2. -

      b

      +

      b

    3. -

      c

      +

      c

    @@ -941,28 +1009,28 @@ indented delta

    Paragraph -…continuation

    +…continuation

  • -

    Another

    +

    Another

  • Paragraph -…continuation

    +…continuation

  • Paragraph …continuation “code”, which is paragraphs…

    -

    …because of the indent!

    +

    …because of the indent!

  • Paragraph -…continuation

    +…continuation

  • Paragraph -…continuation

    +…continuation

  • @@ -986,13 +1054,13 @@ indented delta

    Footnotes

    1. -

      a

      +

      a

    2. -

      b

      +

      b

    3. -

      c

      +

      c

    @@ -1043,18 +1111,18 @@ indented delta

    Footnotes

    1. -

      Paragraph

      +

      Paragraph

    2. Paragraph

      “code”, which is paragraphs…

      -

      …because of the indent!

      +

      …because of the indent!

    3. -

      Paragraph

      +

      Paragraph

    4. -

      Paragraph

      +

      Paragraph

    @@ -1100,7 +1168,7 @@ indented delta

    Paragraph

    Heading

    -
    +
  • Paragraph

    @@ -1109,21 +1177,21 @@ indented delta +
  • Paragraph

    block quote

    - +
  • Paragraph

    • list
    - +
  • @@ -1152,16 +1220,16 @@ more code

    Footnotes

    1. -

      Paragraph

      +

      Paragraph

    2. -

      Paragraph

      +

      Paragraph

    3. - +
    4. -

      Paragraph

      +

      Paragraph

    @@ -1204,18 +1272,18 @@ more code

    Footnotes

    1. -

      Paragraph

      +

      Paragraph

    2. Paragraph

      “code”, which is paragraphs…

      -

      …because of the indent!

      +

      …because of the indent!

    3. -

      Paragraph

      +

      Paragraph

    4. -

      Paragraph

      +

      Paragraph

    @@ -1253,7 +1321,7 @@ more code
  • Paragraph

    Heading

    - +
  • Paragraph

    @@ -1261,21 +1329,21 @@ more code more code - +
  • Paragraph

    block quote

    - +
  • Paragraph

    • list
    - +
  • @@ -1311,7 +1379,7 @@ isn’t indented.

    Footnotes

    1. -

      Here is the footnote.

      +

      Here is the footnote.

    2. Here’s one with multiple blocks.

      @@ -1321,7 +1389,7 @@ belong to the previous footnote.

      The whole paragraph can be indented, or just the first line. In this way, multi-paragraph footnotes work like -multi-paragraph list items.

      +multi-paragraph list items.

    @@ -1384,38 +1452,38 @@ multi-paragraph list items.

    Footnotes

    1. -

      3

      +

      3

    2. -

      2

      +

      2

    3. -

      1

      +

      1

    4. -

      0

      +

      0

    5. 3

      -

      5

      +

      5

    6. 3

      -

      4

      +

      4

    7. -

      3

      +

      3

    8. 2

      -

      5

      +

      5

    9. 2

      -

      4

      +

      4

    10. -

      2

      +

      2

    @@ -1476,35 +1544,35 @@ multi-paragraph list items. a

    8
     
    -
    +
  • a

    -

    7

    +

    7

  • a

    -

    6

    +

    6

  • a

    -

    5

    +

    5

  • a

    -

    4

    +

    4

  • -

    a

    +

    a

  • -

    a

    +

    a

  • -

    a

    +

    a

  • -

    a

    +

    a

  • -- cgit