From 2011d2ea4d6ec9d1bd7409c22e4258aceaaa3afe Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Mon, 12 Sep 2022 13:50:48 +0200 Subject: Fix whitespace in GFM footnote definition identifiers * Fix a crash on line endings in footnote definitions * Fix to match spaces and tabs in identifiers to `cmark-gfm` * Fix order of one attribute --- src/compiler.rs | 6 +- src/construct/document.rs | 11 ++- src/construct/gfm_footnote_definition.rs | 114 ++++++++++++++++++++++++++----- src/lib.rs | 20 +++--- src/state.rs | 8 +++ 5 files changed, 126 insertions(+), 33 deletions(-) (limited to 'src') diff --git a/src/compiler.rs b/src/compiler.rs index 1f029f5..397e96f 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -1828,15 +1828,13 @@ fn generate_footnote_item(context: &mut CompileContext, index: usize) { backreferences.push('-'); backreferences.push_str(&(reference_index + 1).to_string()); } - backreferences.push_str( - "\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"", - ); + backreferences.push_str("\" data-footnote-backref=\"\" aria-label=\""); if let Some(ref value) = context.options.gfm_footnote_back_label { backreferences.push_str(&encode(value, context.encode_html)); } else { backreferences.push_str("Back to content"); } - backreferences.push_str("\">↩"); + backreferences.push_str("\" class=\"data-footnote-backref\">↩"); if reference_index != 0 { backreferences.push_str("<sup>"); backreferences.push_str(&(reference_index + 1).to_string()); diff --git a/src/construct/document.rs b/src/construct/document.rs index 57c5f3a..45a961d 100644 --- a/src/construct/document.rs +++ b/src/construct/document.rs @@ -567,11 +567,20 @@ fn resolve(tokenizer: &mut Tokenizer) { child.map.consume(&mut child.events); + let mut flow_index = skip::to(&tokenizer.events, 0, &[Name::Data]); + while flow_index < tokenizer.events.len() + // To do: use `!is_some_and()` when that’s stable. 
+ && (tokenizer.events[flow_index].link.is_none() + || tokenizer.events[flow_index].link.as_ref().unwrap().content != Content::Flow) + { + flow_index = skip::to(&tokenizer.events, flow_index + 1, &[Name::Data]); + } + // Now, add all child events into our parent document tokenizer. divide_events( &mut tokenizer.map, &tokenizer.events, - skip::to(&tokenizer.events, 0, &[Name::Data]), + flow_index, &mut child.events, ); diff --git a/src/construct/gfm_footnote_definition.rs b/src/construct/gfm_footnote_definition.rs index cbe816f..d3f72d7 100644 --- a/src/construct/gfm_footnote_definition.rs +++ b/src/construct/gfm_footnote_definition.rs @@ -127,6 +127,8 @@ //! — while `CommonMark` prevents links in links, GitHub does not prevent footnotes (which turn into links) in links //! * [Footnote-like brackets around image, break that image](https://github.com/github/cmark-gfm/issues/275)\ //! — images can’t be used in what looks like a footnote call +//! * [GFM footnotes: line ending in footnote definition label causes text to disappear](https://github.com/github/cmark-gfm/issues/282)\ +//! — line endings in footnote definitions cause text to disappear //! //! ## Tokens //! @@ -164,11 +166,11 @@ //! 
[html_sup]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-sub-and-sup-elements use crate::construct::partial_space_or_tab::space_or_tab_min_max; -use crate::event::Name; +use crate::event::{Content, Link, Name}; use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; use crate::util::{ - constant::TAB_SIZE, + constant::{LINK_REFERENCE_SIZE_MAX, TAB_SIZE}, normalize_identifier::normalize_identifier, skip, slice::{Position, Slice}, @@ -220,22 +222,104 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { pub fn label_before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'[') => { - tokenizer.tokenize_state.token_1 = Name::GfmFootnoteDefinitionLabel; - tokenizer.tokenize_state.token_2 = Name::GfmFootnoteDefinitionLabelMarker; - tokenizer.tokenize_state.token_3 = Name::GfmFootnoteDefinitionLabelString; - tokenizer.tokenize_state.token_4 = Name::GfmFootnoteDefinitionMarker; - tokenizer.tokenize_state.marker = b'^'; tokenizer.enter(Name::GfmFootnoteDefinitionPrefix); - tokenizer.attempt( - State::Next(StateName::GfmFootnoteDefinitionLabelAfter), - State::Nok, - ); - State::Retry(StateName::LabelStart) + tokenizer.enter(Name::GfmFootnoteDefinitionLabel); + tokenizer.enter(Name::GfmFootnoteDefinitionLabelMarker); + tokenizer.consume(); + tokenizer.exit(Name::GfmFootnoteDefinitionLabelMarker); + State::Next(StateName::GfmFootnoteDefinitionLabelAtMarker) } _ => State::Nok, } } +/// In label, at caret. 
+/// +/// ```markdown +/// > | [^a]: b +/// ^ +/// ``` +pub fn label_at_marker(tokenizer: &mut Tokenizer) -> State { + if tokenizer.current == Some(b'^') { + tokenizer.enter(Name::GfmFootnoteDefinitionMarker); + tokenizer.consume(); + tokenizer.exit(Name::GfmFootnoteDefinitionMarker); + tokenizer.enter(Name::GfmFootnoteDefinitionLabelString); + tokenizer.enter_link( + Name::Data, + Link { + previous: None, + next: None, + content: Content::String, + }, + ); + State::Next(StateName::GfmFootnoteDefinitionLabelInside) + } else { + State::Nok + } +} + +/// In label. +/// +/// > 👉 **Note**: `cmark-gfm` prevents whitespace from occurring in footnote +/// > definition labels. +/// +/// ```markdown +/// > | [^a]: b +/// ^ +/// ``` +pub fn label_inside(tokenizer: &mut Tokenizer) -> State { + // Too long. + if tokenizer.tokenize_state.size > LINK_REFERENCE_SIZE_MAX + // Space or tab is not supported by GFM for some reason (`\n` and + // `[` make sense). + || matches!(tokenizer.current, None | Some(b'\t' | b'\n' | b' ' | b'[')) + // Closing brace with nothing. + || (matches!(tokenizer.current, Some(b']')) && tokenizer.tokenize_state.size == 0) + { + tokenizer.tokenize_state.size = 0; + State::Nok + } else if matches!(tokenizer.current, Some(b']')) { + tokenizer.tokenize_state.size = 0; + tokenizer.exit(Name::Data); + tokenizer.exit(Name::GfmFootnoteDefinitionLabelString); + tokenizer.enter(Name::GfmFootnoteDefinitionLabelMarker); + tokenizer.consume(); + tokenizer.exit(Name::GfmFootnoteDefinitionLabelMarker); + tokenizer.exit(Name::GfmFootnoteDefinitionLabel); + State::Next(StateName::GfmFootnoteDefinitionLabelAfter) + } else { + let next = if matches!(tokenizer.current.unwrap(), b'\\') { + StateName::GfmFootnoteDefinitionLabelEscape + } else { + StateName::GfmFootnoteDefinitionLabelInside + }; + tokenizer.consume(); + tokenizer.tokenize_state.size += 1; + State::Next(next) + } +} + +/// After `\`, at a special character. 
+/// +/// > 👉 **Note**: `cmark-gfm` currently does not support escaped brackets: +/// > <https://github.com/github/cmark-gfm/issues/240> +/// +/// ```markdown +/// > | [^a\*b]: c +/// ^ +/// ``` +pub fn label_escape(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b'[' | b'\\' | b']') => { + tokenizer.tokenize_state.size += 1; + tokenizer.consume(); + State::Next(StateName::GfmFootnoteDefinitionLabelInside) + } + _ => State::Retry(StateName::GfmFootnoteDefinitionLabelInside), + } +} + /// After definition label. /// /// ```markdown @@ -243,12 +327,6 @@ pub fn label_before(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn label_after(tokenizer: &mut Tokenizer) -> State { - tokenizer.tokenize_state.token_1 = Name::Data; - tokenizer.tokenize_state.token_2 = Name::Data; - tokenizer.tokenize_state.token_3 = Name::Data; - tokenizer.tokenize_state.token_4 = Name::Data; - tokenizer.tokenize_state.marker = 0; - match tokenizer.current { Some(b':') => { let end = skip::to_back( diff --git a/src/lib.rs b/src/lib.rs index 420b14d..47a125f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -584,7 +584,7 @@ pub struct Options { /// ..Options::default() /// } /// )?, - /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" + /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" /// ); /// /// // Pass `gfm_footnote_label` to use something else: @@ -597,7 +597,7 @@ pub struct Options { /// ..Options::default() /// } /// )?, - /// "

1

\n

Notes de bas de page

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" + /// "

1

\n

Notes de bas de page

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" /// ); /// # Ok(()) /// # } @@ -623,7 +623,7 @@ pub struct Options { /// ..Options::default() /// } /// )?, - /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" + /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" /// ); /// /// // Pass `gfm_footnote_label_tag_name` to use something else: @@ -636,7 +636,7 @@ pub struct Options { /// ..Options::default() /// } /// )?, - /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" + /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" /// ); /// # Ok(()) /// # } @@ -668,7 +668,7 @@ pub struct Options { /// ..Options::default() /// } /// )?, - /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" + /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" /// ); /// /// // Pass `gfm_footnote_label_attributes` to use something else: @@ -681,7 +681,7 @@ pub struct Options { /// ..Options::default() /// } /// )?, - /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" + /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" /// ); /// # Ok(()) /// # } @@ -708,7 +708,7 @@ pub struct Options { /// ..Options::default() /// } /// )?, - /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" + /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" /// ); /// /// // Pass `gfm_footnote_back_label` to use something else: @@ -721,7 +721,7 @@ pub struct Options { /// ..Options::default() /// } /// )?, - /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" + /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" /// ); /// # Ok(()) /// # } @@ -758,7 +758,7 @@ pub struct Options { /// ..Options::default() /// } /// )?, - /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" + /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" /// ); /// /// // Pass `gfm_footnote_clobber_prefix` to use something else: @@ -771,7 +771,7 @@ pub struct Options { /// ..Options::default() /// } /// )?, - /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" + /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" /// ); /// # Ok(()) /// # } diff --git a/src/state.rs b/src/state.rs index dcabbd7..1d15239 100644 --- a/src/state.rs +++ b/src/state.rs @@ -197,6 +197,9 @@ pub enum Name { GfmFootnoteDefinitionStart, GfmFootnoteDefinitionLabelBefore, + GfmFootnoteDefinitionLabelAtMarker, + GfmFootnoteDefinitionLabelInside, + GfmFootnoteDefinitionLabelEscape, GfmFootnoteDefinitionLabelAfter, GfmFootnoteDefinitionWhitespaceAfter, GfmFootnoteDefinitionContStart, @@ -652,6 +655,11 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { Name::GfmFootnoteDefinitionStart => construct::gfm_footnote_definition::start, Name::GfmFootnoteDefinitionLabelBefore => construct::gfm_footnote_definition::label_before, + Name::GfmFootnoteDefinitionLabelAtMarker => { + construct::gfm_footnote_definition::label_at_marker + } + Name::GfmFootnoteDefinitionLabelInside => construct::gfm_footnote_definition::label_inside, + Name::GfmFootnoteDefinitionLabelEscape => construct::gfm_footnote_definition::label_escape, Name::GfmFootnoteDefinitionLabelAfter => construct::gfm_footnote_definition::label_after, Name::GfmFootnoteDefinitionWhitespaceAfter => { construct::gfm_footnote_definition::whitespace_after -- cgit