From 2011d2ea4d6ec9d1bd7409c22e4258aceaaa3afe Mon Sep 17 00:00:00 2001
From: Titus Wormer
Date: Mon, 12 Sep 2022 13:50:48 +0200
Subject: Fix whitespace in GFM footnote definition identifiers
* Fix a crash on line endings in footnote definitions
* Fix handling of spaces and tabs in identifiers to match `cmark-gfm`
* Fix order of the `class` attribute on footnote backreferences
---
src/compiler.rs | 6 +-
src/construct/document.rs | 11 +-
src/construct/gfm_footnote_definition.rs | 114 ++++++++++--
src/lib.rs | 20 +--
src/state.rs | 8 +
tests/gfm_footnote.rs | 286 +++++++++++++++++++------------
6 files changed, 303 insertions(+), 142 deletions(-)
diff --git a/src/compiler.rs b/src/compiler.rs
index 1f029f5..397e96f 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -1828,15 +1828,13 @@ fn generate_footnote_item(context: &mut CompileContext, index: usize) {
backreferences.push('-');
backreferences.push_str(&(reference_index + 1).to_string());
}
- backreferences.push_str(
- "\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"",
- );
+ backreferences.push_str("\" data-footnote-backref=\"\" aria-label=\"");
if let Some(ref value) = context.options.gfm_footnote_back_label {
backreferences.push_str(&encode(value, context.encode_html));
} else {
backreferences.push_str("Back to content");
}
- backreferences.push_str("\">↩");
+ backreferences.push_str("\" class=\"data-footnote-backref\">↩");
if reference_index != 0 {
backreferences.push_str("");
backreferences.push_str(&(reference_index + 1).to_string());
diff --git a/src/construct/document.rs b/src/construct/document.rs
index 57c5f3a..45a961d 100644
--- a/src/construct/document.rs
+++ b/src/construct/document.rs
@@ -567,11 +567,20 @@ fn resolve(tokenizer: &mut Tokenizer) {
child.map.consume(&mut child.events);
+ let mut flow_index = skip::to(&tokenizer.events, 0, &[Name::Data]);
+ while flow_index < tokenizer.events.len()
+ // To do: use `!is_some_and()` when that’s stable.
+ && (tokenizer.events[flow_index].link.is_none()
+ || tokenizer.events[flow_index].link.as_ref().unwrap().content != Content::Flow)
+ {
+ flow_index = skip::to(&tokenizer.events, flow_index + 1, &[Name::Data]);
+ }
+
// Now, add all child events into our parent document tokenizer.
divide_events(
&mut tokenizer.map,
&tokenizer.events,
- skip::to(&tokenizer.events, 0, &[Name::Data]),
+ flow_index,
&mut child.events,
);
diff --git a/src/construct/gfm_footnote_definition.rs b/src/construct/gfm_footnote_definition.rs
index cbe816f..d3f72d7 100644
--- a/src/construct/gfm_footnote_definition.rs
+++ b/src/construct/gfm_footnote_definition.rs
@@ -127,6 +127,8 @@
//! — while `CommonMark` prevents links in links, GitHub does not prevent footnotes (which turn into links) in links
//! * [Footnote-like brackets around image, break that image](https://github.com/github/cmark-gfm/issues/275)\
//! — images can’t be used in what looks like a footnote call
+//! * [GFM footnotes: line ending in footnote definition label causes text to disappear](https://github.com/github/cmark-gfm/issues/282)\
+//! — line endings in footnote definitions cause text to disappear
//!
//! ## Tokens
//!
@@ -164,11 +166,11 @@
//! [html_sup]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-sub-and-sup-elements
use crate::construct::partial_space_or_tab::space_or_tab_min_max;
-use crate::event::Name;
+use crate::event::{Content, Link, Name};
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
use crate::util::{
- constant::TAB_SIZE,
+ constant::{LINK_REFERENCE_SIZE_MAX, TAB_SIZE},
normalize_identifier::normalize_identifier,
skip,
slice::{Position, Slice},
@@ -220,22 +222,104 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
pub fn label_before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'[') => {
- tokenizer.tokenize_state.token_1 = Name::GfmFootnoteDefinitionLabel;
- tokenizer.tokenize_state.token_2 = Name::GfmFootnoteDefinitionLabelMarker;
- tokenizer.tokenize_state.token_3 = Name::GfmFootnoteDefinitionLabelString;
- tokenizer.tokenize_state.token_4 = Name::GfmFootnoteDefinitionMarker;
- tokenizer.tokenize_state.marker = b'^';
tokenizer.enter(Name::GfmFootnoteDefinitionPrefix);
- tokenizer.attempt(
- State::Next(StateName::GfmFootnoteDefinitionLabelAfter),
- State::Nok,
- );
- State::Retry(StateName::LabelStart)
+ tokenizer.enter(Name::GfmFootnoteDefinitionLabel);
+ tokenizer.enter(Name::GfmFootnoteDefinitionLabelMarker);
+ tokenizer.consume();
+ tokenizer.exit(Name::GfmFootnoteDefinitionLabelMarker);
+ State::Next(StateName::GfmFootnoteDefinitionLabelAtMarker)
}
_ => State::Nok,
}
}
+/// In label, at the caret (`^`); any other byte yields `State::Nok`.
+///
+/// ```markdown
+/// > | [^a]: b
+/// ^
+/// ```
+pub fn label_at_marker(tokenizer: &mut Tokenizer) -> State {
+ if tokenizer.current == Some(b'^') {
+ tokenizer.enter(Name::GfmFootnoteDefinitionMarker);
+ tokenizer.consume();
+ tokenizer.exit(Name::GfmFootnoteDefinitionMarker);
+ tokenizer.enter(Name::GfmFootnoteDefinitionLabelString);
+ tokenizer.enter_link( // The label text is entered as linked `Data` with string content.
+ Name::Data,
+ Link {
+ previous: None,
+ next: None,
+ content: Content::String,
+ },
+ );
+ State::Next(StateName::GfmFootnoteDefinitionLabelInside)
+ } else {
+ State::Nok
+ }
+}
+
+/// In label, at a byte of the identifier or at the closing `]`.
+///
+/// > 👉 **Note**: `cmark-gfm` prevents whitespace from occurring in footnote
+/// > definition labels.
+///
+/// ```markdown
+/// > | [^a]: b
+/// ^
+/// ```
+pub fn label_inside(tokenizer: &mut Tokenizer) -> State {
+ // Label is too long.
+ if tokenizer.tokenize_state.size > LINK_REFERENCE_SIZE_MAX
+ // Space or tab is not supported by GFM for some reason (`\n` and
+ // `[` make sense); end of input (`None`) is rejected here as well.
+ || matches!(tokenizer.current, None | Some(b'\t' | b'\n' | b' ' | b'['))
+ // Closing bracket (`]`) with nothing before it: an empty label.
+ || (matches!(tokenizer.current, Some(b']')) && tokenizer.tokenize_state.size == 0)
+ {
+ tokenizer.tokenize_state.size = 0; // Reset the size counter before failing.
+ State::Nok
+ } else if matches!(tokenizer.current, Some(b']')) {
+ tokenizer.tokenize_state.size = 0; // Reset the size counter before leaving the label.
+ tokenizer.exit(Name::Data);
+ tokenizer.exit(Name::GfmFootnoteDefinitionLabelString);
+ tokenizer.enter(Name::GfmFootnoteDefinitionLabelMarker);
+ tokenizer.consume();
+ tokenizer.exit(Name::GfmFootnoteDefinitionLabelMarker);
+ tokenizer.exit(Name::GfmFootnoteDefinitionLabel);
+ State::Next(StateName::GfmFootnoteDefinitionLabelAfter)
+ } else {
+ let next = if matches!(tokenizer.current.unwrap(), b'\\') { // `None` was rejected above.
+ StateName::GfmFootnoteDefinitionLabelEscape
+ } else {
+ StateName::GfmFootnoteDefinitionLabelInside
+ };
+ tokenizer.consume();
+ tokenizer.tokenize_state.size += 1;
+ State::Next(next)
+ }
+}
+
+/// After `\`, at a special character.
+///
+/// > 👉 **Note**: `cmark-gfm` currently does not support escaped brackets
+/// > in footnote definition labels.
+///
+/// ```markdown
+/// > | [^a\*b]: c
+/// ^
+/// ```
+pub fn label_escape(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ Some(b'[' | b'\\' | b']') => { // Only `[`, `\`, and `]` are consumed as escaped here.
+ tokenizer.tokenize_state.size += 1;
+ tokenizer.consume();
+ State::Next(StateName::GfmFootnoteDefinitionLabelInside)
+ }
+ _ => State::Retry(StateName::GfmFootnoteDefinitionLabelInside), // Anything else: retry in the label-inside state.
+ }
+}
+
/// After definition label.
///
/// ```markdown
@@ -243,12 +327,6 @@ pub fn label_before(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
pub fn label_after(tokenizer: &mut Tokenizer) -> State {
- tokenizer.tokenize_state.token_1 = Name::Data;
- tokenizer.tokenize_state.token_2 = Name::Data;
- tokenizer.tokenize_state.token_3 = Name::Data;
- tokenizer.tokenize_state.token_4 = Name::Data;
- tokenizer.tokenize_state.marker = 0;
-
match tokenizer.current {
Some(b':') => {
let end = skip::to_back(
diff --git a/src/lib.rs b/src/lib.rs
index 420b14d..47a125f 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -584,7 +584,7 @@ pub struct Options {
/// ..Options::default()
/// }
/// )?,
- /// "1
\n\n"
+ /// "1
\n\n"
/// );
///
/// // Pass `gfm_footnote_label` to use something else:
@@ -597,7 +597,7 @@ pub struct Options {
/// ..Options::default()
/// }
/// )?,
- /// "1
\n\n"
+ /// "1
\n\n"
/// );
/// # Ok(())
/// # }
@@ -623,7 +623,7 @@ pub struct Options {
/// ..Options::default()
/// }
/// )?,
- /// "1
\n\n"
+ /// "1
\n\n"
/// );
///
/// // Pass `gfm_footnote_label_tag_name` to use something else:
@@ -636,7 +636,7 @@ pub struct Options {
/// ..Options::default()
/// }
/// )?,
- /// "1
\n\n"
+ /// "1
\n\n"
/// );
/// # Ok(())
/// # }
@@ -668,7 +668,7 @@ pub struct Options {
/// ..Options::default()
/// }
/// )?,
- /// "1
\n\n"
+ /// "1
\n\n"
/// );
///
/// // Pass `gfm_footnote_label_attributes` to use something else:
@@ -681,7 +681,7 @@ pub struct Options {
/// ..Options::default()
/// }
/// )?,
- /// "1
\n\n"
+ /// "1
\n\n"
/// );
/// # Ok(())
/// # }
@@ -708,7 +708,7 @@ pub struct Options {
/// ..Options::default()
/// }
/// )?,
- /// "1
\n\n"
+ /// "1
\n\n"
/// );
///
/// // Pass `gfm_footnote_back_label` to use something else:
@@ -721,7 +721,7 @@ pub struct Options {
/// ..Options::default()
/// }
/// )?,
- /// "1
\n\n"
+ /// "1
\n\n"
/// );
/// # Ok(())
/// # }
@@ -758,7 +758,7 @@ pub struct Options {
/// ..Options::default()
/// }
/// )?,
- /// "1
\n\n"
+ /// "1
\n\n"
/// );
///
/// // Pass `gfm_footnote_clobber_prefix` to use something else:
@@ -771,7 +771,7 @@ pub struct Options {
/// ..Options::default()
/// }
/// )?,
- /// "1
\n\n"
+ /// "1
\n\n"
/// );
/// # Ok(())
/// # }
diff --git a/src/state.rs b/src/state.rs
index dcabbd7..1d15239 100644
--- a/src/state.rs
+++ b/src/state.rs
@@ -197,6 +197,9 @@ pub enum Name {
GfmFootnoteDefinitionStart,
GfmFootnoteDefinitionLabelBefore,
+ GfmFootnoteDefinitionLabelAtMarker,
+ GfmFootnoteDefinitionLabelInside,
+ GfmFootnoteDefinitionLabelEscape,
GfmFootnoteDefinitionLabelAfter,
GfmFootnoteDefinitionWhitespaceAfter,
GfmFootnoteDefinitionContStart,
@@ -652,6 +655,11 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
Name::GfmFootnoteDefinitionStart => construct::gfm_footnote_definition::start,
Name::GfmFootnoteDefinitionLabelBefore => construct::gfm_footnote_definition::label_before,
+ Name::GfmFootnoteDefinitionLabelAtMarker => {
+ construct::gfm_footnote_definition::label_at_marker
+ }
+ Name::GfmFootnoteDefinitionLabelInside => construct::gfm_footnote_definition::label_inside,
+ Name::GfmFootnoteDefinitionLabelEscape => construct::gfm_footnote_definition::label_escape,
Name::GfmFootnoteDefinitionLabelAfter => construct::gfm_footnote_definition::label_after,
Name::GfmFootnoteDefinitionWhitespaceAfter => {
construct::gfm_footnote_definition::whitespace_after
diff --git a/tests/gfm_footnote.rs b/tests/gfm_footnote.rs
index d371455..42c70d3 100644
--- a/tests/gfm_footnote.rs
+++ b/tests/gfm_footnote.rs
@@ -21,7 +21,7 @@ fn gfm_footnote() -> Result<(), String> {
@@ -43,7 +43,7 @@ fn gfm_footnote() -> Result<(), String> {
@@ -64,7 +64,7 @@ fn gfm_footnote() -> Result<(), String> {
@@ -85,7 +85,7 @@ fn gfm_footnote() -> Result<(), String> {
@@ -106,7 +106,7 @@ fn gfm_footnote() -> Result<(), String> {
@@ -132,7 +132,7 @@ fn gfm_footnote() -> Result<(), String> {
@@ -146,7 +146,7 @@ fn gfm_footnote() -> Result<(), String> {
@@ -185,7 +185,7 @@ fn gfm_footnote() -> Result<(), String> {
@@ -199,7 +199,7 @@ fn gfm_footnote() -> Result<(), String> {
@@ -215,7 +215,7 @@ fn gfm_footnote() -> Result<(), String> {
@@ -229,7 +229,7 @@ fn gfm_footnote() -> Result<(), String> {
@@ -256,7 +256,7 @@ fn gfm_footnote() -> Result<(), String> {
-
a
-b
+b
@@ -273,7 +273,7 @@ b
+
@@ -325,7 +325,7 @@ b