diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-09-12 13:50:48 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-09-12 13:54:43 +0200 |
commit | 2011d2ea4d6ec9d1bd7409c22e4258aceaaa3afe (patch) | |
tree | e462579538174cb983dcc2be4b0418b8c5077050 /src/construct/gfm_footnote_definition.rs | |
parent | 640c103c0a2b92f7f2a49cfc0721577f40aa90e0 (diff) | |
download | markdown-rs-2011d2ea4d6ec9d1bd7409c22e4258aceaaa3afe.tar.gz markdown-rs-2011d2ea4d6ec9d1bd7409c22e4258aceaaa3afe.tar.bz2 markdown-rs-2011d2ea4d6ec9d1bd7409c22e4258aceaaa3afe.zip |
Fix whitespace in GFM footnote definition identifiers
* Fix a crash on line endings in footnote definitions
* Fix to match spaces and tabs in identifiers to `cmark-gfm`
* Fix order of one attribute
Diffstat (limited to '')
-rw-r--r-- | src/construct/gfm_footnote_definition.rs | 114 |
1 files changed, 96 insertions, 18 deletions
diff --git a/src/construct/gfm_footnote_definition.rs b/src/construct/gfm_footnote_definition.rs index cbe816f..d3f72d7 100644 --- a/src/construct/gfm_footnote_definition.rs +++ b/src/construct/gfm_footnote_definition.rs @@ -127,6 +127,8 @@ //! — while `CommonMark` prevents links in links, GitHub does not prevent footnotes (which turn into links) in links //! * [Footnote-like brackets around image, break that image](https://github.com/github/cmark-gfm/issues/275)\ //! — images can’t be used in what looks like a footnote call +//! * [GFM footnotes: line ending in footnote definition label causes text to disappear](https://github.com/github/cmark-gfm/issues/282)\ +//! — line endings in footnote definitions cause text to disappear //! //! ## Tokens //! @@ -164,11 +166,11 @@ //! [html_sup]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-sub-and-sup-elements use crate::construct::partial_space_or_tab::space_or_tab_min_max; -use crate::event::Name; +use crate::event::{Content, Link, Name}; use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; use crate::util::{ - constant::TAB_SIZE, + constant::{LINK_REFERENCE_SIZE_MAX, TAB_SIZE}, normalize_identifier::normalize_identifier, skip, slice::{Position, Slice}, @@ -220,22 +222,104 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { pub fn label_before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'[') => { - tokenizer.tokenize_state.token_1 = Name::GfmFootnoteDefinitionLabel; - tokenizer.tokenize_state.token_2 = Name::GfmFootnoteDefinitionLabelMarker; - tokenizer.tokenize_state.token_3 = Name::GfmFootnoteDefinitionLabelString; - tokenizer.tokenize_state.token_4 = Name::GfmFootnoteDefinitionMarker; - tokenizer.tokenize_state.marker = b'^'; tokenizer.enter(Name::GfmFootnoteDefinitionPrefix); - tokenizer.attempt( - State::Next(StateName::GfmFootnoteDefinitionLabelAfter), - State::Nok, - ); - State::Retry(StateName::LabelStart) + 
tokenizer.enter(Name::GfmFootnoteDefinitionLabel); + tokenizer.enter(Name::GfmFootnoteDefinitionLabelMarker); + tokenizer.consume(); + tokenizer.exit(Name::GfmFootnoteDefinitionLabelMarker); + State::Next(StateName::GfmFootnoteDefinitionLabelAtMarker) } _ => State::Nok, } } +/// In label, at caret. +/// +/// ```markdown +/// > | [^a]: b +/// ^ +/// ``` +pub fn label_at_marker(tokenizer: &mut Tokenizer) -> State { + if tokenizer.current == Some(b'^') { + tokenizer.enter(Name::GfmFootnoteDefinitionMarker); + tokenizer.consume(); + tokenizer.exit(Name::GfmFootnoteDefinitionMarker); + tokenizer.enter(Name::GfmFootnoteDefinitionLabelString); + tokenizer.enter_link( + Name::Data, + Link { + previous: None, + next: None, + content: Content::String, + }, + ); + State::Next(StateName::GfmFootnoteDefinitionLabelInside) + } else { + State::Nok + } +} + +/// In label. +/// +/// > 👉 **Note**: `cmark-gfm` prevents whitespace from occurring in footnote +/// > definition labels. +/// +/// ```markdown +/// > | [^a]: b +/// ^ +/// ``` +pub fn label_inside(tokenizer: &mut Tokenizer) -> State { + // Too long. + if tokenizer.tokenize_state.size > LINK_REFERENCE_SIZE_MAX + // Space or tab is not supported by GFM for some reason (`\n` and + // `[` make sense). + || matches!(tokenizer.current, None | Some(b'\t' | b'\n' | b' ' | b'[')) + // Closing brace with nothing. 
+ || (matches!(tokenizer.current, Some(b']')) && tokenizer.tokenize_state.size == 0) + { + tokenizer.tokenize_state.size = 0; + State::Nok + } else if matches!(tokenizer.current, Some(b']')) { + tokenizer.tokenize_state.size = 0; + tokenizer.exit(Name::Data); + tokenizer.exit(Name::GfmFootnoteDefinitionLabelString); + tokenizer.enter(Name::GfmFootnoteDefinitionLabelMarker); + tokenizer.consume(); + tokenizer.exit(Name::GfmFootnoteDefinitionLabelMarker); + tokenizer.exit(Name::GfmFootnoteDefinitionLabel); + State::Next(StateName::GfmFootnoteDefinitionLabelAfter) + } else { + let next = if matches!(tokenizer.current.unwrap(), b'\\') { + StateName::GfmFootnoteDefinitionLabelEscape + } else { + StateName::GfmFootnoteDefinitionLabelInside + }; + tokenizer.consume(); + tokenizer.tokenize_state.size += 1; + State::Next(next) + } +} + +/// After `\`, at a special character. +/// +/// > 👉 **Note**: `cmark-gfm` currently does not support escaped brackets: +/// > <https://github.com/github/cmark-gfm/issues/240> +/// +/// ```markdown +/// > | [^a\*b]: c +/// ^ +/// ``` +pub fn label_escape(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b'[' | b'\\' | b']') => { + tokenizer.tokenize_state.size += 1; + tokenizer.consume(); + State::Next(StateName::GfmFootnoteDefinitionLabelInside) + } + _ => State::Retry(StateName::GfmFootnoteDefinitionLabelInside), + } +} + /// After definition label. /// /// ```markdown @@ -243,12 +327,6 @@ pub fn label_before(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn label_after(tokenizer: &mut Tokenizer) -> State { - tokenizer.tokenize_state.token_1 = Name::Data; - tokenizer.tokenize_state.token_2 = Name::Data; - tokenizer.tokenize_state.token_3 = Name::Data; - tokenizer.tokenize_state.token_4 = Name::Data; - tokenizer.tokenize_state.marker = 0; - match tokenizer.current { Some(b':') => { let end = skip::to_back( |