diff options
Diffstat (limited to '')
| -rw-r--r-- | src/compiler.rs | 18 | ||||
| -rw-r--r-- | src/construct/attention.rs | 206 | ||||
| -rw-r--r-- | src/construct/autolink.rs | 2 | ||||
| -rw-r--r-- | src/construct/block_quote.rs | 4 | ||||
| -rw-r--r-- | src/construct/character_escape.rs | 3 | ||||
| -rw-r--r-- | src/construct/character_reference.rs | 4 | ||||
| -rw-r--r-- | src/construct/code_fenced.rs | 6 | ||||
| -rw-r--r-- | src/construct/code_indented.rs | 2 | ||||
| -rw-r--r-- | src/construct/code_text.rs | 2 | ||||
| -rw-r--r-- | src/construct/definition.rs | 2 | ||||
| -rw-r--r-- | src/construct/frontmatter.rs | 2 | ||||
| -rw-r--r-- | src/construct/hard_break_escape.rs | 4 | ||||
| -rw-r--r-- | src/construct/heading_atx.rs | 4 | ||||
| -rw-r--r-- | src/construct/heading_setext.rs | 4 | ||||
| -rw-r--r-- | src/construct/html_flow.rs | 4 | ||||
| -rw-r--r-- | src/construct/html_text.rs | 2 | ||||
| -rw-r--r-- | src/construct/label_end.rs | 2 | ||||
| -rw-r--r-- | src/construct/label_start_image.rs | 3 | ||||
| -rw-r--r-- | src/construct/label_start_link.rs | 3 | ||||
| -rw-r--r-- | src/construct/list_item.rs | 4 | ||||
| -rw-r--r-- | src/construct/text.rs | 14 | ||||
| -rw-r--r-- | src/construct/thematic_break.rs | 4 | ||||
| -rw-r--r-- | src/event.rs | 59 | ||||
| -rw-r--r-- | src/lib.rs | 61 | ||||
| -rw-r--r-- | src/parser.rs | 7 | 
25 files changed, 307 insertions, 119 deletions
| diff --git a/src/compiler.rs b/src/compiler.rs index 2e13294..abf35c8 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -326,6 +326,7 @@ fn enter(context: &mut CompileContext) {          Name::DefinitionDestinationString => on_enter_definition_destination_string(context),          Name::Emphasis => on_enter_emphasis(context),          Name::Frontmatter => on_enter_frontmatter(context), +        Name::GfmStrikethrough => on_enter_gfm_strikethrough(context),          Name::HtmlFlow => on_enter_html_flow(context),          Name::HtmlText => on_enter_html_text(context),          Name::Image => on_enter_image(context), @@ -369,6 +370,7 @@ fn exit(context: &mut CompileContext) {          Name::DefinitionTitleString => on_exit_definition_title_string(context),          Name::Emphasis => on_exit_emphasis(context),          Name::Frontmatter => on_exit_frontmatter(context), +        Name::GfmStrikethrough => on_exit_gfm_strikethrough(context),          Name::GfmAutolinkLiteralProtocol => on_exit_gfm_autolink_literal_protocol(context),          Name::GfmAutolinkLiteralWww => on_exit_gfm_autolink_literal_www(context),          Name::GfmAutolinkLiteralEmail => on_exit_gfm_autolink_literal_email(context), @@ -467,6 +469,13 @@ fn on_enter_frontmatter(context: &mut CompileContext) {      context.buffer();  } +/// Handle [`Enter`][Kind::Enter]:[`GfmStrikethrough`][Name::GfmStrikethrough]. +fn on_enter_gfm_strikethrough(context: &mut CompileContext) { +    if !context.image_alt_inside { +        context.push("<del>"); +    } +} +  /// Handle [`Enter`][Kind::Enter]:[`HtmlFlow`][Name::HtmlFlow].  fn on_enter_html_flow(context: &mut CompileContext) {      context.line_ending_if_needed(); @@ -898,7 +907,7 @@ fn on_exit_definition_title_string(context: &mut CompileContext) {      context.media_stack.last_mut().unwrap().title = Some(buf);  } -/// Handle [`Exit`][Kind::Exit]:[`Strong`][Name::Emphasis]. +/// Handle [`Exit`][Kind::Exit]:[`Emphasis`][Name::Emphasis].  fn on_exit_emphasis(context: &mut CompileContext) {      if !context.image_alt_inside {          context.push("</em>"); @@ -942,6 +951,13 @@ fn on_exit_gfm_autolink_literal_email(context: &mut CompileContext) {      on_exit_autolink_email(context);  } +/// Handle [`Exit`][Kind::Exit]:[`GfmStrikethrough`][Name::GfmStrikethrough]. +fn on_exit_gfm_strikethrough(context: &mut CompileContext) { +    if !context.image_alt_inside { +        context.push("</del>"); +    } +} +  /// Handle [`Exit`][Kind::Exit]:[`HeadingAtx`][Name::HeadingAtx].  fn on_exit_heading_atx(context: &mut CompileContext) {      let rank = context diff --git a/src/construct/attention.rs b/src/construct/attention.rs index ef960d4..526f58c 100644 --- a/src/construct/attention.rs +++ b/src/construct/attention.rs @@ -1,4 +1,5 @@ -//! Attention (emphasis and strong) occurs in the [text][] content type. +//! Attention (emphasis, strong, optionally GFM strikethrough) occurs in the +//! [text][] content type.  //!  //! ## Grammar  //! @@ -7,24 +8,31 @@  //!  //! ```bnf  //! attention_sequence ::= 1*'*' | 1*'_' +//! gfm_attention_sequence ::= 1*'~'  //! ```  //!  //! Sequences are matched together to form attention based on which character -//! they contain, and what character occurs before and after each sequence. +//! they contain, how long they are, and what character occurs before and after +//! each sequence.  //! Otherwise they are turned into data.  //!  //! ## HTML  //! -//! When sequences match, and two markers can be “taken” from them, they -//! together relate to the `<strong>` element in HTML. +//! When asterisk/underscore sequences match, and two markers can be “taken” +//! from them, they together relate to the `<strong>` element in HTML.  //! When one marker can be taken, they relate to the `<em>` element.  //! See [*§ 4.5.2 The `em` element*][html-em] and  //! [*§ 4.5.3 The `strong` element*][html-strong] in the HTML spec for more  //! info.  //! +//! When tilde sequences match, they together relate to the `<del>` element in +//! HTML. +//! See [*§ 4.7.2 The `del` element*][html-del] in the HTML spec for more info. +//!  //! ## Recommendation  //! -//! It is recommended to use asterisks for attention when writing markdown. +//! It is recommended to use asterisks for emphasis/strong attention when +//! writing markdown.  //!  //! There are some small differences in whether sequences can open and/or close  //! based on whether they are formed with asterisks or underscores. @@ -37,11 +45,18 @@  //! can look for asterisks to find syntax while not worrying about other  //! characters.  //! +//! For strikethrough attention, it is recommended to use two markers. +//! While `github.com` allows single tildes too, it technically prohibits it in +//! their spec. +//!  //! ## Tokens  //!  //! *   [`Emphasis`][Name::Emphasis]  //! *   [`EmphasisSequence`][Name::EmphasisSequence]  //! *   [`EmphasisText`][Name::EmphasisText] +//! *   [`GfmStrikethrough`][Name::GfmStrikethrough] +//! *   [`GfmStrikethroughSequence`][Name::GfmStrikethroughSequence] +//! *   [`GfmStrikethroughText`][Name::GfmStrikethroughText]  //! *   [`Strong`][Name::Strong]  //! *   [`StrongSequence`][Name::StrongSequence]  //! *   [`StrongText`][Name::StrongText] @@ -52,11 +67,14 @@  //! ## References  //!  //! *   [`attention.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/attention.js) +//! *   [`micromark-extension-gfm-strikethrough`](https://github.com/micromark/micromark-extension-gfm-strikethrough)  //! *   [*§ 6.2 Emphasis and strong emphasis* in `CommonMark`](https://spec.commonmark.org/0.30/#emphasis-and-strong-emphasis) +//! *   [*§ 6.5 Strikethrough (extension)* in `GFM`](https://github.github.com/gfm/#strikethrough-extension-)  //!  //! [text]: crate::construct::text  //! [html-em]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-em-element  //! [html-strong]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-strong-element +//! [html-del]: https://html.spec.whatwg.org/multipage/edits.html#the-del-element  use crate::event::{Event, Kind, Name, Point};  use crate::resolve::Name as ResolveName; @@ -94,7 +112,11 @@ struct Sequence {  ///     ^  /// ```  pub fn start(tokenizer: &mut Tokenizer) -> State { -    if tokenizer.parse_state.constructs.attention && matches!(tokenizer.current, Some(b'*' | b'_')) +    // Emphasis/strong: +    if (tokenizer.parse_state.options.constructs.attention +        && matches!(tokenizer.current, Some(b'*' | b'_'))) +        // GFM strikethrough: +        || (tokenizer.parse_state.options.constructs.gfm_strikethrough && tokenizer.current == Some(b'~'))      {          tokenizer.tokenize_state.marker = tokenizer.current.unwrap();          tokenizer.enter(Name::AttentionSequence); @@ -117,85 +139,15 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {      } else {          tokenizer.exit(Name::AttentionSequence);          tokenizer.register_resolver(ResolveName::Attention); -        tokenizer.tokenize_state.marker = b'\0'; +        tokenizer.tokenize_state.marker = 0;          State::Ok      }  } -/// Resolve attention sequences. +/// Resolve sequences.  pub fn resolve(tokenizer: &mut Tokenizer) { -    let mut index = 0; -    let mut balance = 0; -    let mut sequences = vec![]; -      // Find all sequences, gather info about them. -    while index < tokenizer.events.len() { -        let enter = &tokenizer.events[index]; - -        if enter.kind == Kind::Enter { -            balance += 1; - -            if enter.name == Name::AttentionSequence { -                let end = index + 1; -                let exit = &tokenizer.events[end]; - -                let before_end = enter.point.index; -                let before_start = if before_end < 4 { 0 } else { before_end - 4 }; -                let char_before = -                    String::from_utf8_lossy(&tokenizer.parse_state.bytes[before_start..before_end]) -                        .chars() -                        .last(); - -                let after_start = exit.point.index; -                let after_end = if after_start + 4 > tokenizer.parse_state.bytes.len() { -                    tokenizer.parse_state.bytes.len() -                } else { -                    after_start + 4 -                }; -                let char_after = -                    String::from_utf8_lossy(&tokenizer.parse_state.bytes[after_start..after_end]) -                        .chars() -                        .next(); - -                let marker = Slice::from_point(tokenizer.parse_state.bytes, &enter.point) -                    .head() -                    .unwrap(); -                let before = classify_opt(char_before); -                let after = classify_opt(char_after); -                let open = after == CharacterKind::Other -                    || (after == CharacterKind::Punctuation && before != CharacterKind::Other); -                // To do: GFM strikethrough? -                // || char_after == '~' -                let close = before == CharacterKind::Other -                    || (before == CharacterKind::Punctuation && after != CharacterKind::Other); -                // To do: GFM strikethrough? -                // || char_before == '~' - -                sequences.push(Sequence { -                    index, -                    balance, -                    start_point: enter.point.clone(), -                    end_point: exit.point.clone(), -                    size: exit.point.index - enter.point.index, -                    open: if marker == b'*' { -                        open -                    } else { -                        open && (before != CharacterKind::Other || !close) -                    }, -                    close: if marker == b'*' { -                        close -                    } else { -                        close && (after != CharacterKind::Other || !open) -                    }, -                    marker, -                }); -            } -        } else { -            balance -= 1; -        } - -        index += 1; -    } +    let mut sequences = get_sequences(tokenizer);      // Now walk through them and match them.      let mut close = 0; @@ -230,7 +182,20 @@ pub fn resolve(tokenizer: &mut Tokenizer) {                          continue;                      } -                    // We’ve found a match! +                    // For GFM strikethrough: +                    // * both sequences must have the same size +                    // * more than 2 markers don’t work +                    // * one marker is prohibited by the spec, but supported by GH +                    if sequence_close.marker == b'~' +                        && (sequence_close.size != sequence_open.size +                            || sequence_close.size > 2 +                            || sequence_close.size == 1 +                                && !tokenizer.parse_state.options.gfm_strikethrough_single_tilde) +                    { +                        continue; +                    } + +                    // We found a match!                      next_index = match_sequences(tokenizer, &mut sequences, open, close);                      break; @@ -253,7 +218,80 @@ pub fn resolve(tokenizer: &mut Tokenizer) {      tokenizer.map.consume(&mut tokenizer.events);  } +/// Get sequences. +fn get_sequences(tokenizer: &mut Tokenizer) -> Vec<Sequence> { +    let mut index = 0; +    let mut balance = 0; +    let mut sequences = vec![]; + +    while index < tokenizer.events.len() { +        let enter = &tokenizer.events[index]; + +        if enter.kind == Kind::Enter { +            balance += 1; + +            if enter.name == Name::AttentionSequence { +                let end = index + 1; +                let exit = &tokenizer.events[end]; + +                let before_end = enter.point.index; +                let before_start = if before_end < 4 { 0 } else { before_end - 4 }; +                let after_start = exit.point.index; +                let after_end = if after_start + 4 > tokenizer.parse_state.bytes.len() { +                    tokenizer.parse_state.bytes.len() +                } else { +                    after_start + 4 +                }; + +                let marker = Slice::from_point(tokenizer.parse_state.bytes, &enter.point) +                    .head() +                    .unwrap(); +                let before = classify_opt( +                    String::from_utf8_lossy(&tokenizer.parse_state.bytes[before_start..before_end]) +                        .chars() +                        .last(), +                ); +                let after = classify_opt( +                    String::from_utf8_lossy(&tokenizer.parse_state.bytes[after_start..after_end]) +                        .chars() +                        .next(), +                ); +                let open = after == CharacterKind::Other +                    || (after == CharacterKind::Punctuation && before != CharacterKind::Other); +                let close = before == CharacterKind::Other +                    || (before == CharacterKind::Punctuation && after != CharacterKind::Other); + +                sequences.push(Sequence { +                    index, +                    balance, +                    start_point: enter.point.clone(), +                    end_point: exit.point.clone(), +                    size: exit.point.index - enter.point.index, +                    open: if marker == b'_' { +                        open && (before != CharacterKind::Other || !close) +                    } else { +                        open +                    }, +                    close: if marker == b'_' { +                        close && (after != CharacterKind::Other || !open) +                    } else { +                        close +                    }, +                    marker, +                }); +            } +        } else { +            balance -= 1; +        } + +        index += 1; +    } + +    sequences +} +  /// Match two sequences. +#[allow(clippy::too_many_lines)]  fn match_sequences(      tokenizer: &mut Tokenizer,      sequences: &mut Vec<Sequence>, @@ -292,7 +330,13 @@ fn match_sequences(          between += 1;      } -    let (group_name, seq_name, text_name) = if take == 1 { +    let (group_name, seq_name, text_name) = if sequences[open].marker == b'~' { +        ( +            Name::GfmStrikethrough, +            Name::GfmStrikethroughSequence, +            Name::GfmStrikethroughText, +        ) +    } else if take == 1 {          (Name::Emphasis, Name::EmphasisSequence, Name::EmphasisText)      } else {          (Name::Strong, Name::StrongSequence, Name::StrongText) diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs index 4ecd580..21f8fa5 100644 --- a/src/construct/autolink.rs +++ b/src/construct/autolink.rs @@ -135,7 +135,7 @@ use crate::util::constant::{AUTOLINK_DOMAIN_SIZE_MAX, AUTOLINK_SCHEME_SIZE_MAX};  ///      ^  /// ```  pub fn start(tokenizer: &mut Tokenizer) -> State { -    if tokenizer.parse_state.constructs.autolink && tokenizer.current == Some(b'<') { +    if tokenizer.parse_state.options.constructs.autolink && tokenizer.current == Some(b'<') {          tokenizer.enter(Name::Autolink);          tokenizer.enter(Name::AutolinkMarker);          tokenizer.consume(); diff --git a/src/construct/block_quote.rs b/src/construct/block_quote.rs index 039c839..11783d0 100644 --- a/src/construct/block_quote.rs +++ b/src/construct/block_quote.rs @@ -59,7 +59,7 @@ use crate::util::constant::TAB_SIZE;  ///     ^  /// ```  pub fn start(tokenizer: &mut Tokenizer) -> State { -    if tokenizer.parse_state.constructs.block_quote { +    if tokenizer.parse_state.options.constructs.block_quote {          tokenizer.enter(Name::BlockQuote);          State::Retry(StateName::BlockQuoteContStart)      } else { @@ -82,7 +82,7 @@ pub fn cont_start(tokenizer: &mut Tokenizer) -> State {          State::Retry(space_or_tab_min_max(              tokenizer,              1, -            if tokenizer.parse_state.constructs.code_indented { +            if tokenizer.parse_state.options.constructs.code_indented {                  TAB_SIZE - 1              } else {                  usize::MAX diff --git a/src/construct/character_escape.rs b/src/construct/character_escape.rs index 438092e..67946a0 100644 --- a/src/construct/character_escape.rs +++ b/src/construct/character_escape.rs @@ -53,7 +53,8 @@ use crate::tokenizer::Tokenizer;  ///      ^  /// ```  pub fn start(tokenizer: &mut Tokenizer) -> State { -    if tokenizer.parse_state.constructs.character_escape && tokenizer.current == Some(b'\\') { +    if tokenizer.parse_state.options.constructs.character_escape && tokenizer.current == Some(b'\\') +    {          tokenizer.enter(Name::CharacterEscape);          tokenizer.enter(Name::CharacterEscapeMarker);          tokenizer.consume(); diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs index 4669836..927e3d9 100644 --- a/src/construct/character_reference.rs +++ b/src/construct/character_reference.rs @@ -90,7 +90,9 @@ use crate::util::{  ///      ^  /// ```  pub fn start(tokenizer: &mut Tokenizer) -> State { -    if tokenizer.parse_state.constructs.character_reference && tokenizer.current == Some(b'&') { +    if tokenizer.parse_state.options.constructs.character_reference +        && tokenizer.current == Some(b'&') +    {          tokenizer.enter(Name::CharacterReference);          tokenizer.enter(Name::CharacterReferenceMarker);          tokenizer.consume(); diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs index bfd15dc..d117006 100644 --- a/src/construct/code_fenced.rs +++ b/src/construct/code_fenced.rs @@ -128,7 +128,7 @@ use crate::util::{  ///   | ~~~  /// ```  pub fn start(tokenizer: &mut Tokenizer) -> State { -    if tokenizer.parse_state.constructs.code_fenced { +    if tokenizer.parse_state.options.constructs.code_fenced {          if matches!(tokenizer.current, Some(b'\t' | b' ')) {              tokenizer.enter(Name::CodeFenced);              tokenizer.enter(Name::CodeFencedFence); @@ -139,7 +139,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {              return State::Retry(space_or_tab_min_max(                  tokenizer,                  0, -                if tokenizer.parse_state.constructs.code_indented { +                if tokenizer.parse_state.options.constructs.code_indented {                      TAB_SIZE - 1                  } else {                      usize::MAX @@ -384,7 +384,7 @@ pub fn close_start(tokenizer: &mut Tokenizer) -> State {          State::Retry(space_or_tab_min_max(              tokenizer,              0, -            if tokenizer.parse_state.constructs.code_indented { +            if tokenizer.parse_state.options.constructs.code_indented {                  TAB_SIZE - 1              } else {                  usize::MAX diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs index 866c78e..7d279c1 100644 --- a/src/construct/code_indented.rs +++ b/src/construct/code_indented.rs @@ -72,7 +72,7 @@ use crate::util::constant::TAB_SIZE;  pub fn start(tokenizer: &mut Tokenizer) -> State {      // Do not interrupt paragraphs.      if !tokenizer.interrupt -        && tokenizer.parse_state.constructs.code_indented +        && tokenizer.parse_state.options.constructs.code_indented          && matches!(tokenizer.current, Some(b'\t' | b' '))      {          tokenizer.enter(Name::CodeIndented); diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs index 413b5ee..b2cfd17 100644 --- a/src/construct/code_text.rs +++ b/src/construct/code_text.rs @@ -100,7 +100,7 @@ use crate::tokenizer::Tokenizer;  pub fn start(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          Some(b'`') -            if tokenizer.parse_state.constructs.code_text +            if tokenizer.parse_state.options.constructs.code_text                  && (tokenizer.previous != Some(b'`')                      || (!tokenizer.events.is_empty()                          && tokenizer.events[tokenizer.events.len() - 1].name diff --git a/src/construct/definition.rs b/src/construct/definition.rs index 071e595..e65d979 100644 --- a/src/construct/definition.rs +++ b/src/construct/definition.rs @@ -120,7 +120,7 @@ use crate::util::{  /// ```  pub fn start(tokenizer: &mut Tokenizer) -> State {      // Do not interrupt paragraphs (but do follow definitions). -    if tokenizer.parse_state.constructs.definition +    if tokenizer.parse_state.options.constructs.definition          && (!tokenizer.interrupt              || (!tokenizer.events.is_empty()                  && tokenizer.events[skip::opt_back( diff --git a/src/construct/frontmatter.rs b/src/construct/frontmatter.rs index 74006f6..268d91d 100644 --- a/src/construct/frontmatter.rs +++ b/src/construct/frontmatter.rs @@ -72,7 +72,7 @@ use crate::util::constant::FRONTMATTER_SEQUENCE_SIZE;  /// ```  pub fn start(tokenizer: &mut Tokenizer) -> State {      // Indent not allowed. -    if tokenizer.parse_state.constructs.frontmatter +    if tokenizer.parse_state.options.constructs.frontmatter          && matches!(tokenizer.current, Some(b'+' | b'-'))      {          tokenizer.tokenize_state.marker = tokenizer.current.unwrap(); diff --git a/src/construct/hard_break_escape.rs b/src/construct/hard_break_escape.rs index 64c909a..c562ff6 100644 --- a/src/construct/hard_break_escape.rs +++ b/src/construct/hard_break_escape.rs @@ -56,7 +56,9 @@ use crate::tokenizer::Tokenizer;  ///   | b  /// ```  pub fn start(tokenizer: &mut Tokenizer) -> State { -    if tokenizer.parse_state.constructs.hard_break_escape && tokenizer.current == Some(b'\\') { +    if tokenizer.parse_state.options.constructs.hard_break_escape +        && tokenizer.current == Some(b'\\') +    {          tokenizer.enter(Name::HardBreakEscape);          tokenizer.consume();          State::Next(StateName::HardBreakEscapeAfter) diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs index dd09f74..c1090c4 100644 --- a/src/construct/heading_atx.rs +++ b/src/construct/heading_atx.rs @@ -77,14 +77,14 @@ use alloc::vec;  ///     ^  /// ```  pub fn start(tokenizer: &mut Tokenizer) -> State { -    if tokenizer.parse_state.constructs.heading_atx { +    if tokenizer.parse_state.options.constructs.heading_atx {          tokenizer.enter(Name::HeadingAtx);          if matches!(tokenizer.current, Some(b'\t' | b' ')) {              tokenizer.attempt(State::Next(StateName::HeadingAtxBefore), State::Nok);              State::Retry(space_or_tab_min_max(                  tokenizer,                  0, -                if tokenizer.parse_state.constructs.code_indented { +                if tokenizer.parse_state.options.constructs.code_indented {                      TAB_SIZE - 1                  } else {                      usize::MAX diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs index 19d2dda..df1d4fb 100644 --- a/src/construct/heading_setext.rs +++ b/src/construct/heading_setext.rs @@ -85,7 +85,7 @@ use alloc::vec;  ///     ^  /// ```  pub fn start(tokenizer: &mut Tokenizer) -> State { -    if tokenizer.parse_state.constructs.heading_setext +    if tokenizer.parse_state.options.constructs.heading_setext          && !tokenizer.lazy          // Require a paragraph before.          && (!tokenizer.events.is_empty() @@ -102,7 +102,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {              State::Retry(space_or_tab_min_max(                  tokenizer,                  0, -                if tokenizer.parse_state.constructs.code_indented { +                if tokenizer.parse_state.options.constructs.code_indented {                      TAB_SIZE - 1                  } else {                      usize::MAX diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs index edb500e..3f6e19a 100644 --- a/src/construct/html_flow.rs +++ b/src/construct/html_flow.rs @@ -131,7 +131,7 @@ const COMPLETE: u8 = 7;  ///     ^  /// ```  pub fn start(tokenizer: &mut Tokenizer) -> State { -    if tokenizer.parse_state.constructs.html_flow { +    if tokenizer.parse_state.options.constructs.html_flow {          tokenizer.enter(Name::HtmlFlow);          if matches!(tokenizer.current, Some(b'\t' | b' ')) { @@ -141,7 +141,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {                  SpaceOrTabOptions {                      kind: Name::HtmlFlowData,                      min: 0, -                    max: if tokenizer.parse_state.constructs.code_indented { +                    max: if tokenizer.parse_state.options.constructs.code_indented {                          TAB_SIZE - 1                      } else {                          usize::MAX diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs index 5aa6137..d40361d 100644 --- a/src/construct/html_text.rs +++ b/src/construct/html_text.rs @@ -64,7 +64,7 @@ use crate::util::constant::HTML_CDATA_PREFIX;  ///       ^  /// ```  pub fn start(tokenizer: &mut Tokenizer) -> State { -    if Some(b'<') == tokenizer.current && tokenizer.parse_state.constructs.html_text { +    if Some(b'<') == tokenizer.current && tokenizer.parse_state.options.constructs.html_text {          tokenizer.enter(Name::HtmlText);          tokenizer.enter(Name::HtmlTextData);          tokenizer.consume(); diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs index 4532920..0ea745f 100644 --- a/src/construct/label_end.rs +++ b/src/construct/label_end.rs @@ -183,7 +183,7 @@ use alloc::vec;  /// > | [a] b  /// ```  pub fn start(tokenizer: &mut Tokenizer) -> State { -    if Some(b']') == tokenizer.current && tokenizer.parse_state.constructs.label_end { +    if Some(b']') == tokenizer.current && tokenizer.parse_state.options.constructs.label_end {          // If there is an okay opening:          if !tokenizer.tokenize_state.label_starts.is_empty() {              let label_start = tokenizer.tokenize_state.label_starts.last().unwrap(); diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs index 8d35df2..a8c9ac3 100644 --- a/src/construct/label_start_image.rs +++ b/src/construct/label_start_image.rs @@ -44,7 +44,8 @@ use crate::tokenizer::{LabelStart, Tokenizer};  ///       ^  /// ```  pub fn start(tokenizer: &mut Tokenizer) -> State { -    if tokenizer.parse_state.constructs.label_start_image && tokenizer.current == Some(b'!') { +    if tokenizer.parse_state.options.constructs.label_start_image && tokenizer.current == Some(b'!') +    {          tokenizer.enter(Name::LabelImage);          tokenizer.enter(Name::LabelImageMarker);          tokenizer.consume(); diff --git a/src/construct/label_start_link.rs b/src/construct/label_start_link.rs index e079b2d..3aeb68b 100644 --- a/src/construct/label_start_link.rs +++ b/src/construct/label_start_link.rs @@ -43,7 +43,8 @@ use crate::tokenizer::{LabelStart, Tokenizer};  ///       ^  /// ```  pub fn start(tokenizer: &mut Tokenizer) -> State { -    if tokenizer.parse_state.constructs.label_start_link && tokenizer.current == Some(b'[') { +    if tokenizer.parse_state.options.constructs.label_start_link && tokenizer.current == Some(b'[') +    {          let start = tokenizer.events.len();          tokenizer.enter(Name::LabelLink);          tokenizer.enter(Name::LabelMarker); diff --git a/src/construct/list_item.rs b/src/construct/list_item.rs index 7228a00..39b5d13 100644 --- a/src/construct/list_item.rs +++ b/src/construct/list_item.rs @@ -77,7 +77,7 @@ use alloc::{vec, vec::Vec};  ///     ^  /// ```  pub fn start(tokenizer: &mut Tokenizer) -> State { -    if tokenizer.parse_state.constructs.list_item { +    if tokenizer.parse_state.options.constructs.list_item {          tokenizer.enter(Name::ListItem);          if matches!(tokenizer.current, Some(b'\t' | b' ')) { @@ -85,7 +85,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {              State::Retry(space_or_tab_min_max(                  tokenizer,                  0, -                if tokenizer.parse_state.constructs.code_indented { +                if tokenizer.parse_state.options.constructs.code_indented {                      TAB_SIZE - 1                  } else {                      usize::MAX diff --git a/src/construct/text.rs b/src/construct/text.rs index 06ba378..9d40585 100644 --- a/src/construct/text.rs +++ b/src/construct/text.rs @@ -27,7 +27,7 @@ use crate::state::{Name as StateName, State};  use crate::tokenizer::Tokenizer;  /// Characters that can start something in text. -const MARKERS: [u8; 9] = [ +const MARKERS: [u8; 10] = [      b'!',  // `label_start_image`      b'&',  // `character_reference`      b'*',  // `attention` @@ -37,6 +37,7 @@ const MARKERS: [u8; 9] = [      b']',  // `label_end`      b'_',  // `attention`      b'`',  // `code_text` +    b'~',  // `attention` (w/ `gfm_strikethrough`)  ];  /// Start of text. @@ -77,7 +78,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {              );              State::Retry(StateName::CharacterReferenceStart)          } -        Some(b'*' | b'_') => { +        Some(b'*' | b'_' | b'~') => {              tokenizer.attempt(                  State::Next(StateName::TextBefore),                  State::Next(StateName::TextBeforeData), @@ -171,11 +172,16 @@ pub fn before_data(tokenizer: &mut Tokenizer) -> State {  pub fn resolve(tokenizer: &mut Tokenizer) {      resolve_whitespace(          tokenizer, -        tokenizer.parse_state.constructs.hard_break_trailing, +        tokenizer.parse_state.options.constructs.hard_break_trailing,          true,      ); -    if tokenizer.parse_state.constructs.gfm_autolink_literal { +    if tokenizer +        .parse_state +        .options +        .constructs +        .gfm_autolink_literal +    {          resolve_gfm_autolink_literal(tokenizer);      }  } diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs index f77f83e..12dd7cf 100644 --- a/src/construct/thematic_break.rs +++ b/src/construct/thematic_break.rs @@ -69,7 +69,7 @@ use crate::util::constant::{TAB_SIZE, THEMATIC_BREAK_MARKER_COUNT_MIN};  ///     ^  /// ```  pub fn start(tokenizer: &mut Tokenizer) -> State { -    if tokenizer.parse_state.constructs.thematic_break { +    if tokenizer.parse_state.options.constructs.thematic_break {          tokenizer.enter(Name::ThematicBreak);          if matches!(tokenizer.current, Some(b'\t' | b' ')) { @@ -77,7 +77,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {              State::Retry(space_or_tab_min_max(                  tokenizer,                  0, -                if tokenizer.parse_state.constructs.code_indented { +                if tokenizer.parse_state.options.constructs.code_indented {                      TAB_SIZE - 1                  } else {                      usize::MAX diff --git a/src/event.rs b/src/event.rs index 169fdb5..3c690e1 100644 --- a/src/event.rs +++ b/src/event.rs @@ -878,7 +878,6 @@ pub enum Name {      ///      ^      /// ```      EmphasisText, -    // To do: sort.      /// Whole frontmatter.      ///      /// ## Info @@ -1020,6 +1019,61 @@ pub enum Name {      ///     ^^^^^^^^^^^^^^^      /// ```      GfmAutolinkLiteralWww, +    /// GFM: Strikethrough. +    /// +    /// ## Info +    /// +    /// *   **Context**: +    ///     [text content][crate::construct::text] +    /// *   **Content model**: +    ///     [`GfmStrikethroughSequence`][Name::GfmStrikethroughSequence], +    ///     [`GfmStrikethroughText`][Name::GfmStrikethroughText] +    /// *   **Construct**: +    ///     [`attention`][crate::construct::attention] +    /// +    /// ## Example +    /// +    /// ```markdown +    /// > | ~a~ +    ///     ^^^ +    /// ``` +    GfmStrikethrough, +    /// Gfm: Strikethrough sequence. +    /// +    /// ## Info +    /// +    /// *   **Context**: +    ///     [`GfmStrikethrough`][Name::GfmStrikethrough] +    /// *   **Content model**: +    ///     void +    /// *   **Construct**: +    ///     [`attention`][crate::construct::attention] +    /// +    /// ## Example +    /// +    /// ```markdown +    /// > | ~a~ +    ///     ^ ^ +    /// ``` +    GfmStrikethroughSequence, +    /// Gfm: Strikethrough text. +    /// +    /// ## Info +    /// +    /// *   **Context**: +    ///     [`GfmStrikethrough`][Name::GfmStrikethrough] +    /// *   **Content model**: +    ///     [text content][crate::construct::text] +    /// *   **Construct**: +    ///     [`attention`][crate::construct::attention] +    /// +    /// ## Example +    /// +    /// ```markdown +    /// > | ~a~ +    ///      ^ +    /// ``` +    GfmStrikethroughText,      /// Whole hard break (escape).      ///      /// ## Info @@ -1977,7 +2031,7 @@ pub enum Name {  }  /// List of void events, used to make sure everything is working well. -pub const VOID_EVENTS: [Name; 46] = [ +pub const VOID_EVENTS: [Name; 47] = [      Name::AttentionSequence,      Name::AutolinkEmail,      Name::AutolinkMarker, @@ -2006,6 +2060,7 @@ pub const VOID_EVENTS: [Name; 46] = [      Name::GfmAutolinkLiteralEmail,      Name::GfmAutolinkLiteralProtocol,      Name::GfmAutolinkLiteralWww, +    Name::GfmStrikethroughSequence,      Name::FrontmatterSequence,      Name::HardBreakEscape,      Name::HardBreakTrailing, @@ -171,6 +171,13 @@ pub struct Constructs {      ///     ^^^^^^^^^^^^^^^^^^^      /// ```      pub gfm_autolink_literal: bool, +    /// GFM: strikethrough. +    /// +    /// ```markdown +    /// > | a ~b~ c. +    ///       ^^^ +    /// ``` +    pub gfm_strikethrough: bool,      /// Hard break (escape).      ///      /// ```markdown @@ -269,6 +276,7 @@ impl Default for Constructs {              definition: true,              frontmatter: false,              gfm_autolink_literal: false, +            gfm_strikethrough: false,              hard_break_escape: true,              hard_break_trailing: true,              heading_atx: true, @@ -292,13 +300,14 @@ impl Constructs {      pub fn gfm() -> Self {          Self {              gfm_autolink_literal: true, +            gfm_strikethrough: true,              ..Self::default()          }      }  }  /// Configuration (optional). -#[derive(Clone, Debug, Default)] +#[derive(Clone, Debug)]  pub struct Options {      /// Whether to allow (dangerous) HTML.      /// The default is `false`, you can turn it on to `true` for trusted @@ -358,6 +367,43 @@ pub struct Options {      /// ```      pub allow_dangerous_protocol: bool, +    /// Whether to support GFM strikethrough (if enabled in `constructs`) with +    /// a single tilde (default: true). +    /// +    /// Single tildes work on github.com but are technically prohibited by GFM. +    /// +    /// ## Examples +    /// +    /// ``` +    /// use micromark::{micromark, micromark_with_options, Options, Constructs}; +    /// +    /// // micromark supports single tildes by default: +    /// assert_eq!( +    ///     micromark_with_options( +    ///       "~a~", +    ///       &Options { +    ///         constructs: Constructs::gfm(), +    ///         ..Options::default() +    ///       } +    ///   ), +    ///   "<p><del>a</del></p>" +    /// ); +    /// +    /// // Pass `gfm_strikethrough_single_tilde: false` to turn that off: +    /// assert_eq!( +    ///     micromark_with_options( +    ///       "~a~", +    ///       &Options { +    ///         constructs: Constructs::gfm(), +    ///         gfm_strikethrough_single_tilde: false, +    ///         ..Options::default() +    ///       } +    ///   ), +    ///   "<p>~a~</p>" +    /// ); +    /// ``` +    pub gfm_strikethrough_single_tilde: bool, +      /// Default line ending to use, for line endings not in `value`.      ///      /// Generally, micromark copies line endings (`\r`, `\n`, `\r\n`) in the @@ -427,6 +473,19 @@ pub struct Options {      pub constructs: Constructs,  } +impl Default for Options { +    /// Safe `CommonMark` defaults. +    fn default() -> Self { +        Self { +            allow_dangerous_html: false, +            allow_dangerous_protocol: false, +            gfm_strikethrough_single_tilde: true, +            default_line_ending: LineEnding::default(), +            constructs: Constructs::default(), +        } +    } +} +  /// Turn markdown into HTML.  ///  /// ## Examples diff --git a/src/parser.rs b/src/parser.rs index 404fd0f..afa08ac 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -4,7 +4,7 @@ use crate::event::{Event, Point};  use crate::state::{Name as StateName, State};  use crate::subtokenize::subtokenize;  use crate::tokenizer::Tokenizer; -use crate::{Constructs, Options}; +use crate::Options;  use alloc::{string::String, vec, vec::Vec};  /// Info needed, in all content types, when parsing markdown. @@ -13,7 +13,8 @@ use alloc::{string::String, vec, vec::Vec};  /// It also references the input value as bytes (`u8`).  #[derive(Debug)]  pub struct ParseState<'a> { -    pub constructs: &'a Constructs, +    /// Configuration. +    pub options: &'a Options,      /// List of chars.      pub bytes: &'a [u8],      /// Set of defined identifiers. @@ -25,7 +26,7 @@ pub struct ParseState<'a> {  /// Passes the bytes back so the compiler can access the source.  pub fn parse<'a>(value: &'a str, options: &'a Options) -> (Vec<Event>, &'a [u8]) {      let mut parse_state = ParseState { -        constructs: &options.constructs, +        options,          bytes: value.as_bytes(),          definitions: vec![],      }; | 
