From 351c69644bdbdf52c95e322904273657892920b5 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Mon, 22 Aug 2022 11:50:42 +0200 Subject: Add support for GFM strikethrough --- src/construct/attention.rs | 206 +++++++++++++++++++++-------------- src/construct/autolink.rs | 2 +- src/construct/block_quote.rs | 4 +- src/construct/character_escape.rs | 3 +- src/construct/character_reference.rs | 4 +- src/construct/code_fenced.rs | 6 +- src/construct/code_indented.rs | 2 +- src/construct/code_text.rs | 2 +- src/construct/definition.rs | 2 +- src/construct/frontmatter.rs | 2 +- src/construct/hard_break_escape.rs | 4 +- src/construct/heading_atx.rs | 4 +- src/construct/heading_setext.rs | 4 +- src/construct/html_flow.rs | 4 +- src/construct/html_text.rs | 2 +- src/construct/label_end.rs | 2 +- src/construct/label_start_image.rs | 3 +- src/construct/label_start_link.rs | 3 +- src/construct/list_item.rs | 4 +- src/construct/text.rs | 14 ++- src/construct/thematic_break.rs | 4 +- 21 files changed, 169 insertions(+), 112 deletions(-) (limited to 'src/construct') diff --git a/src/construct/attention.rs b/src/construct/attention.rs index ef960d4..526f58c 100644 --- a/src/construct/attention.rs +++ b/src/construct/attention.rs @@ -1,4 +1,5 @@ -//! Attention (emphasis and strong) occurs in the [text][] content type. +//! Attention (emphasis, strong, optionally GFM strikethrough) occurs in the +//! [text][] content type. //! //! ## Grammar //! @@ -7,24 +8,31 @@ //! //! ```bnf //! attention_sequence ::= 1*'*' | 1*'_' +//! gfm_attention_sequence ::= 1*'~' //! ``` //! //! Sequences are matched together to form attention based on which character -//! they contain, and what character occurs before and after each sequence. +//! they contain, how long they are, and what character occurs before and after +//! each sequence. //! Otherwise they are turned into data. //! //! ## HTML //! -//! When sequences match, and two markers can be “taken” from them, they -//! 
together relate to the `<strong>` element in HTML. +//! When asterisk/underscore sequences match, and two markers can be “taken” +//! from them, they together relate to the `<strong>` element in HTML. //! When one marker can be taken, they relate to the `<em>` element. //! See [*§ 4.5.2 The `em` element*][html-em] and //! [*§ 4.5.3 The `strong` element*][html-strong] in the HTML spec for more //! info. //! +//! When tilde sequences match, they together relate to the `<del>` element in +//! HTML. +//! See [*§ 4.7.2 The `del` element*][html-del] in the HTML spec for more info. +//! //! ## Recommendation //! -//! It is recommended to use asterisks for attention when writing markdown. +//! It is recommended to use asterisks for emphasis/strong attention when +//! writing markdown. //! //! There are some small differences in whether sequences can open and/or close //! based on whether they are formed with asterisks or underscores. @@ -37,11 +45,18 @@ //! can look for asterisks to find syntax while not worrying about other //! characters. //! +//! For strikethrough attention, it is recommended to use two markers. +//! While `github.com` allows single tildes too, it technically prohibits it in +//! their spec. +//! //! ## Tokens //! //! * [`Emphasis`][Name::Emphasis] //! * [`EmphasisSequence`][Name::EmphasisSequence] //! * [`EmphasisText`][Name::EmphasisText] +//! * [`GfmStrikethrough`][Name::GfmStrikethrough] +//! * [`GfmStrikethroughSequence`][Name::GfmStrikethroughSequence] +//! * [`GfmStrikethroughText`][Name::GfmStrikethroughText] //! * [`Strong`][Name::Strong] //! * [`StrongSequence`][Name::StrongSequence] //! * [`StrongText`][Name::StrongText] @@ -52,11 +67,14 @@ //! ## References //! //! * [`attention.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/attention.js) +//! * [`micromark-extension-gfm-strikethrough`](https://github.com/micromark/micromark-extension-gfm-strikethrough) //! 
* [*§ 6.2 Emphasis and strong emphasis* in `CommonMark`](https://spec.commonmark.org/0.30/#emphasis-and-strong-emphasis) +//! * [*§ 6.5 Strikethrough (extension)* in `GFM`](https://github.github.com/gfm/#strikethrough-extension-) //! //! [text]: crate::construct::text //! [html-em]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-em-element //! [html-strong]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-strong-element +//! [html-del]: https://html.spec.whatwg.org/multipage/edits.html#the-del-element use crate::event::{Event, Kind, Name, Point}; use crate::resolve::Name as ResolveName; @@ -94,7 +112,11 @@ struct Sequence { /// ^ /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { - if tokenizer.parse_state.constructs.attention && matches!(tokenizer.current, Some(b'*' | b'_')) + // Emphasis/strong: + if (tokenizer.parse_state.options.constructs.attention + && matches!(tokenizer.current, Some(b'*' | b'_'))) + // GFM strikethrough: + || (tokenizer.parse_state.options.constructs.gfm_strikethrough && tokenizer.current == Some(b'~')) { tokenizer.tokenize_state.marker = tokenizer.current.unwrap(); tokenizer.enter(Name::AttentionSequence); @@ -117,85 +139,15 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State { } else { tokenizer.exit(Name::AttentionSequence); tokenizer.register_resolver(ResolveName::Attention); - tokenizer.tokenize_state.marker = b'\0'; + tokenizer.tokenize_state.marker = 0; State::Ok } } -/// Resolve attention sequences. +/// Resolve sequences. pub fn resolve(tokenizer: &mut Tokenizer) { - let mut index = 0; - let mut balance = 0; - let mut sequences = vec![]; - // Find all sequences, gather info about them. 
- while index < tokenizer.events.len() { - let enter = &tokenizer.events[index]; - - if enter.kind == Kind::Enter { - balance += 1; - - if enter.name == Name::AttentionSequence { - let end = index + 1; - let exit = &tokenizer.events[end]; - - let before_end = enter.point.index; - let before_start = if before_end < 4 { 0 } else { before_end - 4 }; - let char_before = - String::from_utf8_lossy(&tokenizer.parse_state.bytes[before_start..before_end]) - .chars() - .last(); - - let after_start = exit.point.index; - let after_end = if after_start + 4 > tokenizer.parse_state.bytes.len() { - tokenizer.parse_state.bytes.len() - } else { - after_start + 4 - }; - let char_after = - String::from_utf8_lossy(&tokenizer.parse_state.bytes[after_start..after_end]) - .chars() - .next(); - - let marker = Slice::from_point(tokenizer.parse_state.bytes, &enter.point) - .head() - .unwrap(); - let before = classify_opt(char_before); - let after = classify_opt(char_after); - let open = after == CharacterKind::Other - || (after == CharacterKind::Punctuation && before != CharacterKind::Other); - // To do: GFM strikethrough? - // || char_after == '~' - let close = before == CharacterKind::Other - || (before == CharacterKind::Punctuation && after != CharacterKind::Other); - // To do: GFM strikethrough? - // || char_before == '~' - - sequences.push(Sequence { - index, - balance, - start_point: enter.point.clone(), - end_point: exit.point.clone(), - size: exit.point.index - enter.point.index, - open: if marker == b'*' { - open - } else { - open && (before != CharacterKind::Other || !close) - }, - close: if marker == b'*' { - close - } else { - close && (after != CharacterKind::Other || !open) - }, - marker, - }); - } - } else { - balance -= 1; - } - - index += 1; - } + let mut sequences = get_sequences(tokenizer); // Now walk through them and match them. let mut close = 0; @@ -230,7 +182,20 @@ pub fn resolve(tokenizer: &mut Tokenizer) { continue; } - // We’ve found a match! 
+ // For GFM strikethrough: + // * both sequences must have the same size + // * more than 2 markers don’t work + // * one marker is prohibited by the spec, but supported by GH + if sequence_close.marker == b'~' + && (sequence_close.size != sequence_open.size + || sequence_close.size > 2 + || sequence_close.size == 1 + && !tokenizer.parse_state.options.gfm_strikethrough_single_tilde) + { + continue; + } + + // We found a match! next_index = match_sequences(tokenizer, &mut sequences, open, close); break; @@ -253,7 +218,80 @@ pub fn resolve(tokenizer: &mut Tokenizer) { tokenizer.map.consume(&mut tokenizer.events); } +/// Get sequences. +fn get_sequences(tokenizer: &mut Tokenizer) -> Vec { + let mut index = 0; + let mut balance = 0; + let mut sequences = vec![]; + + while index < tokenizer.events.len() { + let enter = &tokenizer.events[index]; + + if enter.kind == Kind::Enter { + balance += 1; + + if enter.name == Name::AttentionSequence { + let end = index + 1; + let exit = &tokenizer.events[end]; + + let before_end = enter.point.index; + let before_start = if before_end < 4 { 0 } else { before_end - 4 }; + let after_start = exit.point.index; + let after_end = if after_start + 4 > tokenizer.parse_state.bytes.len() { + tokenizer.parse_state.bytes.len() + } else { + after_start + 4 + }; + + let marker = Slice::from_point(tokenizer.parse_state.bytes, &enter.point) + .head() + .unwrap(); + let before = classify_opt( + String::from_utf8_lossy(&tokenizer.parse_state.bytes[before_start..before_end]) + .chars() + .last(), + ); + let after = classify_opt( + String::from_utf8_lossy(&tokenizer.parse_state.bytes[after_start..after_end]) + .chars() + .next(), + ); + let open = after == CharacterKind::Other + || (after == CharacterKind::Punctuation && before != CharacterKind::Other); + let close = before == CharacterKind::Other + || (before == CharacterKind::Punctuation && after != CharacterKind::Other); + + sequences.push(Sequence { + index, + balance, + start_point: 
enter.point.clone(), + end_point: exit.point.clone(), + size: exit.point.index - enter.point.index, + open: if marker == b'_' { + open && (before != CharacterKind::Other || !close) + } else { + open + }, + close: if marker == b'_' { + close && (after != CharacterKind::Other || !open) + } else { + close + }, + marker, + }); + } + } else { + balance -= 1; + } + + index += 1; + } + + sequences +} + /// Match two sequences. +#[allow(clippy::too_many_lines)] fn match_sequences( tokenizer: &mut Tokenizer, sequences: &mut Vec, @@ -292,7 +330,13 @@ fn match_sequences( between += 1; } - let (group_name, seq_name, text_name) = if take == 1 { + let (group_name, seq_name, text_name) = if sequences[open].marker == b'~' { + ( + Name::GfmStrikethrough, + Name::GfmStrikethroughSequence, + Name::GfmStrikethroughText, + ) + } else if take == 1 { (Name::Emphasis, Name::EmphasisSequence, Name::EmphasisText) } else { (Name::Strong, Name::StrongSequence, Name::StrongText) diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs index 4ecd580..21f8fa5 100644 --- a/src/construct/autolink.rs +++ b/src/construct/autolink.rs @@ -135,7 +135,7 @@ use crate::util::constant::{AUTOLINK_DOMAIN_SIZE_MAX, AUTOLINK_SCHEME_SIZE_MAX}; /// ^ /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { - if tokenizer.parse_state.constructs.autolink && tokenizer.current == Some(b'<') { + if tokenizer.parse_state.options.constructs.autolink && tokenizer.current == Some(b'<') { tokenizer.enter(Name::Autolink); tokenizer.enter(Name::AutolinkMarker); tokenizer.consume(); diff --git a/src/construct/block_quote.rs b/src/construct/block_quote.rs index 039c839..11783d0 100644 --- a/src/construct/block_quote.rs +++ b/src/construct/block_quote.rs @@ -59,7 +59,7 @@ use crate::util::constant::TAB_SIZE; /// ^ /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { - if tokenizer.parse_state.constructs.block_quote { + if tokenizer.parse_state.options.constructs.block_quote { 
tokenizer.enter(Name::BlockQuote); State::Retry(StateName::BlockQuoteContStart) } else { @@ -82,7 +82,7 @@ pub fn cont_start(tokenizer: &mut Tokenizer) -> State { State::Retry(space_or_tab_min_max( tokenizer, 1, - if tokenizer.parse_state.constructs.code_indented { + if tokenizer.parse_state.options.constructs.code_indented { TAB_SIZE - 1 } else { usize::MAX diff --git a/src/construct/character_escape.rs b/src/construct/character_escape.rs index 438092e..67946a0 100644 --- a/src/construct/character_escape.rs +++ b/src/construct/character_escape.rs @@ -53,7 +53,8 @@ use crate::tokenizer::Tokenizer; /// ^ /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { - if tokenizer.parse_state.constructs.character_escape && tokenizer.current == Some(b'\\') { + if tokenizer.parse_state.options.constructs.character_escape && tokenizer.current == Some(b'\\') + { tokenizer.enter(Name::CharacterEscape); tokenizer.enter(Name::CharacterEscapeMarker); tokenizer.consume(); diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs index 4669836..927e3d9 100644 --- a/src/construct/character_reference.rs +++ b/src/construct/character_reference.rs @@ -90,7 +90,9 @@ use crate::util::{ /// ^ /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { - if tokenizer.parse_state.constructs.character_reference && tokenizer.current == Some(b'&') { + if tokenizer.parse_state.options.constructs.character_reference + && tokenizer.current == Some(b'&') + { tokenizer.enter(Name::CharacterReference); tokenizer.enter(Name::CharacterReferenceMarker); tokenizer.consume(); diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs index bfd15dc..d117006 100644 --- a/src/construct/code_fenced.rs +++ b/src/construct/code_fenced.rs @@ -128,7 +128,7 @@ use crate::util::{ /// | ~~~ /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { - if tokenizer.parse_state.constructs.code_fenced { + if tokenizer.parse_state.options.constructs.code_fenced { if 
matches!(tokenizer.current, Some(b'\t' | b' ')) { tokenizer.enter(Name::CodeFenced); tokenizer.enter(Name::CodeFencedFence); @@ -139,7 +139,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { return State::Retry(space_or_tab_min_max( tokenizer, 0, - if tokenizer.parse_state.constructs.code_indented { + if tokenizer.parse_state.options.constructs.code_indented { TAB_SIZE - 1 } else { usize::MAX @@ -384,7 +384,7 @@ pub fn close_start(tokenizer: &mut Tokenizer) -> State { State::Retry(space_or_tab_min_max( tokenizer, 0, - if tokenizer.parse_state.constructs.code_indented { + if tokenizer.parse_state.options.constructs.code_indented { TAB_SIZE - 1 } else { usize::MAX diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs index 866c78e..7d279c1 100644 --- a/src/construct/code_indented.rs +++ b/src/construct/code_indented.rs @@ -72,7 +72,7 @@ use crate::util::constant::TAB_SIZE; pub fn start(tokenizer: &mut Tokenizer) -> State { // Do not interrupt paragraphs. if !tokenizer.interrupt - && tokenizer.parse_state.constructs.code_indented + && tokenizer.parse_state.options.constructs.code_indented && matches!(tokenizer.current, Some(b'\t' | b' ')) { tokenizer.enter(Name::CodeIndented); diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs index 413b5ee..b2cfd17 100644 --- a/src/construct/code_text.rs +++ b/src/construct/code_text.rs @@ -100,7 +100,7 @@ use crate::tokenizer::Tokenizer; pub fn start(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'`') - if tokenizer.parse_state.constructs.code_text + if tokenizer.parse_state.options.constructs.code_text && (tokenizer.previous != Some(b'`') || (!tokenizer.events.is_empty() && tokenizer.events[tokenizer.events.len() - 1].name diff --git a/src/construct/definition.rs b/src/construct/definition.rs index 071e595..e65d979 100644 --- a/src/construct/definition.rs +++ b/src/construct/definition.rs @@ -120,7 +120,7 @@ use crate::util::{ /// ``` pub fn start(tokenizer: &mut 
Tokenizer) -> State { // Do not interrupt paragraphs (but do follow definitions). - if tokenizer.parse_state.constructs.definition + if tokenizer.parse_state.options.constructs.definition && (!tokenizer.interrupt || (!tokenizer.events.is_empty() && tokenizer.events[skip::opt_back( diff --git a/src/construct/frontmatter.rs b/src/construct/frontmatter.rs index 74006f6..268d91d 100644 --- a/src/construct/frontmatter.rs +++ b/src/construct/frontmatter.rs @@ -72,7 +72,7 @@ use crate::util::constant::FRONTMATTER_SEQUENCE_SIZE; /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { // Indent not allowed. - if tokenizer.parse_state.constructs.frontmatter + if tokenizer.parse_state.options.constructs.frontmatter && matches!(tokenizer.current, Some(b'+' | b'-')) { tokenizer.tokenize_state.marker = tokenizer.current.unwrap(); diff --git a/src/construct/hard_break_escape.rs b/src/construct/hard_break_escape.rs index 64c909a..c562ff6 100644 --- a/src/construct/hard_break_escape.rs +++ b/src/construct/hard_break_escape.rs @@ -56,7 +56,9 @@ use crate::tokenizer::Tokenizer; /// | b /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { - if tokenizer.parse_state.constructs.hard_break_escape && tokenizer.current == Some(b'\\') { + if tokenizer.parse_state.options.constructs.hard_break_escape + && tokenizer.current == Some(b'\\') + { tokenizer.enter(Name::HardBreakEscape); tokenizer.consume(); State::Next(StateName::HardBreakEscapeAfter) diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs index dd09f74..c1090c4 100644 --- a/src/construct/heading_atx.rs +++ b/src/construct/heading_atx.rs @@ -77,14 +77,14 @@ use alloc::vec; /// ^ /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { - if tokenizer.parse_state.constructs.heading_atx { + if tokenizer.parse_state.options.constructs.heading_atx { tokenizer.enter(Name::HeadingAtx); if matches!(tokenizer.current, Some(b'\t' | b' ')) { tokenizer.attempt(State::Next(StateName::HeadingAtxBefore), State::Nok); 
State::Retry(space_or_tab_min_max( tokenizer, 0, - if tokenizer.parse_state.constructs.code_indented { + if tokenizer.parse_state.options.constructs.code_indented { TAB_SIZE - 1 } else { usize::MAX diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs index 19d2dda..df1d4fb 100644 --- a/src/construct/heading_setext.rs +++ b/src/construct/heading_setext.rs @@ -85,7 +85,7 @@ use alloc::vec; /// ^ /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { - if tokenizer.parse_state.constructs.heading_setext + if tokenizer.parse_state.options.constructs.heading_setext && !tokenizer.lazy // Require a paragraph before. && (!tokenizer.events.is_empty() @@ -102,7 +102,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { State::Retry(space_or_tab_min_max( tokenizer, 0, - if tokenizer.parse_state.constructs.code_indented { + if tokenizer.parse_state.options.constructs.code_indented { TAB_SIZE - 1 } else { usize::MAX diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs index edb500e..3f6e19a 100644 --- a/src/construct/html_flow.rs +++ b/src/construct/html_flow.rs @@ -131,7 +131,7 @@ const COMPLETE: u8 = 7; /// ^ /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { - if tokenizer.parse_state.constructs.html_flow { + if tokenizer.parse_state.options.constructs.html_flow { tokenizer.enter(Name::HtmlFlow); if matches!(tokenizer.current, Some(b'\t' | b' ')) { @@ -141,7 +141,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { SpaceOrTabOptions { kind: Name::HtmlFlowData, min: 0, - max: if tokenizer.parse_state.constructs.code_indented { + max: if tokenizer.parse_state.options.constructs.code_indented { TAB_SIZE - 1 } else { usize::MAX diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs index 5aa6137..d40361d 100644 --- a/src/construct/html_text.rs +++ b/src/construct/html_text.rs @@ -64,7 +64,7 @@ use crate::util::constant::HTML_CDATA_PREFIX; /// ^ /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { - if 
Some(b'<') == tokenizer.current && tokenizer.parse_state.constructs.html_text { + if Some(b'<') == tokenizer.current && tokenizer.parse_state.options.constructs.html_text { tokenizer.enter(Name::HtmlText); tokenizer.enter(Name::HtmlTextData); tokenizer.consume(); diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs index 4532920..0ea745f 100644 --- a/src/construct/label_end.rs +++ b/src/construct/label_end.rs @@ -183,7 +183,7 @@ use alloc::vec; /// > | [a] b /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { - if Some(b']') == tokenizer.current && tokenizer.parse_state.constructs.label_end { + if Some(b']') == tokenizer.current && tokenizer.parse_state.options.constructs.label_end { // If there is an okay opening: if !tokenizer.tokenize_state.label_starts.is_empty() { let label_start = tokenizer.tokenize_state.label_starts.last().unwrap(); diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs index 8d35df2..a8c9ac3 100644 --- a/src/construct/label_start_image.rs +++ b/src/construct/label_start_image.rs @@ -44,7 +44,8 @@ use crate::tokenizer::{LabelStart, Tokenizer}; /// ^ /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { - if tokenizer.parse_state.constructs.label_start_image && tokenizer.current == Some(b'!') { + if tokenizer.parse_state.options.constructs.label_start_image && tokenizer.current == Some(b'!') + { tokenizer.enter(Name::LabelImage); tokenizer.enter(Name::LabelImageMarker); tokenizer.consume(); diff --git a/src/construct/label_start_link.rs b/src/construct/label_start_link.rs index e079b2d..3aeb68b 100644 --- a/src/construct/label_start_link.rs +++ b/src/construct/label_start_link.rs @@ -43,7 +43,8 @@ use crate::tokenizer::{LabelStart, Tokenizer}; /// ^ /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { - if tokenizer.parse_state.constructs.label_start_link && tokenizer.current == Some(b'[') { + if tokenizer.parse_state.options.constructs.label_start_link && tokenizer.current == 
Some(b'[') + { let start = tokenizer.events.len(); tokenizer.enter(Name::LabelLink); tokenizer.enter(Name::LabelMarker); diff --git a/src/construct/list_item.rs b/src/construct/list_item.rs index 7228a00..39b5d13 100644 --- a/src/construct/list_item.rs +++ b/src/construct/list_item.rs @@ -77,7 +77,7 @@ use alloc::{vec, vec::Vec}; /// ^ /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { - if tokenizer.parse_state.constructs.list_item { + if tokenizer.parse_state.options.constructs.list_item { tokenizer.enter(Name::ListItem); if matches!(tokenizer.current, Some(b'\t' | b' ')) { @@ -85,7 +85,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { State::Retry(space_or_tab_min_max( tokenizer, 0, - if tokenizer.parse_state.constructs.code_indented { + if tokenizer.parse_state.options.constructs.code_indented { TAB_SIZE - 1 } else { usize::MAX diff --git a/src/construct/text.rs b/src/construct/text.rs index 06ba378..9d40585 100644 --- a/src/construct/text.rs +++ b/src/construct/text.rs @@ -27,7 +27,7 @@ use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; /// Characters that can start something in text. -const MARKERS: [u8; 9] = [ +const MARKERS: [u8; 10] = [ b'!', // `label_start_image` b'&', // `character_reference` b'*', // `attention` @@ -37,6 +37,7 @@ const MARKERS: [u8; 9] = [ b']', // `label_end` b'_', // `attention` b'`', // `code_text` + b'~', // `attention` (w/ `gfm_strikethrough`) ]; /// Start of text. 
@@ -77,7 +78,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { ); State::Retry(StateName::CharacterReferenceStart) } - Some(b'*' | b'_') => { + Some(b'*' | b'_' | b'~') => { tokenizer.attempt( State::Next(StateName::TextBefore), State::Next(StateName::TextBeforeData), @@ -171,11 +172,16 @@ pub fn before_data(tokenizer: &mut Tokenizer) -> State { pub fn resolve(tokenizer: &mut Tokenizer) { resolve_whitespace( tokenizer, - tokenizer.parse_state.constructs.hard_break_trailing, + tokenizer.parse_state.options.constructs.hard_break_trailing, true, ); - if tokenizer.parse_state.constructs.gfm_autolink_literal { + if tokenizer + .parse_state + .options + .constructs + .gfm_autolink_literal + { resolve_gfm_autolink_literal(tokenizer); } } diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs index f77f83e..12dd7cf 100644 --- a/src/construct/thematic_break.rs +++ b/src/construct/thematic_break.rs @@ -69,7 +69,7 @@ use crate::util::constant::{TAB_SIZE, THEMATIC_BREAK_MARKER_COUNT_MIN}; /// ^ /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { - if tokenizer.parse_state.constructs.thematic_break { + if tokenizer.parse_state.options.constructs.thematic_break { tokenizer.enter(Name::ThematicBreak); if matches!(tokenizer.current, Some(b'\t' | b' ')) { @@ -77,7 +77,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { State::Retry(space_or_tab_min_max( tokenizer, 0, - if tokenizer.parse_state.constructs.code_indented { + if tokenizer.parse_state.options.constructs.code_indented { TAB_SIZE - 1 } else { usize::MAX -- cgit