about summary refs log tree commit diff stats
path: root/src/construct
diff options
context:
space:
mode:
author	Titus Wormer <tituswormer@gmail.com>	2022-08-22 11:50:42 +0200
committer	Titus Wormer <tituswormer@gmail.com>	2022-08-22 11:50:42 +0200
commit	351c69644bdbdf52c95e322904273657892920b5 (patch)
tree	114a93ff760b522232f9f7290bc6f632b7250095 /src/construct
parent	5e6829c2fb79c2b7f59e38f924e2b2900c52b5d5 (diff)
download	markdown-rs-351c69644bdbdf52c95e322904273657892920b5.tar.gz
	markdown-rs-351c69644bdbdf52c95e322904273657892920b5.tar.bz2
	markdown-rs-351c69644bdbdf52c95e322904273657892920b5.zip
Add support for GFM strikethrough
Diffstat (limited to 'src/construct')
-rw-r--r--	src/construct/attention.rs	206
-rw-r--r--	src/construct/autolink.rs	2
-rw-r--r--	src/construct/block_quote.rs	4
-rw-r--r--	src/construct/character_escape.rs	3
-rw-r--r--	src/construct/character_reference.rs	4
-rw-r--r--	src/construct/code_fenced.rs	6
-rw-r--r--	src/construct/code_indented.rs	2
-rw-r--r--	src/construct/code_text.rs	2
-rw-r--r--	src/construct/definition.rs	2
-rw-r--r--	src/construct/frontmatter.rs	2
-rw-r--r--	src/construct/hard_break_escape.rs	4
-rw-r--r--	src/construct/heading_atx.rs	4
-rw-r--r--	src/construct/heading_setext.rs	4
-rw-r--r--	src/construct/html_flow.rs	4
-rw-r--r--	src/construct/html_text.rs	2
-rw-r--r--	src/construct/label_end.rs	2
-rw-r--r--	src/construct/label_start_image.rs	3
-rw-r--r--	src/construct/label_start_link.rs	3
-rw-r--r--	src/construct/list_item.rs	4
-rw-r--r--	src/construct/text.rs	14
-rw-r--r--	src/construct/thematic_break.rs	4
21 files changed, 169 insertions, 112 deletions
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
index ef960d4..526f58c 100644
--- a/src/construct/attention.rs
+++ b/src/construct/attention.rs
@@ -1,4 +1,5 @@
-//! Attention (emphasis and strong) occurs in the [text][] content type.
+//! Attention (emphasis, strong, optionally GFM strikethrough) occurs in the
+//! [text][] content type.
//!
//! ## Grammar
//!
@@ -7,24 +8,31 @@
//!
//! ```bnf
//! attention_sequence ::= 1*'*' | 1*'_'
+//! gfm_attention_sequence ::= 1*'~'
//! ```
//!
//! Sequences are matched together to form attention based on which character
-//! they contain, and what character occurs before and after each sequence.
+//! they contain, how long they are, and what character occurs before and after
+//! each sequence.
//! Otherwise they are turned into data.
//!
//! ## HTML
//!
-//! When sequences match, and two markers can be “taken” from them, they
-//! together relate to the `<strong>` element in HTML.
+//! When asterisk/underscore sequences match, and two markers can be “taken”
+//! from them, they together relate to the `<strong>` element in HTML.
//! When one marker can be taken, they relate to the `<em>` element.
//! See [*§ 4.5.2 The `em` element*][html-em] and
//! [*§ 4.5.3 The `strong` element*][html-strong] in the HTML spec for more
//! info.
//!
+//! When tilde sequences match, they together relate to the `<del>` element in
+//! HTML.
+//! See [*§ 4.7.2 The `del` element*][html-del] in the HTML spec for more info.
+//!
//! ## Recommendation
//!
-//! It is recommended to use asterisks for attention when writing markdown.
+//! It is recommended to use asterisks for emphasis/strong attention when
+//! writing markdown.
//!
//! There are some small differences in whether sequences can open and/or close
//! based on whether they are formed with asterisks or underscores.
@@ -37,11 +45,18 @@
//! can look for asterisks to find syntax while not worrying about other
//! characters.
//!
+//! For strikethrough attention, it is recommended to use two markers.
+//! While `github.com` allows single tildes too, it technically prohibits it in
+//! their spec.
+//!
//! ## Tokens
//!
//! * [`Emphasis`][Name::Emphasis]
//! * [`EmphasisSequence`][Name::EmphasisSequence]
//! * [`EmphasisText`][Name::EmphasisText]
+//! * [`GfmStrikethrough`][Name::GfmStrikethrough]
+//! * [`GfmStrikethroughSequence`][Name::GfmStrikethroughSequence]
+//! * [`GfmStrikethroughText`][Name::GfmStrikethroughText]
//! * [`Strong`][Name::Strong]
//! * [`StrongSequence`][Name::StrongSequence]
//! * [`StrongText`][Name::StrongText]
@@ -52,11 +67,14 @@
//! ## References
//!
//! * [`attention.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/attention.js)
+//! * [`micromark-extension-gfm-strikethrough`](https://github.com/micromark/micromark-extension-gfm-strikethrough)
//! * [*§ 6.2 Emphasis and strong emphasis* in `CommonMark`](https://spec.commonmark.org/0.30/#emphasis-and-strong-emphasis)
+//! * [*§ 6.5 Strikethrough (extension)* in `GFM`](https://github.github.com/gfm/#strikethrough-extension-)
//!
//! [text]: crate::construct::text
//! [html-em]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-em-element
//! [html-strong]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-strong-element
+//! [html-del]: https://html.spec.whatwg.org/multipage/edits.html#the-del-element
use crate::event::{Event, Kind, Name, Point};
use crate::resolve::Name as ResolveName;
@@ -94,7 +112,11 @@ struct Sequence {
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if tokenizer.parse_state.constructs.attention && matches!(tokenizer.current, Some(b'*' | b'_'))
+ // Emphasis/strong:
+ if (tokenizer.parse_state.options.constructs.attention
+ && matches!(tokenizer.current, Some(b'*' | b'_')))
+ // GFM strikethrough:
+ || (tokenizer.parse_state.options.constructs.gfm_strikethrough && tokenizer.current == Some(b'~'))
{
tokenizer.tokenize_state.marker = tokenizer.current.unwrap();
tokenizer.enter(Name::AttentionSequence);
@@ -117,85 +139,15 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {
} else {
tokenizer.exit(Name::AttentionSequence);
tokenizer.register_resolver(ResolveName::Attention);
- tokenizer.tokenize_state.marker = b'\0';
+ tokenizer.tokenize_state.marker = 0;
State::Ok
}
}
-/// Resolve attention sequences.
+/// Resolve sequences.
pub fn resolve(tokenizer: &mut Tokenizer) {
- let mut index = 0;
- let mut balance = 0;
- let mut sequences = vec![];
-
// Find all sequences, gather info about them.
- while index < tokenizer.events.len() {
- let enter = &tokenizer.events[index];
-
- if enter.kind == Kind::Enter {
- balance += 1;
-
- if enter.name == Name::AttentionSequence {
- let end = index + 1;
- let exit = &tokenizer.events[end];
-
- let before_end = enter.point.index;
- let before_start = if before_end < 4 { 0 } else { before_end - 4 };
- let char_before =
- String::from_utf8_lossy(&tokenizer.parse_state.bytes[before_start..before_end])
- .chars()
- .last();
-
- let after_start = exit.point.index;
- let after_end = if after_start + 4 > tokenizer.parse_state.bytes.len() {
- tokenizer.parse_state.bytes.len()
- } else {
- after_start + 4
- };
- let char_after =
- String::from_utf8_lossy(&tokenizer.parse_state.bytes[after_start..after_end])
- .chars()
- .next();
-
- let marker = Slice::from_point(tokenizer.parse_state.bytes, &enter.point)
- .head()
- .unwrap();
- let before = classify_opt(char_before);
- let after = classify_opt(char_after);
- let open = after == CharacterKind::Other
- || (after == CharacterKind::Punctuation && before != CharacterKind::Other);
- // To do: GFM strikethrough?
- // || char_after == '~'
- let close = before == CharacterKind::Other
- || (before == CharacterKind::Punctuation && after != CharacterKind::Other);
- // To do: GFM strikethrough?
- // || char_before == '~'
-
- sequences.push(Sequence {
- index,
- balance,
- start_point: enter.point.clone(),
- end_point: exit.point.clone(),
- size: exit.point.index - enter.point.index,
- open: if marker == b'*' {
- open
- } else {
- open && (before != CharacterKind::Other || !close)
- },
- close: if marker == b'*' {
- close
- } else {
- close && (after != CharacterKind::Other || !open)
- },
- marker,
- });
- }
- } else {
- balance -= 1;
- }
-
- index += 1;
- }
+ let mut sequences = get_sequences(tokenizer);
// Now walk through them and match them.
let mut close = 0;
@@ -230,7 +182,20 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
continue;
}
- // We’ve found a match!
+ // For GFM strikethrough:
+ // * both sequences must have the same size
+ // * more than 2 markers don’t work
+ // * one marker is prohibited by the spec, but supported by GH
+ if sequence_close.marker == b'~'
+ && (sequence_close.size != sequence_open.size
+ || sequence_close.size > 2
+ || sequence_close.size == 1
+ && !tokenizer.parse_state.options.gfm_strikethrough_single_tilde)
+ {
+ continue;
+ }
+
+ // We found a match!
next_index = match_sequences(tokenizer, &mut sequences, open, close);
break;
@@ -253,7 +218,80 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
tokenizer.map.consume(&mut tokenizer.events);
}
+/// Get sequences.
+fn get_sequences(tokenizer: &mut Tokenizer) -> Vec<Sequence> {
+ let mut index = 0;
+ let mut balance = 0;
+ let mut sequences = vec![];
+
+ while index < tokenizer.events.len() {
+ let enter = &tokenizer.events[index];
+
+ if enter.kind == Kind::Enter {
+ balance += 1;
+
+ if enter.name == Name::AttentionSequence {
+ let end = index + 1;
+ let exit = &tokenizer.events[end];
+
+ let before_end = enter.point.index;
+ let before_start = if before_end < 4 { 0 } else { before_end - 4 };
+ let after_start = exit.point.index;
+ let after_end = if after_start + 4 > tokenizer.parse_state.bytes.len() {
+ tokenizer.parse_state.bytes.len()
+ } else {
+ after_start + 4
+ };
+
+ let marker = Slice::from_point(tokenizer.parse_state.bytes, &enter.point)
+ .head()
+ .unwrap();
+ let before = classify_opt(
+ String::from_utf8_lossy(&tokenizer.parse_state.bytes[before_start..before_end])
+ .chars()
+ .last(),
+ );
+ let after = classify_opt(
+ String::from_utf8_lossy(&tokenizer.parse_state.bytes[after_start..after_end])
+ .chars()
+ .next(),
+ );
+ let open = after == CharacterKind::Other
+ || (after == CharacterKind::Punctuation && before != CharacterKind::Other);
+ let close = before == CharacterKind::Other
+ || (before == CharacterKind::Punctuation && after != CharacterKind::Other);
+
+ sequences.push(Sequence {
+ index,
+ balance,
+ start_point: enter.point.clone(),
+ end_point: exit.point.clone(),
+ size: exit.point.index - enter.point.index,
+ open: if marker == b'_' {
+ open && (before != CharacterKind::Other || !close)
+ } else {
+ open
+ },
+ close: if marker == b'_' {
+ close && (after != CharacterKind::Other || !open)
+ } else {
+ close
+ },
+ marker,
+ });
+ }
+ } else {
+ balance -= 1;
+ }
+
+ index += 1;
+ }
+
+ sequences
+}
+
/// Match two sequences.
+#[allow(clippy::too_many_lines)]
fn match_sequences(
tokenizer: &mut Tokenizer,
sequences: &mut Vec<Sequence>,
@@ -292,7 +330,13 @@ fn match_sequences(
between += 1;
}
- let (group_name, seq_name, text_name) = if take == 1 {
+ let (group_name, seq_name, text_name) = if sequences[open].marker == b'~' {
+ (
+ Name::GfmStrikethrough,
+ Name::GfmStrikethroughSequence,
+ Name::GfmStrikethroughText,
+ )
+ } else if take == 1 {
(Name::Emphasis, Name::EmphasisSequence, Name::EmphasisText)
} else {
(Name::Strong, Name::StrongSequence, Name::StrongText)
diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs
index 4ecd580..21f8fa5 100644
--- a/src/construct/autolink.rs
+++ b/src/construct/autolink.rs
@@ -135,7 +135,7 @@ use crate::util::constant::{AUTOLINK_DOMAIN_SIZE_MAX, AUTOLINK_SCHEME_SIZE_MAX};
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if tokenizer.parse_state.constructs.autolink && tokenizer.current == Some(b'<') {
+ if tokenizer.parse_state.options.constructs.autolink && tokenizer.current == Some(b'<') {
tokenizer.enter(Name::Autolink);
tokenizer.enter(Name::AutolinkMarker);
tokenizer.consume();
diff --git a/src/construct/block_quote.rs b/src/construct/block_quote.rs
index 039c839..11783d0 100644
--- a/src/construct/block_quote.rs
+++ b/src/construct/block_quote.rs
@@ -59,7 +59,7 @@ use crate::util::constant::TAB_SIZE;
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if tokenizer.parse_state.constructs.block_quote {
+ if tokenizer.parse_state.options.constructs.block_quote {
tokenizer.enter(Name::BlockQuote);
State::Retry(StateName::BlockQuoteContStart)
} else {
@@ -82,7 +82,7 @@ pub fn cont_start(tokenizer: &mut Tokenizer) -> State {
State::Retry(space_or_tab_min_max(
tokenizer,
1,
- if tokenizer.parse_state.constructs.code_indented {
+ if tokenizer.parse_state.options.constructs.code_indented {
TAB_SIZE - 1
} else {
usize::MAX
diff --git a/src/construct/character_escape.rs b/src/construct/character_escape.rs
index 438092e..67946a0 100644
--- a/src/construct/character_escape.rs
+++ b/src/construct/character_escape.rs
@@ -53,7 +53,8 @@ use crate::tokenizer::Tokenizer;
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if tokenizer.parse_state.constructs.character_escape && tokenizer.current == Some(b'\\') {
+ if tokenizer.parse_state.options.constructs.character_escape && tokenizer.current == Some(b'\\')
+ {
tokenizer.enter(Name::CharacterEscape);
tokenizer.enter(Name::CharacterEscapeMarker);
tokenizer.consume();
diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs
index 4669836..927e3d9 100644
--- a/src/construct/character_reference.rs
+++ b/src/construct/character_reference.rs
@@ -90,7 +90,9 @@ use crate::util::{
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if tokenizer.parse_state.constructs.character_reference && tokenizer.current == Some(b'&') {
+ if tokenizer.parse_state.options.constructs.character_reference
+ && tokenizer.current == Some(b'&')
+ {
tokenizer.enter(Name::CharacterReference);
tokenizer.enter(Name::CharacterReferenceMarker);
tokenizer.consume();
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index bfd15dc..d117006 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -128,7 +128,7 @@ use crate::util::{
/// | ~~~
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if tokenizer.parse_state.constructs.code_fenced {
+ if tokenizer.parse_state.options.constructs.code_fenced {
if matches!(tokenizer.current, Some(b'\t' | b' ')) {
tokenizer.enter(Name::CodeFenced);
tokenizer.enter(Name::CodeFencedFence);
@@ -139,7 +139,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
return State::Retry(space_or_tab_min_max(
tokenizer,
0,
- if tokenizer.parse_state.constructs.code_indented {
+ if tokenizer.parse_state.options.constructs.code_indented {
TAB_SIZE - 1
} else {
usize::MAX
@@ -384,7 +384,7 @@ pub fn close_start(tokenizer: &mut Tokenizer) -> State {
State::Retry(space_or_tab_min_max(
tokenizer,
0,
- if tokenizer.parse_state.constructs.code_indented {
+ if tokenizer.parse_state.options.constructs.code_indented {
TAB_SIZE - 1
} else {
usize::MAX
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs
index 866c78e..7d279c1 100644
--- a/src/construct/code_indented.rs
+++ b/src/construct/code_indented.rs
@@ -72,7 +72,7 @@ use crate::util::constant::TAB_SIZE;
pub fn start(tokenizer: &mut Tokenizer) -> State {
// Do not interrupt paragraphs.
if !tokenizer.interrupt
- && tokenizer.parse_state.constructs.code_indented
+ && tokenizer.parse_state.options.constructs.code_indented
&& matches!(tokenizer.current, Some(b'\t' | b' '))
{
tokenizer.enter(Name::CodeIndented);
diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs
index 413b5ee..b2cfd17 100644
--- a/src/construct/code_text.rs
+++ b/src/construct/code_text.rs
@@ -100,7 +100,7 @@ use crate::tokenizer::Tokenizer;
pub fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'`')
- if tokenizer.parse_state.constructs.code_text
+ if tokenizer.parse_state.options.constructs.code_text
&& (tokenizer.previous != Some(b'`')
|| (!tokenizer.events.is_empty()
&& tokenizer.events[tokenizer.events.len() - 1].name
diff --git a/src/construct/definition.rs b/src/construct/definition.rs
index 071e595..e65d979 100644
--- a/src/construct/definition.rs
+++ b/src/construct/definition.rs
@@ -120,7 +120,7 @@ use crate::util::{
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
// Do not interrupt paragraphs (but do follow definitions).
- if tokenizer.parse_state.constructs.definition
+ if tokenizer.parse_state.options.constructs.definition
&& (!tokenizer.interrupt
|| (!tokenizer.events.is_empty()
&& tokenizer.events[skip::opt_back(
diff --git a/src/construct/frontmatter.rs b/src/construct/frontmatter.rs
index 74006f6..268d91d 100644
--- a/src/construct/frontmatter.rs
+++ b/src/construct/frontmatter.rs
@@ -72,7 +72,7 @@ use crate::util::constant::FRONTMATTER_SEQUENCE_SIZE;
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
// Indent not allowed.
- if tokenizer.parse_state.constructs.frontmatter
+ if tokenizer.parse_state.options.constructs.frontmatter
&& matches!(tokenizer.current, Some(b'+' | b'-'))
{
tokenizer.tokenize_state.marker = tokenizer.current.unwrap();
diff --git a/src/construct/hard_break_escape.rs b/src/construct/hard_break_escape.rs
index 64c909a..c562ff6 100644
--- a/src/construct/hard_break_escape.rs
+++ b/src/construct/hard_break_escape.rs
@@ -56,7 +56,9 @@ use crate::tokenizer::Tokenizer;
/// | b
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if tokenizer.parse_state.constructs.hard_break_escape && tokenizer.current == Some(b'\\') {
+ if tokenizer.parse_state.options.constructs.hard_break_escape
+ && tokenizer.current == Some(b'\\')
+ {
tokenizer.enter(Name::HardBreakEscape);
tokenizer.consume();
State::Next(StateName::HardBreakEscapeAfter)
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index dd09f74..c1090c4 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -77,14 +77,14 @@ use alloc::vec;
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if tokenizer.parse_state.constructs.heading_atx {
+ if tokenizer.parse_state.options.constructs.heading_atx {
tokenizer.enter(Name::HeadingAtx);
if matches!(tokenizer.current, Some(b'\t' | b' ')) {
tokenizer.attempt(State::Next(StateName::HeadingAtxBefore), State::Nok);
State::Retry(space_or_tab_min_max(
tokenizer,
0,
- if tokenizer.parse_state.constructs.code_indented {
+ if tokenizer.parse_state.options.constructs.code_indented {
TAB_SIZE - 1
} else {
usize::MAX
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index 19d2dda..df1d4fb 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -85,7 +85,7 @@ use alloc::vec;
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if tokenizer.parse_state.constructs.heading_setext
+ if tokenizer.parse_state.options.constructs.heading_setext
&& !tokenizer.lazy
// Require a paragraph before.
&& (!tokenizer.events.is_empty()
@@ -102,7 +102,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
State::Retry(space_or_tab_min_max(
tokenizer,
0,
- if tokenizer.parse_state.constructs.code_indented {
+ if tokenizer.parse_state.options.constructs.code_indented {
TAB_SIZE - 1
} else {
usize::MAX
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index edb500e..3f6e19a 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -131,7 +131,7 @@ const COMPLETE: u8 = 7;
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if tokenizer.parse_state.constructs.html_flow {
+ if tokenizer.parse_state.options.constructs.html_flow {
tokenizer.enter(Name::HtmlFlow);
if matches!(tokenizer.current, Some(b'\t' | b' ')) {
@@ -141,7 +141,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
SpaceOrTabOptions {
kind: Name::HtmlFlowData,
min: 0,
- max: if tokenizer.parse_state.constructs.code_indented {
+ max: if tokenizer.parse_state.options.constructs.code_indented {
TAB_SIZE - 1
} else {
usize::MAX
diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs
index 5aa6137..d40361d 100644
--- a/src/construct/html_text.rs
+++ b/src/construct/html_text.rs
@@ -64,7 +64,7 @@ use crate::util::constant::HTML_CDATA_PREFIX;
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if Some(b'<') == tokenizer.current && tokenizer.parse_state.constructs.html_text {
+ if Some(b'<') == tokenizer.current && tokenizer.parse_state.options.constructs.html_text {
tokenizer.enter(Name::HtmlText);
tokenizer.enter(Name::HtmlTextData);
tokenizer.consume();
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index 4532920..0ea745f 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -183,7 +183,7 @@ use alloc::vec;
/// > | [a] b
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if Some(b']') == tokenizer.current && tokenizer.parse_state.constructs.label_end {
+ if Some(b']') == tokenizer.current && tokenizer.parse_state.options.constructs.label_end {
// If there is an okay opening:
if !tokenizer.tokenize_state.label_starts.is_empty() {
let label_start = tokenizer.tokenize_state.label_starts.last().unwrap();
diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs
index 8d35df2..a8c9ac3 100644
--- a/src/construct/label_start_image.rs
+++ b/src/construct/label_start_image.rs
@@ -44,7 +44,8 @@ use crate::tokenizer::{LabelStart, Tokenizer};
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if tokenizer.parse_state.constructs.label_start_image && tokenizer.current == Some(b'!') {
+ if tokenizer.parse_state.options.constructs.label_start_image && tokenizer.current == Some(b'!')
+ {
tokenizer.enter(Name::LabelImage);
tokenizer.enter(Name::LabelImageMarker);
tokenizer.consume();
diff --git a/src/construct/label_start_link.rs b/src/construct/label_start_link.rs
index e079b2d..3aeb68b 100644
--- a/src/construct/label_start_link.rs
+++ b/src/construct/label_start_link.rs
@@ -43,7 +43,8 @@ use crate::tokenizer::{LabelStart, Tokenizer};
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if tokenizer.parse_state.constructs.label_start_link && tokenizer.current == Some(b'[') {
+ if tokenizer.parse_state.options.constructs.label_start_link && tokenizer.current == Some(b'[')
+ {
let start = tokenizer.events.len();
tokenizer.enter(Name::LabelLink);
tokenizer.enter(Name::LabelMarker);
diff --git a/src/construct/list_item.rs b/src/construct/list_item.rs
index 7228a00..39b5d13 100644
--- a/src/construct/list_item.rs
+++ b/src/construct/list_item.rs
@@ -77,7 +77,7 @@ use alloc::{vec, vec::Vec};
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if tokenizer.parse_state.constructs.list_item {
+ if tokenizer.parse_state.options.constructs.list_item {
tokenizer.enter(Name::ListItem);
if matches!(tokenizer.current, Some(b'\t' | b' ')) {
@@ -85,7 +85,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
State::Retry(space_or_tab_min_max(
tokenizer,
0,
- if tokenizer.parse_state.constructs.code_indented {
+ if tokenizer.parse_state.options.constructs.code_indented {
TAB_SIZE - 1
} else {
usize::MAX
diff --git a/src/construct/text.rs b/src/construct/text.rs
index 06ba378..9d40585 100644
--- a/src/construct/text.rs
+++ b/src/construct/text.rs
@@ -27,7 +27,7 @@ use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
/// Characters that can start something in text.
-const MARKERS: [u8; 9] = [
+const MARKERS: [u8; 10] = [
b'!', // `label_start_image`
b'&', // `character_reference`
b'*', // `attention`
@@ -37,6 +37,7 @@ const MARKERS: [u8; 9] = [
b']', // `label_end`
b'_', // `attention`
b'`', // `code_text`
+ b'~', // `attention` (w/ `gfm_strikethrough`)
];
/// Start of text.
@@ -77,7 +78,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
);
State::Retry(StateName::CharacterReferenceStart)
}
- Some(b'*' | b'_') => {
+ Some(b'*' | b'_' | b'~') => {
tokenizer.attempt(
State::Next(StateName::TextBefore),
State::Next(StateName::TextBeforeData),
@@ -171,11 +172,16 @@ pub fn before_data(tokenizer: &mut Tokenizer) -> State {
pub fn resolve(tokenizer: &mut Tokenizer) {
resolve_whitespace(
tokenizer,
- tokenizer.parse_state.constructs.hard_break_trailing,
+ tokenizer.parse_state.options.constructs.hard_break_trailing,
true,
);
- if tokenizer.parse_state.constructs.gfm_autolink_literal {
+ if tokenizer
+ .parse_state
+ .options
+ .constructs
+ .gfm_autolink_literal
+ {
resolve_gfm_autolink_literal(tokenizer);
}
}
diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs
index f77f83e..12dd7cf 100644
--- a/src/construct/thematic_break.rs
+++ b/src/construct/thematic_break.rs
@@ -69,7 +69,7 @@ use crate::util::constant::{TAB_SIZE, THEMATIC_BREAK_MARKER_COUNT_MIN};
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if tokenizer.parse_state.constructs.thematic_break {
+ if tokenizer.parse_state.options.constructs.thematic_break {
tokenizer.enter(Name::ThematicBreak);
if matches!(tokenizer.current, Some(b'\t' | b' ')) {
@@ -77,7 +77,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
State::Retry(space_or_tab_min_max(
tokenizer,
0,
- if tokenizer.parse_state.constructs.code_indented {
+ if tokenizer.parse_state.options.constructs.code_indented {
TAB_SIZE - 1
} else {
usize::MAX