about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/compiler.rs 18
-rw-r--r-- src/construct/attention.rs 206
-rw-r--r-- src/construct/autolink.rs 2
-rw-r--r-- src/construct/block_quote.rs 4
-rw-r--r-- src/construct/character_escape.rs 3
-rw-r--r-- src/construct/character_reference.rs 4
-rw-r--r-- src/construct/code_fenced.rs 6
-rw-r--r-- src/construct/code_indented.rs 2
-rw-r--r-- src/construct/code_text.rs 2
-rw-r--r-- src/construct/definition.rs 2
-rw-r--r-- src/construct/frontmatter.rs 2
-rw-r--r-- src/construct/hard_break_escape.rs 4
-rw-r--r-- src/construct/heading_atx.rs 4
-rw-r--r-- src/construct/heading_setext.rs 4
-rw-r--r-- src/construct/html_flow.rs 4
-rw-r--r-- src/construct/html_text.rs 2
-rw-r--r-- src/construct/label_end.rs 2
-rw-r--r-- src/construct/label_start_image.rs 3
-rw-r--r-- src/construct/label_start_link.rs 3
-rw-r--r-- src/construct/list_item.rs 4
-rw-r--r-- src/construct/text.rs 14
-rw-r--r-- src/construct/thematic_break.rs 4
-rw-r--r-- src/event.rs 59
-rw-r--r-- src/lib.rs 61
-rw-r--r-- src/parser.rs 7
25 files changed, 307 insertions, 119 deletions
diff --git a/src/compiler.rs b/src/compiler.rs
index 2e13294..abf35c8 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -326,6 +326,7 @@ fn enter(context: &mut CompileContext) {
Name::DefinitionDestinationString => on_enter_definition_destination_string(context),
Name::Emphasis => on_enter_emphasis(context),
Name::Frontmatter => on_enter_frontmatter(context),
+ Name::GfmStrikethrough => on_enter_gfm_strikethrough(context),
Name::HtmlFlow => on_enter_html_flow(context),
Name::HtmlText => on_enter_html_text(context),
Name::Image => on_enter_image(context),
@@ -369,6 +370,7 @@ fn exit(context: &mut CompileContext) {
Name::DefinitionTitleString => on_exit_definition_title_string(context),
Name::Emphasis => on_exit_emphasis(context),
Name::Frontmatter => on_exit_frontmatter(context),
+ Name::GfmStrikethrough => on_exit_gfm_strikethrough(context),
Name::GfmAutolinkLiteralProtocol => on_exit_gfm_autolink_literal_protocol(context),
Name::GfmAutolinkLiteralWww => on_exit_gfm_autolink_literal_www(context),
Name::GfmAutolinkLiteralEmail => on_exit_gfm_autolink_literal_email(context),
@@ -467,6 +469,13 @@ fn on_enter_frontmatter(context: &mut CompileContext) {
context.buffer();
}
+/// Handle [`Enter`][Kind::Enter]:[`GfmStrikethrough`][Name::GfmStrikethrough].
+fn on_enter_gfm_strikethrough(context: &mut CompileContext) {
+ if !context.image_alt_inside {
+ context.push("<del>");
+ }
+}
+
/// Handle [`Enter`][Kind::Enter]:[`HtmlFlow`][Name::HtmlFlow].
fn on_enter_html_flow(context: &mut CompileContext) {
context.line_ending_if_needed();
@@ -898,7 +907,7 @@ fn on_exit_definition_title_string(context: &mut CompileContext) {
context.media_stack.last_mut().unwrap().title = Some(buf);
}
-/// Handle [`Exit`][Kind::Exit]:[`Strong`][Name::Emphasis].
+/// Handle [`Exit`][Kind::Exit]:[`Emphasis`][Name::Emphasis].
fn on_exit_emphasis(context: &mut CompileContext) {
if !context.image_alt_inside {
context.push("</em>");
@@ -942,6 +951,13 @@ fn on_exit_gfm_autolink_literal_email(context: &mut CompileContext) {
on_exit_autolink_email(context);
}
+/// Handle [`Exit`][Kind::Exit]:[`GfmStrikethrough`][Name::GfmStrikethrough].
+fn on_exit_gfm_strikethrough(context: &mut CompileContext) {
+ if !context.image_alt_inside {
+ context.push("</del>");
+ }
+}
+
/// Handle [`Exit`][Kind::Exit]:[`HeadingAtx`][Name::HeadingAtx].
fn on_exit_heading_atx(context: &mut CompileContext) {
let rank = context
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
index ef960d4..526f58c 100644
--- a/src/construct/attention.rs
+++ b/src/construct/attention.rs
@@ -1,4 +1,5 @@
-//! Attention (emphasis and strong) occurs in the [text][] content type.
+//! Attention (emphasis, strong, optionally GFM strikethrough) occurs in the
+//! [text][] content type.
//!
//! ## Grammar
//!
@@ -7,24 +8,31 @@
//!
//! ```bnf
//! attention_sequence ::= 1*'*' | 1*'_'
+//! gfm_attention_sequence ::= 1*'~'
//! ```
//!
//! Sequences are matched together to form attention based on which character
-//! they contain, and what character occurs before and after each sequence.
+//! they contain, how long they are, and what character occurs before and after
+//! each sequence.
//! Otherwise they are turned into data.
//!
//! ## HTML
//!
-//! When sequences match, and two markers can be “taken” from them, they
-//! together relate to the `<strong>` element in HTML.
+//! When asterisk/underscore sequences match, and two markers can be “taken”
+//! from them, they together relate to the `<strong>` element in HTML.
//! When one marker can be taken, they relate to the `<em>` element.
//! See [*§ 4.5.2 The `em` element*][html-em] and
//! [*§ 4.5.3 The `strong` element*][html-strong] in the HTML spec for more
//! info.
//!
+//! When tilde sequences match, they together relate to the `<del>` element in
+//! HTML.
+//! See [*§ 4.7.2 The `del` element*][html-del] in the HTML spec for more info.
+//!
//! ## Recommendation
//!
-//! It is recommended to use asterisks for attention when writing markdown.
+//! It is recommended to use asterisks for emphasis/strong attention when
+//! writing markdown.
//!
//! There are some small differences in whether sequences can open and/or close
//! based on whether they are formed with asterisks or underscores.
@@ -37,11 +45,18 @@
//! can look for asterisks to find syntax while not worrying about other
//! characters.
//!
+//! For strikethrough attention, it is recommended to use two markers.
+//! While `github.com` allows single tildes too, the GFM spec technically
+//! prohibits them.
+//!
//! ## Tokens
//!
//! * [`Emphasis`][Name::Emphasis]
//! * [`EmphasisSequence`][Name::EmphasisSequence]
//! * [`EmphasisText`][Name::EmphasisText]
+//! * [`GfmStrikethrough`][Name::GfmStrikethrough]
+//! * [`GfmStrikethroughSequence`][Name::GfmStrikethroughSequence]
+//! * [`GfmStrikethroughText`][Name::GfmStrikethroughText]
//! * [`Strong`][Name::Strong]
//! * [`StrongSequence`][Name::StrongSequence]
//! * [`StrongText`][Name::StrongText]
@@ -52,11 +67,14 @@
//! ## References
//!
//! * [`attention.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/attention.js)
+//! * [`micromark-extension-gfm-strikethrough`](https://github.com/micromark/micromark-extension-gfm-strikethrough)
//! * [*§ 6.2 Emphasis and strong emphasis* in `CommonMark`](https://spec.commonmark.org/0.30/#emphasis-and-strong-emphasis)
+//! * [*§ 6.5 Strikethrough (extension)* in `GFM`](https://github.github.com/gfm/#strikethrough-extension-)
//!
//! [text]: crate::construct::text
//! [html-em]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-em-element
//! [html-strong]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-strong-element
+//! [html-del]: https://html.spec.whatwg.org/multipage/edits.html#the-del-element
use crate::event::{Event, Kind, Name, Point};
use crate::resolve::Name as ResolveName;
@@ -94,7 +112,11 @@ struct Sequence {
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if tokenizer.parse_state.constructs.attention && matches!(tokenizer.current, Some(b'*' | b'_'))
+ // Emphasis/strong:
+ if (tokenizer.parse_state.options.constructs.attention
+ && matches!(tokenizer.current, Some(b'*' | b'_')))
+ // GFM strikethrough:
+ || (tokenizer.parse_state.options.constructs.gfm_strikethrough && tokenizer.current == Some(b'~'))
{
tokenizer.tokenize_state.marker = tokenizer.current.unwrap();
tokenizer.enter(Name::AttentionSequence);
@@ -117,85 +139,15 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {
} else {
tokenizer.exit(Name::AttentionSequence);
tokenizer.register_resolver(ResolveName::Attention);
- tokenizer.tokenize_state.marker = b'\0';
+ tokenizer.tokenize_state.marker = 0;
State::Ok
}
}
-/// Resolve attention sequences.
+/// Resolve sequences.
pub fn resolve(tokenizer: &mut Tokenizer) {
- let mut index = 0;
- let mut balance = 0;
- let mut sequences = vec![];
-
// Find all sequences, gather info about them.
- while index < tokenizer.events.len() {
- let enter = &tokenizer.events[index];
-
- if enter.kind == Kind::Enter {
- balance += 1;
-
- if enter.name == Name::AttentionSequence {
- let end = index + 1;
- let exit = &tokenizer.events[end];
-
- let before_end = enter.point.index;
- let before_start = if before_end < 4 { 0 } else { before_end - 4 };
- let char_before =
- String::from_utf8_lossy(&tokenizer.parse_state.bytes[before_start..before_end])
- .chars()
- .last();
-
- let after_start = exit.point.index;
- let after_end = if after_start + 4 > tokenizer.parse_state.bytes.len() {
- tokenizer.parse_state.bytes.len()
- } else {
- after_start + 4
- };
- let char_after =
- String::from_utf8_lossy(&tokenizer.parse_state.bytes[after_start..after_end])
- .chars()
- .next();
-
- let marker = Slice::from_point(tokenizer.parse_state.bytes, &enter.point)
- .head()
- .unwrap();
- let before = classify_opt(char_before);
- let after = classify_opt(char_after);
- let open = after == CharacterKind::Other
- || (after == CharacterKind::Punctuation && before != CharacterKind::Other);
- // To do: GFM strikethrough?
- // || char_after == '~'
- let close = before == CharacterKind::Other
- || (before == CharacterKind::Punctuation && after != CharacterKind::Other);
- // To do: GFM strikethrough?
- // || char_before == '~'
-
- sequences.push(Sequence {
- index,
- balance,
- start_point: enter.point.clone(),
- end_point: exit.point.clone(),
- size: exit.point.index - enter.point.index,
- open: if marker == b'*' {
- open
- } else {
- open && (before != CharacterKind::Other || !close)
- },
- close: if marker == b'*' {
- close
- } else {
- close && (after != CharacterKind::Other || !open)
- },
- marker,
- });
- }
- } else {
- balance -= 1;
- }
-
- index += 1;
- }
+ let mut sequences = get_sequences(tokenizer);
// Now walk through them and match them.
let mut close = 0;
@@ -230,7 +182,20 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
continue;
}
- // We’ve found a match!
+ // For GFM strikethrough:
+ // * both sequences must have the same size
+ // * more than 2 markers don’t work
+ // * one marker is prohibited by the spec, but supported by `github.com`
+ if sequence_close.marker == b'~'
+ && (sequence_close.size != sequence_open.size
+ || sequence_close.size > 2
+ || sequence_close.size == 1
+ && !tokenizer.parse_state.options.gfm_strikethrough_single_tilde)
+ {
+ continue;
+ }
+
+ // We found a match!
next_index = match_sequences(tokenizer, &mut sequences, open, close);
break;
@@ -253,7 +218,80 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
tokenizer.map.consume(&mut tokenizer.events);
}
+/// Get sequences: find all attention sequences and gather info about them.
+fn get_sequences(tokenizer: &mut Tokenizer) -> Vec<Sequence> {
+ let mut index = 0;
+ let mut balance = 0;
+ let mut sequences = vec![];
+
+ while index < tokenizer.events.len() {
+ let enter = &tokenizer.events[index];
+
+ if enter.kind == Kind::Enter {
+ balance += 1;
+
+ if enter.name == Name::AttentionSequence {
+ let end = index + 1;
+ let exit = &tokenizer.events[end];
+
+ let before_end = enter.point.index;
+ let before_start = if before_end < 4 { 0 } else { before_end - 4 };
+ let after_start = exit.point.index;
+ let after_end = if after_start + 4 > tokenizer.parse_state.bytes.len() {
+ tokenizer.parse_state.bytes.len()
+ } else {
+ after_start + 4
+ };
+
+ let marker = Slice::from_point(tokenizer.parse_state.bytes, &enter.point)
+ .head()
+ .unwrap();
+ let before = classify_opt(
+ String::from_utf8_lossy(&tokenizer.parse_state.bytes[before_start..before_end])
+ .chars()
+ .last(),
+ );
+ let after = classify_opt(
+ String::from_utf8_lossy(&tokenizer.parse_state.bytes[after_start..after_end])
+ .chars()
+ .next(),
+ );
+ let open = after == CharacterKind::Other
+ || (after == CharacterKind::Punctuation && before != CharacterKind::Other);
+ let close = before == CharacterKind::Other
+ || (before == CharacterKind::Punctuation && after != CharacterKind::Other);
+
+ sequences.push(Sequence {
+ index,
+ balance,
+ start_point: enter.point.clone(),
+ end_point: exit.point.clone(),
+ size: exit.point.index - enter.point.index,
+ open: if marker == b'_' {
+ open && (before != CharacterKind::Other || !close)
+ } else {
+ open
+ },
+ close: if marker == b'_' {
+ close && (after != CharacterKind::Other || !open)
+ } else {
+ close
+ },
+ marker,
+ });
+ }
+ } else {
+ balance -= 1;
+ }
+
+ index += 1;
+ }
+
+ sequences
+}
+
/// Match two sequences.
+#[allow(clippy::too_many_lines)]
fn match_sequences(
tokenizer: &mut Tokenizer,
sequences: &mut Vec<Sequence>,
@@ -292,7 +330,13 @@ fn match_sequences(
between += 1;
}
- let (group_name, seq_name, text_name) = if take == 1 {
+ let (group_name, seq_name, text_name) = if sequences[open].marker == b'~' {
+ (
+ Name::GfmStrikethrough,
+ Name::GfmStrikethroughSequence,
+ Name::GfmStrikethroughText,
+ )
+ } else if take == 1 {
(Name::Emphasis, Name::EmphasisSequence, Name::EmphasisText)
} else {
(Name::Strong, Name::StrongSequence, Name::StrongText)
diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs
index 4ecd580..21f8fa5 100644
--- a/src/construct/autolink.rs
+++ b/src/construct/autolink.rs
@@ -135,7 +135,7 @@ use crate::util::constant::{AUTOLINK_DOMAIN_SIZE_MAX, AUTOLINK_SCHEME_SIZE_MAX};
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if tokenizer.parse_state.constructs.autolink && tokenizer.current == Some(b'<') {
+ if tokenizer.parse_state.options.constructs.autolink && tokenizer.current == Some(b'<') {
tokenizer.enter(Name::Autolink);
tokenizer.enter(Name::AutolinkMarker);
tokenizer.consume();
diff --git a/src/construct/block_quote.rs b/src/construct/block_quote.rs
index 039c839..11783d0 100644
--- a/src/construct/block_quote.rs
+++ b/src/construct/block_quote.rs
@@ -59,7 +59,7 @@ use crate::util::constant::TAB_SIZE;
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if tokenizer.parse_state.constructs.block_quote {
+ if tokenizer.parse_state.options.constructs.block_quote {
tokenizer.enter(Name::BlockQuote);
State::Retry(StateName::BlockQuoteContStart)
} else {
@@ -82,7 +82,7 @@ pub fn cont_start(tokenizer: &mut Tokenizer) -> State {
State::Retry(space_or_tab_min_max(
tokenizer,
1,
- if tokenizer.parse_state.constructs.code_indented {
+ if tokenizer.parse_state.options.constructs.code_indented {
TAB_SIZE - 1
} else {
usize::MAX
diff --git a/src/construct/character_escape.rs b/src/construct/character_escape.rs
index 438092e..67946a0 100644
--- a/src/construct/character_escape.rs
+++ b/src/construct/character_escape.rs
@@ -53,7 +53,8 @@ use crate::tokenizer::Tokenizer;
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if tokenizer.parse_state.constructs.character_escape && tokenizer.current == Some(b'\\') {
+ if tokenizer.parse_state.options.constructs.character_escape && tokenizer.current == Some(b'\\')
+ {
tokenizer.enter(Name::CharacterEscape);
tokenizer.enter(Name::CharacterEscapeMarker);
tokenizer.consume();
diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs
index 4669836..927e3d9 100644
--- a/src/construct/character_reference.rs
+++ b/src/construct/character_reference.rs
@@ -90,7 +90,9 @@ use crate::util::{
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if tokenizer.parse_state.constructs.character_reference && tokenizer.current == Some(b'&') {
+ if tokenizer.parse_state.options.constructs.character_reference
+ && tokenizer.current == Some(b'&')
+ {
tokenizer.enter(Name::CharacterReference);
tokenizer.enter(Name::CharacterReferenceMarker);
tokenizer.consume();
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index bfd15dc..d117006 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -128,7 +128,7 @@ use crate::util::{
/// | ~~~
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if tokenizer.parse_state.constructs.code_fenced {
+ if tokenizer.parse_state.options.constructs.code_fenced {
if matches!(tokenizer.current, Some(b'\t' | b' ')) {
tokenizer.enter(Name::CodeFenced);
tokenizer.enter(Name::CodeFencedFence);
@@ -139,7 +139,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
return State::Retry(space_or_tab_min_max(
tokenizer,
0,
- if tokenizer.parse_state.constructs.code_indented {
+ if tokenizer.parse_state.options.constructs.code_indented {
TAB_SIZE - 1
} else {
usize::MAX
@@ -384,7 +384,7 @@ pub fn close_start(tokenizer: &mut Tokenizer) -> State {
State::Retry(space_or_tab_min_max(
tokenizer,
0,
- if tokenizer.parse_state.constructs.code_indented {
+ if tokenizer.parse_state.options.constructs.code_indented {
TAB_SIZE - 1
} else {
usize::MAX
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs
index 866c78e..7d279c1 100644
--- a/src/construct/code_indented.rs
+++ b/src/construct/code_indented.rs
@@ -72,7 +72,7 @@ use crate::util::constant::TAB_SIZE;
pub fn start(tokenizer: &mut Tokenizer) -> State {
// Do not interrupt paragraphs.
if !tokenizer.interrupt
- && tokenizer.parse_state.constructs.code_indented
+ && tokenizer.parse_state.options.constructs.code_indented
&& matches!(tokenizer.current, Some(b'\t' | b' '))
{
tokenizer.enter(Name::CodeIndented);
diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs
index 413b5ee..b2cfd17 100644
--- a/src/construct/code_text.rs
+++ b/src/construct/code_text.rs
@@ -100,7 +100,7 @@ use crate::tokenizer::Tokenizer;
pub fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'`')
- if tokenizer.parse_state.constructs.code_text
+ if tokenizer.parse_state.options.constructs.code_text
&& (tokenizer.previous != Some(b'`')
|| (!tokenizer.events.is_empty()
&& tokenizer.events[tokenizer.events.len() - 1].name
diff --git a/src/construct/definition.rs b/src/construct/definition.rs
index 071e595..e65d979 100644
--- a/src/construct/definition.rs
+++ b/src/construct/definition.rs
@@ -120,7 +120,7 @@ use crate::util::{
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
// Do not interrupt paragraphs (but do follow definitions).
- if tokenizer.parse_state.constructs.definition
+ if tokenizer.parse_state.options.constructs.definition
&& (!tokenizer.interrupt
|| (!tokenizer.events.is_empty()
&& tokenizer.events[skip::opt_back(
diff --git a/src/construct/frontmatter.rs b/src/construct/frontmatter.rs
index 74006f6..268d91d 100644
--- a/src/construct/frontmatter.rs
+++ b/src/construct/frontmatter.rs
@@ -72,7 +72,7 @@ use crate::util::constant::FRONTMATTER_SEQUENCE_SIZE;
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
// Indent not allowed.
- if tokenizer.parse_state.constructs.frontmatter
+ if tokenizer.parse_state.options.constructs.frontmatter
&& matches!(tokenizer.current, Some(b'+' | b'-'))
{
tokenizer.tokenize_state.marker = tokenizer.current.unwrap();
diff --git a/src/construct/hard_break_escape.rs b/src/construct/hard_break_escape.rs
index 64c909a..c562ff6 100644
--- a/src/construct/hard_break_escape.rs
+++ b/src/construct/hard_break_escape.rs
@@ -56,7 +56,9 @@ use crate::tokenizer::Tokenizer;
/// | b
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if tokenizer.parse_state.constructs.hard_break_escape && tokenizer.current == Some(b'\\') {
+ if tokenizer.parse_state.options.constructs.hard_break_escape
+ && tokenizer.current == Some(b'\\')
+ {
tokenizer.enter(Name::HardBreakEscape);
tokenizer.consume();
State::Next(StateName::HardBreakEscapeAfter)
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index dd09f74..c1090c4 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -77,14 +77,14 @@ use alloc::vec;
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if tokenizer.parse_state.constructs.heading_atx {
+ if tokenizer.parse_state.options.constructs.heading_atx {
tokenizer.enter(Name::HeadingAtx);
if matches!(tokenizer.current, Some(b'\t' | b' ')) {
tokenizer.attempt(State::Next(StateName::HeadingAtxBefore), State::Nok);
State::Retry(space_or_tab_min_max(
tokenizer,
0,
- if tokenizer.parse_state.constructs.code_indented {
+ if tokenizer.parse_state.options.constructs.code_indented {
TAB_SIZE - 1
} else {
usize::MAX
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index 19d2dda..df1d4fb 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -85,7 +85,7 @@ use alloc::vec;
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if tokenizer.parse_state.constructs.heading_setext
+ if tokenizer.parse_state.options.constructs.heading_setext
&& !tokenizer.lazy
// Require a paragraph before.
&& (!tokenizer.events.is_empty()
@@ -102,7 +102,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
State::Retry(space_or_tab_min_max(
tokenizer,
0,
- if tokenizer.parse_state.constructs.code_indented {
+ if tokenizer.parse_state.options.constructs.code_indented {
TAB_SIZE - 1
} else {
usize::MAX
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index edb500e..3f6e19a 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -131,7 +131,7 @@ const COMPLETE: u8 = 7;
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if tokenizer.parse_state.constructs.html_flow {
+ if tokenizer.parse_state.options.constructs.html_flow {
tokenizer.enter(Name::HtmlFlow);
if matches!(tokenizer.current, Some(b'\t' | b' ')) {
@@ -141,7 +141,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
SpaceOrTabOptions {
kind: Name::HtmlFlowData,
min: 0,
- max: if tokenizer.parse_state.constructs.code_indented {
+ max: if tokenizer.parse_state.options.constructs.code_indented {
TAB_SIZE - 1
} else {
usize::MAX
diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs
index 5aa6137..d40361d 100644
--- a/src/construct/html_text.rs
+++ b/src/construct/html_text.rs
@@ -64,7 +64,7 @@ use crate::util::constant::HTML_CDATA_PREFIX;
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if Some(b'<') == tokenizer.current && tokenizer.parse_state.constructs.html_text {
+ if Some(b'<') == tokenizer.current && tokenizer.parse_state.options.constructs.html_text {
tokenizer.enter(Name::HtmlText);
tokenizer.enter(Name::HtmlTextData);
tokenizer.consume();
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index 4532920..0ea745f 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -183,7 +183,7 @@ use alloc::vec;
/// > | [a] b
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if Some(b']') == tokenizer.current && tokenizer.parse_state.constructs.label_end {
+ if Some(b']') == tokenizer.current && tokenizer.parse_state.options.constructs.label_end {
// If there is an okay opening:
if !tokenizer.tokenize_state.label_starts.is_empty() {
let label_start = tokenizer.tokenize_state.label_starts.last().unwrap();
diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs
index 8d35df2..a8c9ac3 100644
--- a/src/construct/label_start_image.rs
+++ b/src/construct/label_start_image.rs
@@ -44,7 +44,8 @@ use crate::tokenizer::{LabelStart, Tokenizer};
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if tokenizer.parse_state.constructs.label_start_image && tokenizer.current == Some(b'!') {
+ if tokenizer.parse_state.options.constructs.label_start_image && tokenizer.current == Some(b'!')
+ {
tokenizer.enter(Name::LabelImage);
tokenizer.enter(Name::LabelImageMarker);
tokenizer.consume();
diff --git a/src/construct/label_start_link.rs b/src/construct/label_start_link.rs
index e079b2d..3aeb68b 100644
--- a/src/construct/label_start_link.rs
+++ b/src/construct/label_start_link.rs
@@ -43,7 +43,8 @@ use crate::tokenizer::{LabelStart, Tokenizer};
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if tokenizer.parse_state.constructs.label_start_link && tokenizer.current == Some(b'[') {
+ if tokenizer.parse_state.options.constructs.label_start_link && tokenizer.current == Some(b'[')
+ {
let start = tokenizer.events.len();
tokenizer.enter(Name::LabelLink);
tokenizer.enter(Name::LabelMarker);
diff --git a/src/construct/list_item.rs b/src/construct/list_item.rs
index 7228a00..39b5d13 100644
--- a/src/construct/list_item.rs
+++ b/src/construct/list_item.rs
@@ -77,7 +77,7 @@ use alloc::{vec, vec::Vec};
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if tokenizer.parse_state.constructs.list_item {
+ if tokenizer.parse_state.options.constructs.list_item {
tokenizer.enter(Name::ListItem);
if matches!(tokenizer.current, Some(b'\t' | b' ')) {
@@ -85,7 +85,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
State::Retry(space_or_tab_min_max(
tokenizer,
0,
- if tokenizer.parse_state.constructs.code_indented {
+ if tokenizer.parse_state.options.constructs.code_indented {
TAB_SIZE - 1
} else {
usize::MAX
diff --git a/src/construct/text.rs b/src/construct/text.rs
index 06ba378..9d40585 100644
--- a/src/construct/text.rs
+++ b/src/construct/text.rs
@@ -27,7 +27,7 @@ use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
/// Characters that can start something in text.
-const MARKERS: [u8; 9] = [
+const MARKERS: [u8; 10] = [
b'!', // `label_start_image`
b'&', // `character_reference`
b'*', // `attention`
@@ -37,6 +37,7 @@ const MARKERS: [u8; 9] = [
b']', // `label_end`
b'_', // `attention`
b'`', // `code_text`
+ b'~', // `attention` (w/ `gfm_strikethrough`)
];
/// Start of text.
@@ -77,7 +78,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
);
State::Retry(StateName::CharacterReferenceStart)
}
- Some(b'*' | b'_') => {
+ Some(b'*' | b'_' | b'~') => {
tokenizer.attempt(
State::Next(StateName::TextBefore),
State::Next(StateName::TextBeforeData),
@@ -171,11 +172,16 @@ pub fn before_data(tokenizer: &mut Tokenizer) -> State {
pub fn resolve(tokenizer: &mut Tokenizer) {
resolve_whitespace(
tokenizer,
- tokenizer.parse_state.constructs.hard_break_trailing,
+ tokenizer.parse_state.options.constructs.hard_break_trailing,
true,
);
- if tokenizer.parse_state.constructs.gfm_autolink_literal {
+ if tokenizer
+ .parse_state
+ .options
+ .constructs
+ .gfm_autolink_literal
+ {
resolve_gfm_autolink_literal(tokenizer);
}
}
diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs
index f77f83e..12dd7cf 100644
--- a/src/construct/thematic_break.rs
+++ b/src/construct/thematic_break.rs
@@ -69,7 +69,7 @@ use crate::util::constant::{TAB_SIZE, THEMATIC_BREAK_MARKER_COUNT_MIN};
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if tokenizer.parse_state.constructs.thematic_break {
+ if tokenizer.parse_state.options.constructs.thematic_break {
tokenizer.enter(Name::ThematicBreak);
if matches!(tokenizer.current, Some(b'\t' | b' ')) {
@@ -77,7 +77,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
State::Retry(space_or_tab_min_max(
tokenizer,
0,
- if tokenizer.parse_state.constructs.code_indented {
+ if tokenizer.parse_state.options.constructs.code_indented {
TAB_SIZE - 1
} else {
usize::MAX
diff --git a/src/event.rs b/src/event.rs
index 169fdb5..3c690e1 100644
--- a/src/event.rs
+++ b/src/event.rs
@@ -878,7 +878,6 @@ pub enum Name {
/// ^
/// ```
EmphasisText,
- // To do: sort.
/// Whole frontmatter.
///
/// ## Info
@@ -1020,6 +1019,61 @@ pub enum Name {
/// ^^^^^^^^^^^^^^^
/// ```
GfmAutolinkLiteralWww,
+ /// GFM: Strikethrough.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [text content][crate::construct::text]
+ /// * **Content model**:
+ /// [`GfmStrikethroughSequence`][Name::GfmStrikethroughSequence],
+ /// [`GfmStrikethroughText`][Name::GfmStrikethroughText]
+ /// * **Construct**:
+ /// [`attention`][crate::construct::attention]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// > | ~a~
+ /// ^^^
+ /// ```
+ GfmStrikethrough,
+ /// GFM: Strikethrough sequence.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`GfmStrikethrough`][Name::GfmStrikethrough]
+ /// * **Content model**:
+ /// void
+ /// * **Construct**:
+ /// [`attention`][crate::construct::attention]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// > | ~a~
+ /// ^ ^
+ /// ```
+ GfmStrikethroughSequence,
+ /// GFM: Strikethrough text.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`GfmStrikethrough`][Name::GfmStrikethrough]
+ /// * **Content model**:
+ /// [text content][crate::construct::text]
+ /// * **Construct**:
+ /// [`attention`][crate::construct::attention]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// > | ~a~
+ /// ^
+ /// ```
+ GfmStrikethroughText,
/// Whole hard break (escape).
///
/// ## Info
@@ -1977,7 +2031,7 @@ pub enum Name {
}
/// List of void events, used to make sure everything is working well.
-pub const VOID_EVENTS: [Name; 46] = [
+pub const VOID_EVENTS: [Name; 47] = [
Name::AttentionSequence,
Name::AutolinkEmail,
Name::AutolinkMarker,
@@ -2006,6 +2060,7 @@ pub const VOID_EVENTS: [Name; 46] = [
Name::GfmAutolinkLiteralEmail,
Name::GfmAutolinkLiteralProtocol,
Name::GfmAutolinkLiteralWww,
+ Name::GfmStrikethroughSequence,
Name::FrontmatterSequence,
Name::HardBreakEscape,
Name::HardBreakTrailing,
diff --git a/src/lib.rs b/src/lib.rs
index be423c6..893255a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -171,6 +171,13 @@ pub struct Constructs {
/// ^^^^^^^^^^^^^^^^^^^
/// ```
pub gfm_autolink_literal: bool,
+ /// GFM: strikethrough.
+ ///
+ /// ```markdown
+ /// > | a ~b~ c.
+ /// ^^^
+ /// ```
+ pub gfm_strikethrough: bool,
/// Hard break (escape).
///
/// ```markdown
@@ -269,6 +276,7 @@ impl Default for Constructs {
definition: true,
frontmatter: false,
gfm_autolink_literal: false,
+ gfm_strikethrough: false,
hard_break_escape: true,
hard_break_trailing: true,
heading_atx: true,
@@ -292,13 +300,14 @@ impl Constructs {
pub fn gfm() -> Self {
Self {
gfm_autolink_literal: true,
+ gfm_strikethrough: true,
..Self::default()
}
}
}
/// Configuration (optional).
-#[derive(Clone, Debug, Default)]
+#[derive(Clone, Debug)]
pub struct Options {
/// Whether to allow (dangerous) HTML.
/// The default is `false`, you can turn it on to `true` for trusted
@@ -358,6 +367,43 @@ pub struct Options {
/// ```
pub allow_dangerous_protocol: bool,
+ /// Whether to support GFM strikethrough (if enabled in `constructs`) with
+ /// a single tilde (default: true).
+ ///
+ /// Single tildes work on github.com but are technically prohibited by the GFM spec.
+ ///
+ /// ## Examples
+ ///
+ /// ```
+ /// use micromark::{micromark, micromark_with_options, Options, Constructs};
+ ///
+ /// // micromark supports single tildes by default:
+ /// assert_eq!(
+ /// micromark_with_options(
+ /// "~a~",
+ /// &Options {
+ /// constructs: Constructs::gfm(),
+ /// ..Options::default()
+ /// }
+ /// ),
+ /// "<p><del>a</del></p>"
+ /// );
+ ///
+ /// // Pass `gfm_strikethrough_single_tilde: false` to turn that off:
+ /// assert_eq!(
+ /// micromark_with_options(
+ /// "~a~",
+ /// &Options {
+ /// constructs: Constructs::gfm(),
+ /// gfm_strikethrough_single_tilde: false,
+ /// ..Options::default()
+ /// }
+ /// ),
+ /// "<p>~a~</p>"
+ /// );
+ /// ```
+ pub gfm_strikethrough_single_tilde: bool,
+
/// Default line ending to use, for line endings not in `value`.
///
/// Generally, micromark copies line endings (`\r`, `\n`, `\r\n`) in the
@@ -427,6 +473,19 @@ pub struct Options {
pub constructs: Constructs,
}
+impl Default for Options {
+ /// Safe `CommonMark` defaults.
+ fn default() -> Self {
+ Self {
+ allow_dangerous_html: false,
+ allow_dangerous_protocol: false,
+ gfm_strikethrough_single_tilde: true,
+ default_line_ending: LineEnding::default(),
+ constructs: Constructs::default(),
+ }
+ }
+}
+
/// Turn markdown into HTML.
///
/// ## Examples
diff --git a/src/parser.rs b/src/parser.rs
index 404fd0f..afa08ac 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -4,7 +4,7 @@ use crate::event::{Event, Point};
use crate::state::{Name as StateName, State};
use crate::subtokenize::subtokenize;
use crate::tokenizer::Tokenizer;
-use crate::{Constructs, Options};
+use crate::Options;
use alloc::{string::String, vec, vec::Vec};
/// Info needed, in all content types, when parsing markdown.
@@ -13,7 +13,8 @@ use alloc::{string::String, vec, vec::Vec};
/// It also references the input value as bytes (`u8`).
#[derive(Debug)]
pub struct ParseState<'a> {
- pub constructs: &'a Constructs,
+ /// Configuration.
+ pub options: &'a Options,
/// List of chars.
pub bytes: &'a [u8],
/// Set of defined identifiers.
@@ -25,7 +26,7 @@ pub struct ParseState<'a> {
/// Passes the bytes back so the compiler can access the source.
pub fn parse<'a>(value: &'a str, options: &'a Options) -> (Vec<Event>, &'a [u8]) {
let mut parse_state = ParseState {
- constructs: &options.constructs,
+ options,
bytes: value.as_bytes(),
definitions: vec![],
};