Add support for math (text)

author: Titus Wormer <tituswormer@gmail.com> 2022-08-26 10:57:20 +0200
committer: Titus Wormer <tituswormer@gmail.com> 2022-08-26 11:00:51 +0200
commit: f41688c067be261279804b8ab3e04cd5d67f492f (patch)
tree: 7ddf8e3b4149de8c8425c8be3e6963c524ad4909 /src
parent: 1e4c95079cb97b2b02440b21945c6d12741a7d19 (diff)
download: markdown-rs-f41688c067be261279804b8ab3e04cd5d67f492f.tar.gz
markdown-rs-f41688c067be261279804b8ab3e04cd5d67f492f.tar.bz2
markdown-rs-f41688c067be261279804b8ab3e04cd5d67f492f.zip
12 files changed, 491 insertions, 312 deletions
diff --git a/src/compiler.rs b/src/compiler.rs
index 6ad34b2..73834a4 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -90,7 +90,7 @@ struct CompileContext<'a> {
     /// Number of code (fenced) fenced.
     pub code_fenced_fences_count: Option<usize>,
     /// Whether we are in code (text).
-    pub code_text_inside: bool,
+    pub raw_text_inside: bool,
     /// Whether we are in image text.
     pub image_alt_inside: bool,
     /// Marker of character reference.
@@ -145,7 +145,7 @@ impl<'a> CompileContext<'a> {
             heading_setext_buffer: None,
             code_flow_seen_data: None,
             code_fenced_fences_count: None,
-            code_text_inside: false,
+            raw_text_inside: false,
             character_reference_marker: None,
             list_expect_first_marker: None,
             media_stack: vec![],
@@ -341,7 +341,7 @@ fn enter(context: &mut CompileContext) {
         Name::BlockQuote => on_enter_block_quote(context),
         Name::CodeIndented => on_enter_code_indented(context),
         Name::CodeFenced => on_enter_code_fenced(context),
-        Name::CodeText => on_enter_code_text(context),
+        Name::CodeText | Name::MathText => on_enter_raw_text(context),
         Name::Definition => on_enter_definition(context),
         Name::DefinitionDestinationString => on_enter_definition_destination_string(context),
         Name::Emphasis => on_enter_emphasis(context),
@@ -368,8 +368,9 @@ fn enter(context: &mut CompileContext) {
 fn exit(context: &mut CompileContext) {
     match context.events[context.index].name {
         Name::CodeFencedFenceMeta | Name::Resource => on_exit_drop(context),
-        Name::CharacterEscapeValue | Name::CodeTextData | Name::Data => on_exit_data(context),
-
+        Name::CharacterEscapeValue | Name::CodeTextData | Name::Data | Name::MathTextData => {
+            on_exit_data(context);
+        }
         Name::AutolinkEmail => on_exit_autolink_email(context),
         Name::AutolinkProtocol => on_exit_autolink_protocol(context),
         Name::BlankLineEnding => on_exit_blank_line_ending(context),
@@ -386,7 +387,7 @@ fn exit(context: &mut CompileContext) {
         Name::CodeFencedFence => on_exit_code_fenced_fence(context),
         Name::CodeFencedFenceInfo => on_exit_code_fenced_fence_info(context),
         Name::CodeFlowChunk => on_exit_code_flow_chunk(context),
-        Name::CodeText => on_exit_code_text(context),
+        Name::CodeText | Name::MathText => on_exit_raw_text(context),
         Name::Definition => on_exit_definition(context),
         Name::DefinitionDestinationString => on_exit_definition_destination_string(context),
         Name::DefinitionLabelString => on_exit_definition_label_string(context),
@@ -460,11 +461,15 @@ fn on_enter_code_fenced(context: &mut CompileContext) {
     context.code_fenced_fences_count = Some(0);
 }
 
-/// Handle [`Enter`][Kind::Enter]:[`CodeText`][Name::CodeText].
-fn on_enter_code_text(context: &mut CompileContext) {
-    context.code_text_inside = true;
+/// Handle [`Enter`][Kind::Enter]:{[`CodeText`][Name::CodeText],[`MathText`][Name::MathText]}.
+fn on_enter_raw_text(context: &mut CompileContext) {
+    context.raw_text_inside = true;
     if !context.image_alt_inside {
-        context.push("<code>");
+        context.push("<code");
+        if context.events[context.index].name == Name::MathText {
+            context.push(" class=\"lang-math math-inline\"");
+        }
+        context.push(">");
     }
     context.buffer();
 }
@@ -875,8 +880,8 @@ fn on_exit_code_flow(context: &mut CompileContext) {
     context.slurp_one_line_ending = false;
 }
 
-/// Handle [`Exit`][Kind::Exit]:[`CodeText`][Name::CodeText].
-fn on_exit_code_text(context: &mut CompileContext) {
+/// Handle [`Exit`][Kind::Exit]:{[`CodeText`][Name::CodeText],[`MathText`][Name::MathText]}.
+fn on_exit_raw_text(context: &mut CompileContext) {
     let result = context.resume();
     let mut bytes = result.as_bytes();
     let mut trim = false;
@@ -899,7 +904,7 @@ fn on_exit_code_text(context: &mut CompileContext) {
         bytes = &bytes[1..end];
     }
 
-    context.code_text_inside = false;
+    context.raw_text_inside = false;
     context.push(str::from_utf8(bytes).unwrap());
 
     if !context.image_alt_inside {
@@ -1209,7 +1214,7 @@ fn on_exit_label_text(context: &mut CompileContext) {
 
 /// Handle [`Exit`][Kind::Exit]:[`LineEnding`][Name::LineEnding].
 fn on_exit_line_ending(context: &mut CompileContext) {
-    if context.code_text_inside {
+    if context.raw_text_inside {
         context.push(" ");
     } else if context.slurp_one_line_ending
         // Ignore line endings after definitions.
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index d117006..edb2d93 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -48,7 +48,7 @@
 //! Which value it holds depends on what your syntax highlighter supports, if
 //! one is used.
 //!
-//! In markdown, it is also possible to use [code (text)][code_text] in the
+//! In markdown, it is also possible to use [code (text)][raw_text] in the
 //! [text][] content type.
 //! It is also possible to create code with the
 //! [code (indented)][code_indented] construct.
@@ -106,7 +106,7 @@
 //! [character_escape]: crate::construct::character_escape
 //! [character_reference]: crate::construct::character_reference
 //! [code_indented]: crate::construct::code_indented
-//! [code_text]: crate::construct::code_text
+//! [raw_text]: crate::construct::raw_text
 //! [html_code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
 //! [html_pre]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-pre-element
 
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs
index 7d279c1..5fc9446 100644
--- a/src/construct/code_indented.rs
+++ b/src/construct/code_indented.rs
@@ -16,7 +16,7 @@
 //! As this construct occurs in flow, like all flow constructs, it must be
 //! followed by an eol (line ending) or eof (end of file).
 //!
-//! In markdown, it is also possible to use [code (text)][code_text] in the
+//! In markdown, it is also possible to use [code (text)][raw_text] in the
 //! [text][] content type.
 //! It is also possible to create code with the [code (fenced)][code_fenced]
 //! construct.
@@ -49,7 +49,7 @@
 //! [flow]: crate::construct::flow
 //! [text]: crate::construct::text
 //! [code_fenced]: crate::construct::code_fenced
-//! [code_text]: crate::construct::code_text
+//! [raw_text]: crate::construct::raw_text
 //! [html_code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
 //! [html_pre]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-pre-element
 
diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs
deleted file mode 100644
index b2cfd17..0000000
--- a/src/construct/code_text.rs
+++ /dev/null
@@ -1,213 +0,0 @@
-//! Code (text) occurs in the [text][] content type.
-//!
-//! ## Grammar
-//!
-//! Code (text) forms with the following BNF
-//! (<small>see [construct][crate::construct] for character groups</small>):
-//!
-//! ```bnf
-//! ; Restriction: the number of markers in the closing sequence must be equal
-//! ; to the number of markers in the opening sequence.
-//! code_text ::= sequence 1*byte sequence
-//!
-//! ; Restriction: not preceded or followed by `` ` ``.
-//! sequence ::= 1*'`'
-//! ```
-//!
-//! The above grammar shows that it is not possible to create empty code.
-//! It is possible to include grave accents (ticks) in code, by wrapping it
-//! in bigger or smaller sequences:
-//!
-//! ```markdown
-//! Include more: `a``b` or include less: ``a`b``.
-//! ```
-//!
-//! It is also possible to include just one grave accent (tick):
-//!
-//! ```markdown
-//! Include just one: `` ` ``.
-//! ```
-//!
-//! Sequences are “gready”, in that they cannot be preceded or followed by
-//! more grave accents (ticks).
-//! To illustrate:
-//!
-//! ```markdown
-//! Not code: ``x`.
-//!
-//! Not code: `x``.
-//!
-//! Escapes work, this is code: \``x`.
-//!
-//! Escapes work, this is code: `x`\`.
-//! ```
-//!
-//! Yields:
-//!
-//! ```html
-//! <p>Not code: ``x`.</p>
-//! <p>Not code: `x``.</p>
-//! <p>Escapes work, this is code: `<code>x</code>.</p>
-//! <p>Escapes work, this is code: <code>x</code>`.</p>
-//! ```
-//!
-//! That is because, when turning markdown into HTML, the first and last space,
-//! if both exist and there is also a non-space in the code, are removed.
-//! Line endings, at that stage, are considered as spaces.
-//!
-//! In markdown, it is possible to create code with the
-//! [code (fenced)][code_fenced] or [code (indented)][code_indented] constructs
-//! in the [flow][] content type.
-//!
-//! ## HTML
-//!
-//! Code (text) relates to the `<code>` element in HTML.
-//! See [*§ 4.5.15 The `code` element*][html_code] in the HTML spec for more
-//! info.
-//!
-//! When turning markdown into HTML, each line ending is turned into a space.
-//!
-//! ## Tokens
-//!
-//! *   [`CodeText`][Name::CodeText]
-//! *   [`CodeTextData`][Name::CodeTextData]
-//! *   [`CodeTextSequence`][Name::CodeTextSequence]
-//! *   [`LineEnding`][Name::LineEnding]
-//!
-//! ## References
-//!
-//! *   [`code-text.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/code-text.js)
-//! *   [*§ 6.1 Code spans* in `CommonMark`](https://spec.commonmark.org/0.30/#code-spans)
-//!
-//! [flow]: crate::construct::flow
-//! [text]: crate::construct::text
-//! [code_indented]: crate::construct::code_indented
-//! [code_fenced]: crate::construct::code_fenced
-//! [html_code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
-
-use crate::event::Name;
-use crate::state::{Name as StateName, State};
-use crate::tokenizer::Tokenizer;
-
-/// Start of code (text).
-///
-/// ```markdown
-/// > | `a`
-///     ^
-/// > | \`a`
-///      ^
-/// ```
-pub fn start(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        Some(b'`')
-            if tokenizer.parse_state.options.constructs.code_text
-                && (tokenizer.previous != Some(b'`')
-                    || (!tokenizer.events.is_empty()
-                        && tokenizer.events[tokenizer.events.len() - 1].name
-                            == Name::CharacterEscape)) =>
-        {
-            tokenizer.enter(Name::CodeText);
-            tokenizer.enter(Name::CodeTextSequence);
-            State::Retry(StateName::CodeTextSequenceOpen)
-        }
-        _ => State::Nok,
-    }
-}
-
-/// In opening sequence.
-///
-/// ```markdown
-/// > | `a`
-///     ^
-/// ```
-pub fn sequence_open(tokenizer: &mut Tokenizer) -> State {
-    if let Some(b'`') = tokenizer.current {
-        tokenizer.tokenize_state.size += 1;
-        tokenizer.consume();
-        State::Next(StateName::CodeTextSequenceOpen)
-    } else {
-        tokenizer.exit(Name::CodeTextSequence);
-        State::Retry(StateName::CodeTextBetween)
-    }
-}
-
-/// Between something and something else
-///
-/// ```markdown
-/// > | `a`
-///      ^^
-/// ```
-pub fn between(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        None => {
-            tokenizer.tokenize_state.size = 0;
-            State::Nok
-        }
-        Some(b'\n') => {
-            tokenizer.enter(Name::LineEnding);
-            tokenizer.consume();
-            tokenizer.exit(Name::LineEnding);
-            State::Next(StateName::CodeTextBetween)
-        }
-        Some(b'`') => {
-            tokenizer.enter(Name::CodeTextSequence);
-            State::Retry(StateName::CodeTextSequenceClose)
-        }
-        _ => {
-            tokenizer.enter(Name::CodeTextData);
-            State::Retry(StateName::CodeTextData)
-        }
-    }
-}
-
-/// In data.
-///
-/// ```markdown
-/// > | `a`
-///      ^
-/// ```
-pub fn data(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        None | Some(b'\n' | b'`') => {
-            tokenizer.exit(Name::CodeTextData);
-            State::Retry(StateName::CodeTextBetween)
-        }
-        _ => {
-            tokenizer.consume();
-            State::Next(StateName::CodeTextData)
-        }
-    }
-}
-
-/// In closing sequence.
-///
-/// ```markdown
-/// > | `a`
-///       ^
-/// ```
-pub fn sequence_close(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        Some(b'`') => {
-            tokenizer.tokenize_state.size_b += 1;
-            tokenizer.consume();
-            State::Next(StateName::CodeTextSequenceClose)
-        }
-        _ => {
-            if tokenizer.tokenize_state.size == tokenizer.tokenize_state.size_b {
-                tokenizer.exit(Name::CodeTextSequence);
-                tokenizer.exit(Name::CodeText);
-                tokenizer.tokenize_state.size = 0;
-                tokenizer.tokenize_state.size_b = 0;
-                State::Ok
-            } else {
-                let index = tokenizer.events.len();
-                tokenizer.exit(Name::CodeTextSequence);
-                // More or less accents: mark as data.
-                tokenizer.events[index - 1].name = Name::CodeTextData;
-                tokenizer.events[index].name = Name::CodeTextData;
-                tokenizer.tokenize_state.size_b = 0;
-                State::Retry(StateName::CodeTextBetween)
-            }
-        }
-    }
-}
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index c5002bb..a0065fa 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -38,7 +38,7 @@
 //! *   [character reference][character_reference]
 //! *   [code (fenced)][code_fenced]
 //! *   [code (indented)][code_indented]
-//! *   [code (text)][code_text]
+//! *   [code (text)][raw_text]
 //! *   [definition][]
 //! *   [hard break (escape)][hard_break_escape]
 //! *   [heading (atx)][heading_atx]
@@ -62,6 +62,8 @@
 //! *   [gfm footnote definition][gfm_footnote_definition]
 //! *   [gfm task list item check][gfm_task_list_item_check]
 //! *   [gfm label start footnote][gfm_label_start_footnote]
+//! *   math (text) (in `raw_text`)
+//! *   gfm strikethrough (in attention)
 //!
 //! There are also several small subroutines typically used in different places:
 //!
@@ -143,7 +145,6 @@ pub mod character_escape;
 pub mod character_reference;
 pub mod code_fenced;
 pub mod code_indented;
-pub mod code_text;
 pub mod definition;
 pub mod document;
 pub mod flow;
@@ -171,6 +172,7 @@ pub mod partial_space_or_tab;
 pub mod partial_space_or_tab_eol;
 pub mod partial_title;
 pub mod partial_whitespace;
+pub mod raw_text;
 pub mod string;
 pub mod text;
 pub mod thematic_break;
diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs
index 9e20643..c1e7311 100644
--- a/src/construct/paragraph.rs
+++ b/src/construct/paragraph.rs
@@ -18,7 +18,7 @@
 //! allowed to contain blank lines, or to be blank themselves.
 //!
 //! The paragraph is interpreted as the [text][] content type.
-//! That means that [autolinks][autolink], [code (text)][code_text], etc are allowed.
+//! That means that [autolinks][autolink], [code (text)][raw_text], etc are allowed.
 //!
 //! ## HTML
 //!
@@ -37,7 +37,7 @@
 //! [flow]: crate::construct::flow
 //! [text]: crate::construct::text
 //! [autolink]: crate::construct::autolink
-//! [code_text]: crate::construct::code_text
+//! [raw_text]: crate::construct::raw_text
 //! [html]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-p-element
 
 use crate::event::{Content, Kind, Link, Name};
diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs
index ab436b2..255fde1 100644
--- a/src/construct/partial_label.rs
+++ b/src/construct/partial_label.rs
@@ -44,7 +44,7 @@
 //! > ([label start (image)][label_start_image] or
 //! > [label start (link)][label_start_link]) and a closing
 //! > ([label end][label_end]), so as to allow further phrasing such as
-//! > [code (text)][code_text] or [attention][].
+//! > [code (text)][raw_text] or [attention][].
 //!
 //! ## References
 //!
@@ -58,7 +58,7 @@
 //! [label_start_image]: crate::construct::label_start_image
 //! [label_start_link]: crate::construct::label_start_link
 //! [label_end]: crate::construct::label_end
-//! [code_text]: crate::construct::code_text
+//! [raw_text]: crate::construct::raw_text
 //! [link_reference_size_max]: crate::util::constant::LINK_REFERENCE_SIZE_MAX
 
 use crate::construct::partial_space_or_tab_eol::{space_or_tab_eol_with_options, Options};
diff --git a/src/construct/raw_text.rs b/src/construct/raw_text.rs
new file mode 100644
index 0000000..7f3990d
--- /dev/null
+++ b/src/construct/raw_text.rs
@@ -0,0 +1,270 @@
+//! Raw (text) occurs in the [text][] content type.
+//! It forms code (text) and math (text).
+//!
+//! ## Grammar
+//!
+//! Raw (text) forms with the following BNF
+//! (<small>see [construct][crate::construct] for character groups</small>):
+//!
+//! ```bnf
+//! ; Restriction: the number of markers in the closing sequence must be equal
+//! ; to the number of markers in the opening sequence.
+//! raw_text ::= sequence 1*byte sequence
+//!
+//! ; Restriction: not preceded or followed by the same marker.
+//! sequence ::= 1*'`' | 1*'$'
+//! ```
+//!
+//! The above grammar shows that it is not possible to create empty raw (text).
+//! It is possible to include the sequence marker (grave accent for code,
+//! dollar for math) in raw (text), by wrapping it in bigger or smaller
+//! sequences:
+//!
+//! ```markdown
+//! Include more: `a``b` or include less: ``a`b``.
+//! ```
+//!
+//! It is also possible to include just one marker:
+//!
+//! ```markdown
+//! Include just one: `` ` ``.
+//! ```
+//!
+//! Sequences are “greedy”, in that they cannot be preceded or followed by
+//! more markers.
+//! To illustrate:
+//!
+//! ```markdown
+//! Not code: ``x`.
+//!
+//! Not code: `x``.
+//!
+//! Escapes work, this is code: \``x`.
+//!
+//! Escapes work, this is code: `x`\`.
+//! ```
+//!
+//! Yields:
+//!
+//! ```html
+//! <p>Not code: ``x`.</p>
+//! <p>Not code: `x``.</p>
+//! <p>Escapes work, this is code: `<code>x</code>.</p>
+//! <p>Escapes work, this is code: <code>x</code>`.</p>
+//! ```
+//!
+//! That is because, when turning markdown into HTML, the first and last space,
+//! if both exist and there is also a non-space in the code, are removed.
+//! Line endings, at that stage, are considered as spaces.
+//!
+//! In markdown, it is possible to create code with the
+//! [code (fenced)][code_fenced] or [code (indented)][code_indented],
+//! and math with the [math (flow)][math_flow] constructs in the [flow][]
+//! content type.
+//!
+//! ## HTML
+//!
+//! Code (text) relates to the `<code>` element in HTML.
+//! See [*§ 4.5.15 The `code` element*][html_code] in the HTML spec for more
+//! info.
+//!
+//! Math (text) does not relate to HTML elements.
+//! `MathML`, which is sort of like SVG but for math, exists but it doesn’t work
+//! well and isn’t widely supported.
+//! Instead, it is recommended to use client side JavaScript with something like
+//! `KaTeX` or `MathJax` to process the math.
+//! For that, the math is compiled as a `<code>` element with two classes:
+//! `lang-math` and `math-inline`.
+//! Client side JavaScript can look for these classes to process them further.
+//!
+//! When turning markdown into HTML, each line ending in raw (text) is turned
+//! into a space.
+//!
+//! ## Recommendations
+//!
+//! When authoring markdown with math, keep in mind that math doesn’t work in
+//! most places.
+//! Notably, GitHub currently has a really weird crappy client-side regex-based
+//! thing.
+//! But on your own (math-heavy?) site it can be great!
+//! Alternatively, set `options.math_text_single_dollar: false`, which prevents
+//! single dollars from being seen as math, and thus prevents normal dollars in
+//! text from being seen as math.
+//!
+//! ## Tokens
+//!
+//! *   [`CodeText`][Name::CodeText]
+//! *   [`CodeTextData`][Name::CodeTextData]
+//! *   [`CodeTextSequence`][Name::CodeTextSequence]
+//! *   [`MathText`][Name::MathText]
+//! *   [`MathTextData`][Name::MathTextData]
+//! *   [`MathTextSequence`][Name::MathTextSequence]
+//! *   [`LineEnding`][Name::LineEnding]
+//!
+//! ## References
+//!
+//! *   [`code-text.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/code-text.js)
+//! *   [`micromark-extension-math`](https://github.com/micromark/micromark-extension-math)
+//! *   [*§ 6.1 Code spans* in `CommonMark`](https://spec.commonmark.org/0.30/#code-spans)
+//!
+//! [flow]: crate::construct::flow
+//! [text]: crate::construct::text
+//! [code_indented]: crate::construct::code_indented
+//! [code_fenced]: crate::construct::code_fenced
+//! [math_flow]: # "to do"
+//! [html_code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
+
+use crate::event::Name;
+use crate::state::{Name as StateName, State};
+use crate::tokenizer::Tokenizer;
+
+/// Start of raw (text).
+///
+/// ```markdown
+/// > | `a`
+///     ^
+/// > | \`a`
+///      ^
+/// ```
+pub fn start(tokenizer: &mut Tokenizer) -> State {
+    // Code (text):
+    if ((tokenizer.parse_state.options.constructs.code_text && tokenizer.current == Some(b'`'))
+        // Math (text):
+        || (tokenizer.parse_state.options.constructs.math_text && tokenizer.current == Some(b'$')))
+        // Not the same marker (except when escaped).
+        && (tokenizer.previous != tokenizer.current
+            || (!tokenizer.events.is_empty()
+                && tokenizer.events[tokenizer.events.len() - 1].name == Name::CharacterEscape))
+    {
+        let marker = tokenizer.current.unwrap();
+        if marker == b'`' {
+            tokenizer.tokenize_state.token_1 = Name::CodeText;
+            tokenizer.tokenize_state.token_2 = Name::CodeTextSequence;
+            tokenizer.tokenize_state.token_3 = Name::CodeTextData;
+        } else {
+            tokenizer.tokenize_state.token_1 = Name::MathText;
+            tokenizer.tokenize_state.token_2 = Name::MathTextSequence;
+            tokenizer.tokenize_state.token_3 = Name::MathTextData;
+        }
+        tokenizer.tokenize_state.marker = marker;
+        tokenizer.enter(tokenizer.tokenize_state.token_1.clone());
+        tokenizer.enter(tokenizer.tokenize_state.token_2.clone());
+        State::Retry(StateName::RawTextSequenceOpen)
+    } else {
+        State::Nok
+    }
+}
+
+/// In opening sequence.
+///
+/// ```markdown
+/// > | `a`
+///     ^
+/// ```
+pub fn sequence_open(tokenizer: &mut Tokenizer) -> State {
+    if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
+        tokenizer.tokenize_state.size += 1;
+        tokenizer.consume();
+        State::Next(StateName::RawTextSequenceOpen)
+    }
+    // Not enough markers in the sequence.
+    else if tokenizer.tokenize_state.marker == b'$'
+        && tokenizer.tokenize_state.size == 1
+        && !tokenizer.parse_state.options.math_text_single_dollar
+    {
+        tokenizer.tokenize_state.marker = 0;
+        tokenizer.tokenize_state.size = 0;
+        tokenizer.tokenize_state.token_1 = Name::Data;
+        tokenizer.tokenize_state.token_2 = Name::Data;
+        tokenizer.tokenize_state.token_3 = Name::Data;
+        State::Nok
+    } else {
+        tokenizer.exit(tokenizer.tokenize_state.token_2.clone());
+        State::Retry(StateName::RawTextBetween)
+    }
+}
+
+/// Between the sequences: at data, a line ending, or a potential closing sequence.
+///
+/// ```markdown
+/// > | `a`
+///      ^^
+/// ```
+pub fn between(tokenizer: &mut Tokenizer) -> State {
+    match tokenizer.current {
+        None => {
+            tokenizer.tokenize_state.marker = 0;
+            tokenizer.tokenize_state.size = 0;
+            tokenizer.tokenize_state.token_1 = Name::Data;
+            tokenizer.tokenize_state.token_2 = Name::Data;
+            tokenizer.tokenize_state.token_3 = Name::Data;
+            State::Nok
+        }
+        Some(b'\n') => {
+            tokenizer.enter(Name::LineEnding);
+            tokenizer.consume();
+            tokenizer.exit(Name::LineEnding);
+            State::Next(StateName::RawTextBetween)
+        }
+        _ => {
+            if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
+                tokenizer.enter(tokenizer.tokenize_state.token_2.clone());
+                State::Retry(StateName::RawTextSequenceClose)
+            } else {
+                tokenizer.enter(tokenizer.tokenize_state.token_3.clone());
+                State::Retry(StateName::RawTextData)
+            }
+        }
+    }
+}
+
+/// In data.
+///
+/// ```markdown
+/// > | `a`
+///      ^
+/// ```
+pub fn data(tokenizer: &mut Tokenizer) -> State {
+    if matches!(tokenizer.current, None | Some(b'\n'))
+        || tokenizer.current == Some(tokenizer.tokenize_state.marker)
+    {
+        tokenizer.exit(tokenizer.tokenize_state.token_3.clone());
+        State::Retry(StateName::RawTextBetween)
+    } else {
+        tokenizer.consume();
+        State::Next(StateName::RawTextData)
+    }
+}
+
+/// In closing sequence.
+///
+/// ```markdown
+/// > | `a`
+///       ^
+/// ```
+pub fn sequence_close(tokenizer: &mut Tokenizer) -> State {
+    if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
+        tokenizer.tokenize_state.size_b += 1;
+        tokenizer.consume();
+        State::Next(StateName::RawTextSequenceClose)
+    } else {
+        tokenizer.exit(tokenizer.tokenize_state.token_2.clone());
+        if tokenizer.tokenize_state.size == tokenizer.tokenize_state.size_b {
+            tokenizer.exit(tokenizer.tokenize_state.token_1.clone());
+            tokenizer.tokenize_state.marker = 0;
+            tokenizer.tokenize_state.size = 0;
+            tokenizer.tokenize_state.size_b = 0;
+            tokenizer.tokenize_state.token_1 = Name::Data;
+            tokenizer.tokenize_state.token_2 = Name::Data;
+            tokenizer.tokenize_state.token_3 = Name::Data;
+            State::Ok
+        } else {
+            // More or less markers than in the opening sequence: mark as data.
+            let len = tokenizer.events.len();
+            tokenizer.events[len - 2].name = tokenizer.tokenize_state.token_3.clone();
+            tokenizer.events[len - 1].name = tokenizer.tokenize_state.token_3.clone();
+            tokenizer.tokenize_state.size_b = 0;
+            State::Retry(StateName::RawTextBetween)
+        }
+    }
+}
diff --git a/src/construct/text.rs b/src/construct/text.rs
index 5535e3f..3cb0f10 100644
--- a/src/construct/text.rs
+++ b/src/construct/text.rs
@@ -1,16 +1,16 @@
 //! The text content type.
 //!
 //! **Text** contains phrasing content such as
-//! [attention][crate::construct::attention] (emphasis, strong),
-//! [code (text)][crate::construct::code_text], and actual text.
+//! [attention][crate::construct::attention] (emphasis, gfm strikethrough, strong),
+//! [raw (text)][crate::construct::raw_text] (code (text), math (text)), and actual text.
 //!
 //! The constructs found in text are:
 //!
-//! *   [Attention][crate::construct::attention]
+//! *   [Attention][crate::construct::attention] (emphasis, gfm strikethrough, strong)
 //! *   [Autolink][crate::construct::autolink]
 //! *   [Character escape][crate::construct::character_escape]
 //! *   [Character reference][crate::construct::character_reference]
-//! *   [Code (text)][crate::construct::code_text]
+//! *   [Raw (text)][crate::construct::raw_text] (code (text), math (text))
 //! *   [GFM: Label start (footnote)][crate::construct::gfm_label_start_footnote]
 //! *   [GFM: Task list item check][crate::construct::gfm_task_list_item_check]
 //! *   [Hard break (escape)][crate::construct::hard_break_escape]
@@ -29,17 +29,18 @@ use crate::state::{Name as StateName, State};
 use crate::tokenizer::Tokenizer;
 
 /// Characters that can start something in text.
-const MARKERS: [u8; 10] = [
+const MARKERS: [u8; 11] = [
     b'!',  // `label_start_image`
+    b'$',  // `raw_text` (math (text))
     b'&',  // `character_reference`
-    b'*',  // `attention`
+    b'*',  // `attention` (emphasis, strong)
     b'<',  // `autolink`, `html_text`
     b'[',  // `label_start_link`
     b'\\', // `character_escape`, `hard_break_escape`
     b']',  // `label_end`, `gfm_label_start_footnote`
-    b'_',  // `attention`
-    b'`',  // `code_text`
-    b'~',  // `attention` (w/ `gfm_strikethrough`)
+    b'_',  // `attention` (emphasis, strong)
+    b'`',  // `raw_text` (code (text))
+    b'~',  // `attention` (gfm strikethrough)
 ];
 
 /// Start of text.
@@ -81,6 +82,14 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
             );
             State::Retry(StateName::LabelStartImageStart)
         }
+        // raw (text) (code (text), math (text))
+        Some(b'$' | b'`') => {
+            tokenizer.attempt(
+                State::Next(StateName::TextBefore),
+                State::Next(StateName::TextBeforeData),
+            );
+            State::Retry(StateName::RawTextStart)
+        }
         Some(b'&') => {
             tokenizer.attempt(
                 State::Next(StateName::TextBefore),
@@ -88,6 +97,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
             );
             State::Retry(StateName::CharacterReferenceStart)
         }
+        // attention (emphasis, gfm strikethrough, strong)
         Some(b'*' | b'_' | b'~') => {
             tokenizer.attempt(
                 State::Next(StateName::TextBefore),
@@ -124,13 +134,6 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
             );
             State::Retry(StateName::LabelEndStart)
         }
-        Some(b'`') => {
-            tokenizer.attempt(
-                State::Next(StateName::TextBefore),
-                State::Next(StateName::TextBeforeData),
-            );
-            State::Retry(StateName::CodeTextStart)
-        }
         _ => State::Retry(StateName::TextBeforeData),
     }
 }
diff --git a/src/event.rs b/src/event.rs
index 3b805e5..869f2e8 100644
--- a/src/event.rs
+++ b/src/event.rs
@@ -507,7 +507,7 @@ pub enum Name {
     ///     [`CodeTextSequence`][Name::CodeTextSequence],
     ///     [`LineEnding`][Name::LineEnding]
     /// *   **Construct**:
-    ///     [`code_text`][crate::construct::code_text]
+    ///     [`raw_text`][crate::construct::raw_text]
     ///
     /// ## Example
     ///
@@ -525,7 +525,7 @@ pub enum Name {
     /// *   **Content model**:
     ///     void
     /// *   **Construct**:
-    ///     [`code_text`][crate::construct::code_text]
+    ///     [`raw_text`][crate::construct::raw_text]
     ///
     /// ## Example
     ///
@@ -543,7 +543,7 @@ pub enum Name {
     /// *   **Content model**:
     ///     void
     /// *   **Construct**:
-    ///     [`code_text`][crate::construct::code_text]
+    ///     [`raw_text`][crate::construct::raw_text]
     ///
     /// ## Example
     ///
@@ -1889,6 +1889,62 @@ pub enum Name {
     ///     ^^^
     /// ```
     ListUnordered,
+    /// Whole math (text).
+    ///
+    /// ## Info
+    ///
+    /// *   **Context**:
+    ///     [text content][crate::construct::text]
+    /// *   **Content model**:
+    ///     [`MathTextData`][Name::MathTextData],
+    ///     [`MathTextSequence`][Name::MathTextSequence],
+    ///     [`LineEnding`][Name::LineEnding]
+    /// *   **Construct**:
+    ///     [`raw_text`][crate::construct::raw_text]
+    ///
+    /// ## Example
+    ///
+    /// ```markdown
+    /// > | a $b$ c
+    ///       ^^^
+    /// ```
+    MathText,
+    /// Math (text) data.
+    ///
+    /// ## Info
+    ///
+    /// *   **Context**:
+    ///     [`MathText`][Name::MathText]
+    /// *   **Content model**:
+    ///     void
+    /// *   **Construct**:
+    ///     [`raw_text`][crate::construct::raw_text]
+    ///
+    /// ## Example
+    ///
+    /// ```markdown
+    /// > | a $b$ c
+    ///        ^
+    /// ```
+    MathTextData,
+    /// Math (text) sequence.
+    ///
+    /// ## Info
+    ///
+    /// *   **Context**:
+    ///     [`MathText`][Name::MathText]
+    /// *   **Content model**:
+    ///     void
+    /// *   **Construct**:
+    ///     [`raw_text`][crate::construct::raw_text]
+    ///
+    /// ## Example
+    ///
+    /// ```markdown
+    /// > | a $b$ c
+    ///       ^ ^
+    /// ```
+    MathTextSequence,
     /// Whole paragraph.
     ///
     /// ## Info
@@ -2271,7 +2327,7 @@ pub enum Name {
 }
 
 /// List of void events, used to make sure everything is working well.
-pub const VOID_EVENTS: [Name; 53] = [
+pub const VOID_EVENTS: [Name; 55] = [
     Name::AttentionSequence,
     Name::AutolinkEmail,
     Name::AutolinkMarker,
@@ -2319,6 +2375,8 @@ pub const VOID_EVENTS: [Name; 53] = [
     Name::LineEnding,
     Name::ListItemMarker,
     Name::ListItemValue,
+    Name::MathTextData,
+    Name::MathTextSequence,
     Name::ReferenceMarker,
     Name::ResourceMarker,
     Name::ResourceTitleMarker,
diff --git a/src/lib.rs b/src/lib.rs
index fd5e500..98a4936 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -272,6 +272,13 @@ pub struct Constructs {
     ///     ^^^
     /// ```
     pub list_item: bool,
+    /// Math (text).
+    ///
+    /// ```markdown
+    /// > | a $b$ c
+    ///       ^^^
+    /// ```
+    pub math_text: bool,
     /// Thematic break.
     ///
     /// ```markdown
@@ -310,6 +317,7 @@ impl Default for Constructs {
             label_start_link: true,
             label_end: true,
             list_item: true,
+            math_text: false,
             thematic_break: true,
         }
     }
@@ -333,6 +341,7 @@ impl Constructs {
 }
 
 /// Configuration (optional).
+#[allow(clippy::struct_excessive_bools)]
 #[derive(Clone, Debug)]
 pub struct Options {
     /// Whether to allow (dangerous) HTML.
@@ -393,6 +402,74 @@ pub struct Options {
     /// ```
     pub allow_dangerous_protocol: bool,
 
+    /// Which constructs to enable and disable.
+    /// The default is to follow `CommonMark`.
+    ///
+    /// ## Examples
+    ///
+    /// ```
+    /// use micromark::{micromark, micromark_with_options, Options, Constructs};
+    ///
+    /// // micromark follows CommonMark by default:
+    /// assert_eq!(
+    ///     micromark("    indented code?"),
+    ///     "<pre><code>indented code?\n</code></pre>"
+    /// );
+    ///
+    /// // Pass `constructs` to choose what to enable and disable:
+    /// assert_eq!(
+    ///     micromark_with_options(
+    ///         "    indented code?",
+    ///         &Options {
+    ///             constructs: Constructs {
+    ///                 code_indented: false,
+    ///                 ..Constructs::default()
+    ///             },
+    ///             ..Options::default()
+    ///         }
+    ///     ),
+    ///     "<p>indented code?</p>"
+    /// );
+    /// ```
+    pub constructs: Constructs,
+
+    /// Default line ending to use, for line endings not in `value`.
+    ///
+    /// Generally, micromark copies line endings (`\r`, `\n`, `\r\n`) in the
+    /// markdown document over to the compiled HTML.
+    /// In some cases, such as `> a`, CommonMark requires that extra line
+    /// endings are added: `<blockquote>\n<p>a</p>\n</blockquote>`.
+    ///
+    /// To create that line ending, the document is checked for the first line
+    /// ending that is used.
+    /// If there is no line ending, `default_line_ending` is used.
+    /// If that isn’t configured, `\n` is used.
+    ///
+    /// ## Examples
+    ///
+    /// ```
+    /// use micromark::{micromark, micromark_with_options, Options, LineEnding};
+    ///
+    /// // micromark uses `\n` by default:
+    /// assert_eq!(
+    ///     micromark("> a"),
+    ///     "<blockquote>\n<p>a</p>\n</blockquote>"
+    /// );
+    ///
+    /// // Define `default_line_ending` to configure the default:
+    /// assert_eq!(
+    ///     micromark_with_options(
+    ///         "> a",
+    ///         &Options {
+    ///             default_line_ending: LineEnding::CarriageReturnLineFeed,
+    ///             ..Options::default()
+    ///         }
+    ///     ),
+    ///     "<blockquote>\r\n<p>a</p>\r\n</blockquote>"
+    /// );
+    /// ```
+    pub default_line_ending: LineEnding,
+
     /// Label to use for the footnotes section.
     ///
     /// Change it when the markdown is not in English.
@@ -594,7 +671,7 @@ pub struct Options {
     pub gfm_footnote_clobber_prefix: Option<String>,
 
     /// Whether to support GFM strikethrough (if enabled in `constructs`) with
-    /// a single tilde (default: true).
+    /// a single tilde (default: `true`).
     ///
     /// Single tildes work on github.com but are technically prohibited by GFM.
     ///
@@ -630,73 +707,49 @@ pub struct Options {
     /// ```
     pub gfm_strikethrough_single_tilde: bool,
 
-    /// Default line ending to use, for line endings not in `value`.
+    /// Whether to support math (text) (if enabled in `constructs`) with a
+    /// single dollar (default: `true`).
     ///
-    /// Generally, micromark copies line endings (`\r`, `\n`, `\r\n`) in the
-    /// markdown document over to the compiled HTML.
-    /// In some cases, such as `> a`, CommonMark requires that extra line
-    /// endings are added: `<blockquote>\n<p>a</p>\n</blockquote>`.
-    ///
-    /// To create that line ending, the document is checked for the first line
-    /// ending that is used.
-    /// If there is no line ending, `default_line_ending` is used.
-    /// If that isn’t configured, `\n` is used.
+    /// Single dollars work in Pandoc and many other places, but often
+    /// interfere with “normal” dollars in text.
     ///
     /// ## Examples
     ///
     /// ```
-    /// use micromark::{micromark, micromark_with_options, Options, LineEnding};
-    ///
-    /// // micromark uses `\n` by default:
-    /// assert_eq!(
-    ///     micromark("> a"),
-    ///     "<blockquote>\n<p>a</p>\n</blockquote>"
-    /// );
+    /// use micromark::{micromark, micromark_with_options, Options, Constructs};
     ///
-    /// // Define `default_line_ending` to configure the default:
+    /// // With `math_text` enabled, micromark supports single dollars by default:
     /// assert_eq!(
     ///     micromark_with_options(
-    ///         "> a",
+    ///         "$a$",
     ///         &Options {
-    ///             default_line_ending: LineEnding::CarriageReturnLineFeed,
+    ///             constructs: Constructs {
+    ///                 math_text: true,
+    ///                 ..Constructs::default()
+    ///             },
     ///             ..Options::default()
     ///         }
     ///     ),
-    ///     "<blockquote>\r\n<p>a</p>\r\n</blockquote>"
-    /// );
-    /// ```
-    pub default_line_ending: LineEnding,
-
-    /// Which constructs to enable and disable.
-    /// The default is to follow `CommonMark`.
-    ///
-    /// ## Examples
-    ///
-    /// ```
-    /// use micromark::{micromark, micromark_with_options, Options, Constructs};
-    ///
-    /// // micromark follows CommonMark by default:
-    /// assert_eq!(
-    ///     micromark("    indented code?"),
-    ///     "<pre><code>indented code?\n</code></pre>"
+    ///     "<p><code class=\"lang-math math-inline\">a</code></p>"
     /// );
     ///
-    /// // Pass `constructs` to choose what to enable and disable:
+    /// // Pass `math_text_single_dollar: false` to turn that off:
     /// assert_eq!(
     ///     micromark_with_options(
-    ///         "    indented code?",
+    ///         "$a$",
     ///         &Options {
     ///             constructs: Constructs {
-    ///                 code_indented: false,
+    ///                 math_text: true,
     ///                 ..Constructs::default()
     ///             },
+    ///             math_text_single_dollar: false,
     ///             ..Options::default()
     ///         }
     ///     ),
-    ///     "<p>indented code?</p>"
+    ///     "<p>$a$</p>"
     /// );
     /// ```
-    pub constructs: Constructs,
+    pub math_text_single_dollar: bool,
 }
 
 impl Default for Options {
@@ -705,14 +758,15 @@ impl Default for Options {
         Self {
             allow_dangerous_html: false,
             allow_dangerous_protocol: false,
+            constructs: Constructs::default(),
+            default_line_ending: LineEnding::default(),
             gfm_footnote_label: None,
             gfm_footnote_label_tag_name: None,
             gfm_footnote_label_attributes: None,
             gfm_footnote_back_label: None,
             gfm_footnote_clobber_prefix: None,
             gfm_strikethrough_single_tilde: true,
-            default_line_ending: LineEnding::default(),
-            constructs: Constructs::default(),
+            math_text_single_dollar: true,
         }
     }
 }
diff --git a/src/state.rs b/src/state.rs
index 6c3f563..0c04821 100644
--- a/src/state.rs
+++ b/src/state.rs
@@ -78,11 +78,11 @@ pub enum Name {
     CodeIndentedFurtherBegin,
     CodeIndentedFurtherAfter,
 
-    CodeTextStart,
-    CodeTextSequenceOpen,
-    CodeTextBetween,
-    CodeTextData,
-    CodeTextSequenceClose,
+    RawTextStart,
+    RawTextSequenceOpen,
+    RawTextBetween,
+    RawTextData,
+    RawTextSequenceClose,
 
     DataStart,
     DataInside,
@@ -392,11 +392,11 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
         Name::CodeIndentedFurtherBegin => construct::code_indented::further_begin,
         Name::CodeIndentedFurtherAfter => construct::code_indented::further_after,
 
-        Name::CodeTextStart => construct::code_text::start,
-        Name::CodeTextSequenceOpen => construct::code_text::sequence_open,
-        Name::CodeTextBetween => construct::code_text::between,
-        Name::CodeTextData => construct::code_text::data,
-        Name::CodeTextSequenceClose => construct::code_text::sequence_close,
+        Name::RawTextStart => construct::raw_text::start,
+        Name::RawTextSequenceOpen => construct::raw_text::sequence_open,
+        Name::RawTextBetween => construct::raw_text::between,
+        Name::RawTextData => construct::raw_text::data,
+        Name::RawTextSequenceClose => construct::raw_text::sequence_close,
 
         Name::DataStart => construct::partial_data::start,
         Name::DataInside => construct::partial_data::inside,
author	Titus Wormer <tituswormer@gmail.com>	2022-08-26 10:57:20 +0200
committer	Titus Wormer <tituswormer@gmail.com>	2022-08-26 11:00:51 +0200
commit	f41688c067be261279804b8ab3e04cd5d67f492f (patch)
tree	7ddf8e3b4149de8c8425c8be3e6963c524ad4909 /src
parent	1e4c95079cb97b2b02440b21945c6d12741a7d19 (diff)
download	markdown-rs-f41688c067be261279804b8ab3e04cd5d67f492f.tar.gz markdown-rs-f41688c067be261279804b8ab3e04cd5d67f492f.tar.bz2 markdown-rs-f41688c067be261279804b8ab3e04cd5d67f492f.zip