aboutsummaryrefslogtreecommitdiffstats
path: root/src/construct
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-08-26 10:57:20 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-08-26 11:00:51 +0200
commitf41688c067be261279804b8ab3e04cd5d67f492f (patch)
tree7ddf8e3b4149de8c8425c8be3e6963c524ad4909 /src/construct
parent1e4c95079cb97b2b02440b21945c6d12741a7d19 (diff)
downloadmarkdown-rs-f41688c067be261279804b8ab3e04cd5d67f492f.tar.gz
markdown-rs-f41688c067be261279804b8ab3e04cd5d67f492f.tar.bz2
markdown-rs-f41688c067be261279804b8ab3e04cd5d67f492f.zip
Add support for math (text)
Diffstat (limited to '')
-rw-r--r--src/construct/code_fenced.rs4
-rw-r--r--src/construct/code_indented.rs4
-rw-r--r--src/construct/code_text.rs213
-rw-r--r--src/construct/mod.rs6
-rw-r--r--src/construct/paragraph.rs4
-rw-r--r--src/construct/partial_label.rs4
-rw-r--r--src/construct/raw_text.rs270
-rw-r--r--src/construct/text.rs35
8 files changed, 301 insertions, 239 deletions
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index d117006..edb2d93 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -48,7 +48,7 @@
//! Which value it holds depends on what your syntax highlighter supports, if
//! one is used.
//!
-//! In markdown, it is also possible to use [code (text)][code_text] in the
+//! In markdown, it is also possible to use [code (text)][raw_text] in the
//! [text][] content type.
//! It is also possible to create code with the
//! [code (indented)][code_indented] construct.
@@ -106,7 +106,7 @@
//! [character_escape]: crate::construct::character_escape
//! [character_reference]: crate::construct::character_reference
//! [code_indented]: crate::construct::code_indented
-//! [code_text]: crate::construct::code_text
+//! [raw_text]: crate::construct::raw_text
//! [html_code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
//! [html_pre]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-pre-element
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs
index 7d279c1..5fc9446 100644
--- a/src/construct/code_indented.rs
+++ b/src/construct/code_indented.rs
@@ -16,7 +16,7 @@
//! As this construct occurs in flow, like all flow constructs, it must be
//! followed by an eol (line ending) or eof (end of file).
//!
-//! In markdown, it is also possible to use [code (text)][code_text] in the
+//! In markdown, it is also possible to use [code (text)][raw_text] in the
//! [text][] content type.
//! It is also possible to create code with the [code (fenced)][code_fenced]
//! construct.
@@ -49,7 +49,7 @@
//! [flow]: crate::construct::flow
//! [text]: crate::construct::text
//! [code_fenced]: crate::construct::code_fenced
-//! [code_text]: crate::construct::code_text
+//! [raw_text]: crate::construct::raw_text
//! [html_code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
//! [html_pre]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-pre-element
diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs
deleted file mode 100644
index b2cfd17..0000000
--- a/src/construct/code_text.rs
+++ /dev/null
@@ -1,213 +0,0 @@
-//! Code (text) occurs in the [text][] content type.
-//!
-//! ## Grammar
-//!
-//! Code (text) forms with the following BNF
-//! (<small>see [construct][crate::construct] for character groups</small>):
-//!
-//! ```bnf
-//! ; Restriction: the number of markers in the closing sequence must be equal
-//! ; to the number of markers in the opening sequence.
-//! code_text ::= sequence 1*byte sequence
-//!
-//! ; Restriction: not preceded or followed by `` ` ``.
-//! sequence ::= 1*'`'
-//! ```
-//!
-//! The above grammar shows that it is not possible to create empty code.
-//! It is possible to include grave accents (ticks) in code, by wrapping it
-//! in bigger or smaller sequences:
-//!
-//! ```markdown
-//! Include more: `a``b` or include less: ``a`b``.
-//! ```
-//!
-//! It is also possible to include just one grave accent (tick):
-//!
-//! ```markdown
-//! Include just one: `` ` ``.
-//! ```
-//!
-//! Sequences are “gready”, in that they cannot be preceded or followed by
-//! more grave accents (ticks).
-//! To illustrate:
-//!
-//! ```markdown
-//! Not code: ``x`.
-//!
-//! Not code: `x``.
-//!
-//! Escapes work, this is code: \``x`.
-//!
-//! Escapes work, this is code: `x`\`.
-//! ```
-//!
-//! Yields:
-//!
-//! ```html
-//! <p>Not code: ``x`.</p>
-//! <p>Not code: `x``.</p>
-//! <p>Escapes work, this is code: `<code>x</code>.</p>
-//! <p>Escapes work, this is code: <code>x</code>`.</p>
-//! ```
-//!
-//! That is because, when turning markdown into HTML, the first and last space,
-//! if both exist and there is also a non-space in the code, are removed.
-//! Line endings, at that stage, are considered as spaces.
-//!
-//! In markdown, it is possible to create code with the
-//! [code (fenced)][code_fenced] or [code (indented)][code_indented] constructs
-//! in the [flow][] content type.
-//!
-//! ## HTML
-//!
-//! Code (text) relates to the `<code>` element in HTML.
-//! See [*§ 4.5.15 The `code` element*][html_code] in the HTML spec for more
-//! info.
-//!
-//! When turning markdown into HTML, each line ending is turned into a space.
-//!
-//! ## Tokens
-//!
-//! * [`CodeText`][Name::CodeText]
-//! * [`CodeTextData`][Name::CodeTextData]
-//! * [`CodeTextSequence`][Name::CodeTextSequence]
-//! * [`LineEnding`][Name::LineEnding]
-//!
-//! ## References
-//!
-//! * [`code-text.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/code-text.js)
-//! * [*§ 6.1 Code spans* in `CommonMark`](https://spec.commonmark.org/0.30/#code-spans)
-//!
-//! [flow]: crate::construct::flow
-//! [text]: crate::construct::text
-//! [code_indented]: crate::construct::code_indented
-//! [code_fenced]: crate::construct::code_fenced
-//! [html_code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
-
-use crate::event::Name;
-use crate::state::{Name as StateName, State};
-use crate::tokenizer::Tokenizer;
-
-/// Start of code (text).
-///
-/// ```markdown
-/// > | `a`
-/// ^
-/// > | \`a`
-/// ^
-/// ```
-pub fn start(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- Some(b'`')
- if tokenizer.parse_state.options.constructs.code_text
- && (tokenizer.previous != Some(b'`')
- || (!tokenizer.events.is_empty()
- && tokenizer.events[tokenizer.events.len() - 1].name
- == Name::CharacterEscape)) =>
- {
- tokenizer.enter(Name::CodeText);
- tokenizer.enter(Name::CodeTextSequence);
- State::Retry(StateName::CodeTextSequenceOpen)
- }
- _ => State::Nok,
- }
-}
-
-/// In opening sequence.
-///
-/// ```markdown
-/// > | `a`
-/// ^
-/// ```
-pub fn sequence_open(tokenizer: &mut Tokenizer) -> State {
- if let Some(b'`') = tokenizer.current {
- tokenizer.tokenize_state.size += 1;
- tokenizer.consume();
- State::Next(StateName::CodeTextSequenceOpen)
- } else {
- tokenizer.exit(Name::CodeTextSequence);
- State::Retry(StateName::CodeTextBetween)
- }
-}
-
-/// Between something and something else
-///
-/// ```markdown
-/// > | `a`
-/// ^^
-/// ```
-pub fn between(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- None => {
- tokenizer.tokenize_state.size = 0;
- State::Nok
- }
- Some(b'\n') => {
- tokenizer.enter(Name::LineEnding);
- tokenizer.consume();
- tokenizer.exit(Name::LineEnding);
- State::Next(StateName::CodeTextBetween)
- }
- Some(b'`') => {
- tokenizer.enter(Name::CodeTextSequence);
- State::Retry(StateName::CodeTextSequenceClose)
- }
- _ => {
- tokenizer.enter(Name::CodeTextData);
- State::Retry(StateName::CodeTextData)
- }
- }
-}
-
-/// In data.
-///
-/// ```markdown
-/// > | `a`
-/// ^
-/// ```
-pub fn data(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- None | Some(b'\n' | b'`') => {
- tokenizer.exit(Name::CodeTextData);
- State::Retry(StateName::CodeTextBetween)
- }
- _ => {
- tokenizer.consume();
- State::Next(StateName::CodeTextData)
- }
- }
-}
-
-/// In closing sequence.
-///
-/// ```markdown
-/// > | `a`
-/// ^
-/// ```
-pub fn sequence_close(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- Some(b'`') => {
- tokenizer.tokenize_state.size_b += 1;
- tokenizer.consume();
- State::Next(StateName::CodeTextSequenceClose)
- }
- _ => {
- if tokenizer.tokenize_state.size == tokenizer.tokenize_state.size_b {
- tokenizer.exit(Name::CodeTextSequence);
- tokenizer.exit(Name::CodeText);
- tokenizer.tokenize_state.size = 0;
- tokenizer.tokenize_state.size_b = 0;
- State::Ok
- } else {
- let index = tokenizer.events.len();
- tokenizer.exit(Name::CodeTextSequence);
- // More or less accents: mark as data.
- tokenizer.events[index - 1].name = Name::CodeTextData;
- tokenizer.events[index].name = Name::CodeTextData;
- tokenizer.tokenize_state.size_b = 0;
- State::Retry(StateName::CodeTextBetween)
- }
- }
- }
-}
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index c5002bb..a0065fa 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -38,7 +38,7 @@
//! * [character reference][character_reference]
//! * [code (fenced)][code_fenced]
//! * [code (indented)][code_indented]
-//! * [code (text)][code_text]
+//! * [code (text)][raw_text]
//! * [definition][]
//! * [hard break (escape)][hard_break_escape]
//! * [heading (atx)][heading_atx]
@@ -62,6 +62,8 @@
//! * [gfm footnote definition][gfm_footnote_definition]
//! * [gfm task list item check][gfm_task_list_item_check]
//! * [gfm label start footnote][gfm_label_start_footnote]
+//! * math (text) (in `raw_text`)
+//! * gfm strikethrough (in attention)
//!
//! There are also several small subroutines typically used in different places:
//!
@@ -143,7 +145,6 @@ pub mod character_escape;
pub mod character_reference;
pub mod code_fenced;
pub mod code_indented;
-pub mod code_text;
pub mod definition;
pub mod document;
pub mod flow;
@@ -171,6 +172,7 @@ pub mod partial_space_or_tab;
pub mod partial_space_or_tab_eol;
pub mod partial_title;
pub mod partial_whitespace;
+pub mod raw_text;
pub mod string;
pub mod text;
pub mod thematic_break;
diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs
index 9e20643..c1e7311 100644
--- a/src/construct/paragraph.rs
+++ b/src/construct/paragraph.rs
@@ -18,7 +18,7 @@
//! allowed to contain blank lines, or to be blank themselves.
//!
//! The paragraph is interpreted as the [text][] content type.
-//! That means that [autolinks][autolink], [code (text)][code_text], etc are allowed.
+//! That means that [autolinks][autolink], [code (text)][raw_text], etc are allowed.
//!
//! ## HTML
//!
@@ -37,7 +37,7 @@
//! [flow]: crate::construct::flow
//! [text]: crate::construct::text
//! [autolink]: crate::construct::autolink
-//! [code_text]: crate::construct::code_text
+//! [raw_text]: crate::construct::raw_text
//! [html]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-p-element
use crate::event::{Content, Kind, Link, Name};
diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs
index ab436b2..255fde1 100644
--- a/src/construct/partial_label.rs
+++ b/src/construct/partial_label.rs
@@ -44,7 +44,7 @@
//! > ([label start (image)][label_start_image] or
//! > [label start (link)][label_start_link]) and a closing
//! > ([label end][label_end]), so as to allow further phrasing such as
-//! > [code (text)][code_text] or [attention][].
+//! > [code (text)][raw_text] or [attention][].
//!
//! ## References
//!
@@ -58,7 +58,7 @@
//! [label_start_image]: crate::construct::label_start_image
//! [label_start_link]: crate::construct::label_start_link
//! [label_end]: crate::construct::label_end
-//! [code_text]: crate::construct::code_text
+//! [raw_text]: crate::construct::raw_text
//! [link_reference_size_max]: crate::util::constant::LINK_REFERENCE_SIZE_MAX
use crate::construct::partial_space_or_tab_eol::{space_or_tab_eol_with_options, Options};
diff --git a/src/construct/raw_text.rs b/src/construct/raw_text.rs
new file mode 100644
index 0000000..7f3990d
--- /dev/null
+++ b/src/construct/raw_text.rs
@@ -0,0 +1,270 @@
+//! Raw (text) occurs in the [text][] content type.
+//! It forms code (text) and math (text).
+//!
+//! ## Grammar
+//!
+//! Raw (text) forms with the following BNF
+//! (<small>see [construct][crate::construct] for character groups</small>):
+//!
+//! ```bnf
+//! ; Restriction: the number of markers in the closing sequence must be equal
+//! ; to the number of markers in the opening sequence.
+//! raw_text ::= sequence 1*byte sequence
+//!
+//! ; Restriction: not preceded or followed by the same marker.
+//! sequence ::= 1*'`' | 1*'$'
+//! ```
+//!
+//! The above grammar shows that it is not possible to create empty raw (text).
+//! It is possible to include the sequence marker (grave accent for code,
+//! dollar for math) in raw (text), by wrapping it in bigger or smaller
+//! sequences:
+//!
+//! ```markdown
+//! Include more: `a``b` or include less: ``a`b``.
+//! ```
+//!
+//! It is also possible to include just one marker:
+//!
+//! ```markdown
+//! Include just one: `` ` ``.
+//! ```
+//!
+//! Sequences are “greedy”, in that they cannot be preceded or followed by
+//! more markers.
+//! To illustrate:
+//!
+//! ```markdown
+//! Not code: ``x`.
+//!
+//! Not code: `x``.
+//!
+//! Escapes work, this is code: \``x`.
+//!
+//! Escapes work, this is code: `x`\`.
+//! ```
+//!
+//! Yields:
+//!
+//! ```html
+//! <p>Not code: ``x`.</p>
+//! <p>Not code: `x``.</p>
+//! <p>Escapes work, this is code: `<code>x</code>.</p>
+//! <p>Escapes work, this is code: <code>x</code>`.</p>
+//! ```
+//!
+//! That is because, when turning markdown into HTML, the first and last space,
+//! if both exist and there is also a non-space in the code, are removed.
+//! Line endings, at that stage, are considered as spaces.
+//!
+//! In markdown, it is possible to create code with the
+//! [code (fenced)][code_fenced] or [code (indented)][code_indented],
+//! and math with the [math (flow)][math_flow] constructs in the [flow][]
+//! content type.
+//!
+//! ## HTML
+//!
+//! Code (text) relates to the `<code>` element in HTML.
+//! See [*§ 4.5.15 The `code` element*][html_code] in the HTML spec for more
+//! info.
+//!
+//! Math (text) does not relate to HTML elements.
+//! `MathML`, which is sort of like SVG but for math, exists but it doesn’t work
+//! well and isn’t widely supported.
+//! Instead, it is recommended to use client side JavaScript with something like
+//! `KaTeX` or `MathJax` to process the math.
+//! For that, the math is compiled as a `<code>` element with two classes:
+//! `lang-math` and `math-inline`.
+//! Client side JavaScript can look for these classes to process them further.
+//!
+//! When turning markdown into HTML, each line ending in raw (text) is turned
+//! into a space.
+//!
+//! ## Recommendations
+//!
+//! When authoring markdown with math, keep in mind that math doesn’t work in
+//! most places.
+//! Notably, GitHub currently has a really weird crappy client-side regex-based
+//! thing.
+//! But on your own (math-heavy?) site it can be great!
+//! Alternatively, set `options.math_text_single_dollar: false`, which prevents
+//! single dollars from being seen as math, and thus prevents normal dollars in
+//! text from being seen as math.
+//!
+//! ## Tokens
+//!
+//! * [`CodeText`][Name::CodeText]
+//! * [`CodeTextData`][Name::CodeTextData]
+//! * [`CodeTextSequence`][Name::CodeTextSequence]
+//! * [`MathText`][Name::MathText]
+//! * [`MathTextData`][Name::MathTextData]
+//! * [`MathTextSequence`][Name::MathTextSequence]
+//! * [`LineEnding`][Name::LineEnding]
+//!
+//! ## References
+//!
+//! * [`code-text.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/code-text.js)
+//! * [`micromark-extension-math`](https://github.com/micromark/micromark-extension-math)
+//! * [*§ 6.1 Code spans* in `CommonMark`](https://spec.commonmark.org/0.30/#code-spans)
+//!
+//! [flow]: crate::construct::flow
+//! [text]: crate::construct::text
+//! [code_indented]: crate::construct::code_indented
+//! [code_fenced]: crate::construct::code_fenced
+//! [math_flow]: # "to do"
+//! [html_code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
+
+use crate::event::Name;
+use crate::state::{Name as StateName, State};
+use crate::tokenizer::Tokenizer;
+
+/// Start of raw (text).
+///
+/// ```markdown
+/// > | `a`
+/// ^
+/// > | \`a`
+/// ^
+/// ```
+pub fn start(tokenizer: &mut Tokenizer) -> State {
+ // Code (text):
+ if ((tokenizer.parse_state.options.constructs.code_text && tokenizer.current == Some(b'`'))
+ // Math (text):
+ || (tokenizer.parse_state.options.constructs.math_text && tokenizer.current == Some(b'$')))
+ // Not the same marker (except when escaped).
+ && (tokenizer.previous != tokenizer.current
+ || (!tokenizer.events.is_empty()
+ && tokenizer.events[tokenizer.events.len() - 1].name == Name::CharacterEscape))
+ {
+ let marker = tokenizer.current.unwrap();
+ if marker == b'`' {
+ tokenizer.tokenize_state.token_1 = Name::CodeText;
+ tokenizer.tokenize_state.token_2 = Name::CodeTextSequence;
+ tokenizer.tokenize_state.token_3 = Name::CodeTextData;
+ } else {
+ tokenizer.tokenize_state.token_1 = Name::MathText;
+ tokenizer.tokenize_state.token_2 = Name::MathTextSequence;
+ tokenizer.tokenize_state.token_3 = Name::MathTextData;
+ }
+ tokenizer.tokenize_state.marker = marker;
+ tokenizer.enter(tokenizer.tokenize_state.token_1.clone());
+ tokenizer.enter(tokenizer.tokenize_state.token_2.clone());
+ State::Retry(StateName::RawTextSequenceOpen)
+ } else {
+ State::Nok
+ }
+}
+
+/// In opening sequence.
+///
+/// ```markdown
+/// > | `a`
+/// ^
+/// ```
+pub fn sequence_open(tokenizer: &mut Tokenizer) -> State {
+ if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
+ tokenizer.tokenize_state.size += 1;
+ tokenizer.consume();
+ State::Next(StateName::RawTextSequenceOpen)
+ }
+ // Not enough markers in the sequence.
+ else if tokenizer.tokenize_state.marker == b'$'
+ && tokenizer.tokenize_state.size == 1
+ && !tokenizer.parse_state.options.math_text_single_dollar
+ {
+ tokenizer.tokenize_state.marker = 0;
+ tokenizer.tokenize_state.size = 0;
+ tokenizer.tokenize_state.token_1 = Name::Data;
+ tokenizer.tokenize_state.token_2 = Name::Data;
+ tokenizer.tokenize_state.token_3 = Name::Data;
+ State::Nok
+ } else {
+ tokenizer.exit(tokenizer.tokenize_state.token_2.clone());
+ State::Retry(StateName::RawTextBetween)
+ }
+}
+
+/// Between the opening sequence, data, line endings, and the closing sequence.
+///
+/// ```markdown
+/// > | `a`
+/// ^^
+/// ```
+pub fn between(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ None => {
+ tokenizer.tokenize_state.marker = 0;
+ tokenizer.tokenize_state.size = 0;
+ tokenizer.tokenize_state.token_1 = Name::Data;
+ tokenizer.tokenize_state.token_2 = Name::Data;
+ tokenizer.tokenize_state.token_3 = Name::Data;
+ State::Nok
+ }
+ Some(b'\n') => {
+ tokenizer.enter(Name::LineEnding);
+ tokenizer.consume();
+ tokenizer.exit(Name::LineEnding);
+ State::Next(StateName::RawTextBetween)
+ }
+ _ => {
+ if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
+ tokenizer.enter(tokenizer.tokenize_state.token_2.clone());
+ State::Retry(StateName::RawTextSequenceClose)
+ } else {
+ tokenizer.enter(tokenizer.tokenize_state.token_3.clone());
+ State::Retry(StateName::RawTextData)
+ }
+ }
+ }
+}
+
+/// In data.
+///
+/// ```markdown
+/// > | `a`
+/// ^
+/// ```
+pub fn data(tokenizer: &mut Tokenizer) -> State {
+ if matches!(tokenizer.current, None | Some(b'\n'))
+ || tokenizer.current == Some(tokenizer.tokenize_state.marker)
+ {
+ tokenizer.exit(tokenizer.tokenize_state.token_3.clone());
+ State::Retry(StateName::RawTextBetween)
+ } else {
+ tokenizer.consume();
+ State::Next(StateName::RawTextData)
+ }
+}
+
+/// In closing sequence.
+///
+/// ```markdown
+/// > | `a`
+/// ^
+/// ```
+pub fn sequence_close(tokenizer: &mut Tokenizer) -> State {
+ if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
+ tokenizer.tokenize_state.size_b += 1;
+ tokenizer.consume();
+ State::Next(StateName::RawTextSequenceClose)
+ } else {
+ tokenizer.exit(tokenizer.tokenize_state.token_2.clone());
+ if tokenizer.tokenize_state.size == tokenizer.tokenize_state.size_b {
+ tokenizer.exit(tokenizer.tokenize_state.token_1.clone());
+ tokenizer.tokenize_state.marker = 0;
+ tokenizer.tokenize_state.size = 0;
+ tokenizer.tokenize_state.size_b = 0;
+ tokenizer.tokenize_state.token_1 = Name::Data;
+ tokenizer.tokenize_state.token_2 = Name::Data;
+ tokenizer.tokenize_state.token_3 = Name::Data;
+ State::Ok
+ } else {
+ // More or fewer markers than in the opening sequence: mark as data.
+ let len = tokenizer.events.len();
+ tokenizer.events[len - 2].name = tokenizer.tokenize_state.token_3.clone();
+ tokenizer.events[len - 1].name = tokenizer.tokenize_state.token_3.clone();
+ tokenizer.tokenize_state.size_b = 0;
+ State::Retry(StateName::RawTextBetween)
+ }
+ }
+}
diff --git a/src/construct/text.rs b/src/construct/text.rs
index 5535e3f..3cb0f10 100644
--- a/src/construct/text.rs
+++ b/src/construct/text.rs
@@ -1,16 +1,16 @@
//! The text content type.
//!
//! **Text** contains phrasing content such as
-//! [attention][crate::construct::attention] (emphasis, strong),
-//! [code (text)][crate::construct::code_text], and actual text.
+//! [attention][crate::construct::attention] (emphasis, gfm strikethrough, strong),
+//! [raw (text)][crate::construct::raw_text] (code (text), math (text)), and actual text.
//!
//! The constructs found in text are:
//!
-//! * [Attention][crate::construct::attention]
+//! * [Attention][crate::construct::attention] (emphasis, gfm strikethrough, strong)
//! * [Autolink][crate::construct::autolink]
//! * [Character escape][crate::construct::character_escape]
//! * [Character reference][crate::construct::character_reference]
-//! * [Code (text)][crate::construct::code_text]
+//! * [Raw (text)][crate::construct::raw_text] (code (text), math (text))
//! * [GFM: Label start (footnote)][crate::construct::gfm_label_start_footnote]
//! * [GFM: Task list item check][crate::construct::gfm_task_list_item_check]
//! * [Hard break (escape)][crate::construct::hard_break_escape]
@@ -29,17 +29,18 @@ use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
/// Characters that can start something in text.
-const MARKERS: [u8; 10] = [
+const MARKERS: [u8; 11] = [
b'!', // `label_start_image`
+ b'$', // `raw_text` (math (text))
b'&', // `character_reference`
- b'*', // `attention`
+ b'*', // `attention` (emphasis, strong)
b'<', // `autolink`, `html_text`
b'[', // `label_start_link`
b'\\', // `character_escape`, `hard_break_escape`
b']', // `label_end`, `gfm_label_start_footnote`
- b'_', // `attention`
- b'`', // `code_text`
- b'~', // `attention` (w/ `gfm_strikethrough`)
+ b'_', // `attention` (emphasis, strong)
+ b'`', // `raw_text` (code (text))
+ b'~', // `attention` (gfm strikethrough)
];
/// Start of text.
@@ -81,6 +82,14 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
);
State::Retry(StateName::LabelStartImageStart)
}
+ // raw (text) (code (text), math (text))
+ Some(b'$' | b'`') => {
+ tokenizer.attempt(
+ State::Next(StateName::TextBefore),
+ State::Next(StateName::TextBeforeData),
+ );
+ State::Retry(StateName::RawTextStart)
+ }
Some(b'&') => {
tokenizer.attempt(
State::Next(StateName::TextBefore),
@@ -88,6 +97,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
);
State::Retry(StateName::CharacterReferenceStart)
}
+ // attention (emphasis, gfm strikethrough, strong)
Some(b'*' | b'_' | b'~') => {
tokenizer.attempt(
State::Next(StateName::TextBefore),
@@ -124,13 +134,6 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
);
State::Retry(StateName::LabelEndStart)
}
- Some(b'`') => {
- tokenizer.attempt(
- State::Next(StateName::TextBefore),
- State::Next(StateName::TextBeforeData),
- );
- State::Retry(StateName::CodeTextStart)
- }
_ => State::Retry(StateName::TextBeforeData),
}
}