author     Titus Wormer <tituswormer@gmail.com>  2022-08-26 13:29:10 +0200
committer  Titus Wormer <tituswormer@gmail.com>  2022-08-26 13:29:40 +0200
commit     670f1d82e01ea2394b21d7d1857f41bdc67b3fce (patch)
tree       d38fd96745b443dc5ece52c771fa5e39653937c4 /src/construct
parent     f41688c067be261279804b8ab3e04cd5d67f492f (diff)
Add support for math (flow)
Diffstat (limited to 'src/construct')
-rw-r--r--  src/construct/code_indented.rs                                            4
-rw-r--r--  src/construct/flow.rs                                                    26
-rw-r--r--  src/construct/mod.rs                                                      6
-rw-r--r--  src/construct/partial_non_lazy_continuation.rs                            4
-rw-r--r--  src/construct/raw_flow.rs (renamed from src/construct/code_fenced.rs)  252
-rw-r--r--  src/construct/raw_text.rs                                                19
6 files changed, 205 insertions(+), 106 deletions(-)
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs
index 5fc9446..f2644d4 100644
--- a/src/construct/code_indented.rs
+++ b/src/construct/code_indented.rs
@@ -18,7 +18,7 @@
//!
//! In markdown, it is also possible to use [code (text)][raw_text] in the
//! [text][] content type.
-//! It is also possible to create code with the [code (fenced)][code_fenced]
+//! It is also possible to create code with the [code (fenced)][raw_flow]
//! construct.
//!
//! ## HTML
@@ -48,7 +48,7 @@
//!
//! [flow]: crate::construct::flow
//! [text]: crate::construct::text
-//! [code_fenced]: crate::construct::code_fenced
+//! [raw_flow]: crate::construct::raw_flow
//! [raw_text]: crate::construct::raw_text
//! [html_code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
//! [html_pre]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-pre-element
diff --git a/src/construct/flow.rs b/src/construct/flow.rs
index f3c7685..3f1cd77 100644
--- a/src/construct/flow.rs
+++ b/src/construct/flow.rs
@@ -11,12 +11,12 @@
//! The constructs found in flow are:
//!
//! * [Blank line][crate::construct::blank_line]
-//! * [Code (fenced)][crate::construct::code_fenced]
//! * [Code (indented)][crate::construct::code_indented]
//! * [Definition][crate::construct::definition]
//! * [Heading (atx)][crate::construct::heading_atx]
//! * [Heading (setext)][crate::construct::heading_setext]
//! * [HTML (flow)][crate::construct::html_flow]
+//! * [Raw (flow)][crate::construct::raw_flow] (code (fenced), math (flow))
//! * [Thematic break][crate::construct::thematic_break]
use crate::event::Name;
@@ -42,6 +42,15 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
);
State::Retry(StateName::HeadingAtxStart)
}
+ Some(b'$' | b'`' | b'~') => {
+ tokenizer.attempt(
+ State::Next(StateName::FlowAfter),
+ State::Next(StateName::FlowBeforeParagraph),
+ );
+ State::Retry(StateName::RawFlowStart)
+ }
+ // Note: `-` is also used in setext heading underline so it’s not
+ // included here.
Some(b'*' | b'_') => {
tokenizer.attempt(
State::Next(StateName::FlowAfter),
@@ -71,13 +80,6 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
);
State::Retry(StateName::DefinitionStart)
}
- Some(b'`' | b'~') => {
- tokenizer.attempt(
- State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeParagraph),
- );
- State::Retry(StateName::CodeFencedStart)
- }
// Actual parsing: blank line? Indented code? Indented anything?
// Also includes `-` which can be a setext heading underline or thematic break.
None | Some(b'\t' | b'\n' | b' ' | b'-') => State::Retry(StateName::FlowBlankLineBefore),
@@ -112,23 +114,23 @@ pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State {
pub fn before_code_indented(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(
State::Next(StateName::FlowAfter),
- State::Next(StateName::FlowBeforeCodeFenced),
+ State::Next(StateName::FlowBeforeRaw),
);
State::Retry(StateName::CodeIndentedStart)
}
-/// At code (fenced).
+/// At raw.
///
/// ````markdown
/// > | ```
/// ^
/// ````
-pub fn before_code_fenced(tokenizer: &mut Tokenizer) -> State {
+pub fn before_raw(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(
State::Next(StateName::FlowAfter),
State::Next(StateName::FlowBeforeHtml),
);
- State::Retry(StateName::CodeFencedStart)
+ State::Retry(StateName::RawFlowStart)
}
/// At html (flow).
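As a reading aid for the dispatch above (not the crate's actual types), here is a toy sketch of the same fallthrough idea: each flow construct is tried in turn, the first one that accepts the line wins, and paragraph is the final fallback. In the real code, the new `$` branch routes to `RawFlowStart` alongside the backtick and tilde branches.

````rust
// Toy illustration only: the real tokenizer drives this via
// `attempt`/`State::Retry` state names, and these closures are invented
// for the example.
fn classify_flow_line(line: &str) -> &'static str {
    let attempts: [(&'static str, fn(&str) -> bool); 3] = [
        // Two dollars, or three backticks/tildes, open a raw (flow) fence.
        ("raw (flow)", |l| {
            l.starts_with("$$") || l.starts_with("```") || l.starts_with("~~~")
        }),
        ("heading (atx)", |l| l.starts_with('#')),
        ("thematic break", |l| l.starts_with("***")),
    ];

    for (name, looks_like) in attempts {
        if looks_like(line) {
            return name; // like `State::Next(StateName::FlowAfter)`
        }
    }
    "paragraph" // like falling through to `FlowBeforeParagraph`
}

fn main() {
    assert_eq!(classify_flow_line("$$"), "raw (flow)");
    assert_eq!(classify_flow_line("plain text"), "paragraph");
}
````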
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index a0065fa..9add015 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -25,7 +25,7 @@
//! thematic break.
//! These things are called constructs here.
//! Sometimes, there are several constructs that result in an equivalent thing.
-//! For example, [code (fenced)][code_fenced] and
+//! For example, [code (fenced)][raw_flow] and
//! [code (indented)][code_indented] are considered different constructs.
//!
//! The following constructs are found in markdown (CommonMark):
@@ -36,7 +36,6 @@
//! * [block quote][block_quote]
//! * [character escape][character_escape]
//! * [character reference][character_reference]
-//! * [code (fenced)][code_fenced]
//! * [code (indented)][code_indented]
//! * [code (text)][raw_text]
//! * [definition][]
@@ -50,6 +49,7 @@
//! * [label start (link)][label_start_link]
//! * [list item][list_item]
//! * [paragraph][]
+//! * [raw (flow)][raw_flow] (code (fenced), math (flow))
//! * [thematic break][thematic_break]
//!
//! > 👉 **Note**: for performance reasons, hard break (trailing) is formed by
@@ -143,7 +143,6 @@ pub mod blank_line;
pub mod block_quote;
pub mod character_escape;
pub mod character_reference;
-pub mod code_fenced;
pub mod code_indented;
pub mod definition;
pub mod document;
@@ -172,6 +171,7 @@ pub mod partial_space_or_tab;
pub mod partial_space_or_tab_eol;
pub mod partial_title;
pub mod partial_whitespace;
+pub mod raw_flow;
pub mod raw_text;
pub mod string;
pub mod text;
diff --git a/src/construct/partial_non_lazy_continuation.rs b/src/construct/partial_non_lazy_continuation.rs
index 35641ee..26a20dd 100644
--- a/src/construct/partial_non_lazy_continuation.rs
+++ b/src/construct/partial_non_lazy_continuation.rs
@@ -2,11 +2,11 @@
//!
//! This is a tiny helper that [flow][] constructs can use to make sure that
//! the following line is not lazy.
-//! For example, [html (flow)][html_flow] and code ([fenced][code_fenced],
+//! For example, [html (flow)][html_flow] and code ([raw (flow)][raw_flow],
//! [indented][code_indented]), stop when the next line is lazy.
//!
//! [flow]: crate::construct::flow
-//! [code_fenced]: crate::construct::code_fenced
+//! [raw_flow]: crate::construct::raw_flow
//! [code_indented]: crate::construct::code_indented
//! [html_flow]: crate::construct::html_flow
diff --git a/src/construct/code_fenced.rs b/src/construct/raw_flow.rs
index edb2d93..7eaac0c 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/raw_flow.rs
@@ -1,4 +1,5 @@
-//! Code (fenced) occurs in the [flow][] content type.
+//! Raw (flow) occurs in the [flow][] content type.
+//! It forms code (fenced) and math (flow).
//!
//! ## Grammar
//!
@@ -6,20 +7,21 @@
//! (<small>see [construct][crate::construct] for character groups</small>):
//!
//! ```bnf
-//! code_fenced ::= fence_open *( eol *byte ) [ eol fence_close ]
+//! raw_flow ::= fence_open *( eol *byte ) [ eol fence_close ]
//!
-//! fence_open ::= sequence [ 1*space_or_tab info [ 1*space_or_tab meta ] ] *space_or_tab
+//! ; Restriction: math (flow) does not support the `info` part.
+//! fence_open ::= sequence [1*space_or_tab info [1*space_or_tab meta]] *space_or_tab
//! ; Restriction: the number of markers in the closing fence sequence must be
//! ; equal to or greater than the number of markers in the opening fence
//! ; sequence.
//! ; Restriction: the marker in the closing fence sequence must match the
//! ; marker in the opening fence sequence
//! fence_close ::= sequence *space_or_tab
-//! sequence ::= 3*'`' | 3*'~'
-//! ; Restriction: the `` ` `` character cannot occur in `info` if it is the marker.
+//! sequence ::= 3*'`' | 3*'~' | 2*'$'
+//! ; Restriction: the marker cannot occur in `info` if it is the `$` or `` ` `` character.
//! info ::= 1*text
-//! ; Restriction: the `` ` `` character cannot occur in `meta` if it is the marker.
-//! meta ::= 1*text *( *space_or_tab 1*text )
+//! ; Restriction: the marker cannot occur in `meta` if it is the `$` or `` ` `` character.
+//! meta ::= 1*text *(*space_or_tab 1*text)
//! ```
//!
//! As this construct occurs in flow, like all flow constructs, it must be
@@ -27,28 +29,29 @@
//!
//! The above grammar does not show how indentation (with `space_or_tab`) of
//! each line is handled.
-//! To parse code (fenced), let `x` be the number of `space_or_tab` characters
+//! To parse raw (flow), let `x` be the number of `space_or_tab` characters
//! before the opening fence sequence.
//! Each line of text is then allowed (not required) to be indented with up
//! to `x` spaces or tabs, which are then ignored as an indent instead of being
-//! considered as part of the code.
+//! considered as part of the content.
//! This indent does not affect the closing fence.
//! It can be indented up to a separate 3 spaces or tabs.
-//! A bigger indent makes it part of the code instead of a fence.
+//! A bigger indent makes it part of the content instead of a fence.
//!
//! The `info` and `meta` parts are interpreted as the [string][] content type.
//! That means that [character escapes][character_escape] and
//! [character references][character_reference] are allowed.
+//! Math (flow) does not support `info`.
//!
//! The optional `meta` part is ignored: it is not used when parsing or
//! rendering.
//!
//! The optional `info` part is used and is expected to specify the programming
-//! language that the code is in.
+//! language that the content is in.
//! Which value it holds depends on what your syntax highlighter supports, if
//! one is used.
//!
-//! In markdown, it is also possible to use [code (text)][raw_text] in the
+//! In markdown, it is also possible to use [raw (text)][raw_text] in the
//! [text][] content type.
//! It is also possible to create code with the
//! [code (indented)][code_indented] construct.
@@ -60,6 +63,15 @@
//! See [*§ 4.4.3 The `pre` element*][html_pre] and the [*§ 4.5.15 The `code`
//! element*][html_code] in the HTML spec for more info.
//!
+//! Math (flow) does not relate to HTML elements.
+//! `MathML`, which is sort of like SVG but for math, exists but it doesn’t work
+//! well and isn’t widely supported.
+//! Instead, it is recommended to use client side JavaScript with something like
+//! `KaTeX` or `MathJax` to process the math.
+//! For that, the math is compiled as a `<pre>` and a `<code>` element with two
+//! classes: `language-math` and `math-display`.
+//! Client side JavaScript can look for these classes to process them further.
+//!
//! The `info` is, when rendering to HTML, typically exposed as a class.
//! This behavior stems from the HTML spec ([*§ 4.5.15 The `code`
//! element*][html_code]).
@@ -84,6 +96,14 @@
//! Code (fenced) is more explicit, similar to code (text), and has support
//! for specifying the programming language.
//!
+//! When authoring markdown with math, keep in mind that math doesn’t work in
+//! most places.
+//! Notably, GitHub currently has a really weird crappy client-side regex-based
+//! thing.
+//! But on your own (math-heavy?) site it can be great!
+//! You can use code (fenced) with an info string of `math` to improve this, as
+//! that works in many places.
+//!
//! ## Tokens
//!
//! * [`CodeFenced`][Name::CodeFenced]
@@ -93,13 +113,21 @@
//! * [`CodeFencedFenceSequence`][Name::CodeFencedFenceSequence]
//! * [`CodeFlowChunk`][Name::CodeFlowChunk]
//! * [`LineEnding`][Name::LineEnding]
+//! * [`MathFlow`][Name::MathFlow]
+//! * [`MathFlowFence`][Name::MathFlowFence]
+//! * [`MathFlowFenceMeta`][Name::MathFlowFenceMeta]
+//! * [`MathFlowFenceSequence`][Name::MathFlowFenceSequence]
+//! * [`MathFlowChunk`][Name::MathFlowChunk]
//! * [`SpaceOrTab`][Name::SpaceOrTab]
//!
//! ## References
//!
//! * [`code-fenced.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/code-fenced.js)
+//! * [`micromark-extension-math`](https://github.com/micromark/micromark-extension-math)
//! * [*§ 4.5 Fenced code blocks* in `CommonMark`](https://spec.commonmark.org/0.30/#fenced-code-blocks)
//!
+//! > 👉 **Note**: math is not specified anywhere.
+//!
//! [flow]: crate::construct::flow
//! [string]: crate::construct::string
//! [text]: crate::construct::text
@@ -119,7 +147,7 @@ use crate::util::{
slice::{Position, Slice},
};
-/// Start of fenced code.
+/// Start of raw.
///
/// ```markdown
/// > | ~~~js
@@ -128,12 +156,12 @@ use crate::util::{
/// | ~~~
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if tokenizer.parse_state.options.constructs.code_fenced {
+ if tokenizer.parse_state.options.constructs.code_fenced
+ || tokenizer.parse_state.options.constructs.math_flow
+ {
if matches!(tokenizer.current, Some(b'\t' | b' ')) {
- tokenizer.enter(Name::CodeFenced);
- tokenizer.enter(Name::CodeFencedFence);
tokenizer.attempt(
- State::Next(StateName::CodeFencedBeforeSequenceOpen),
+ State::Next(StateName::RawFlowBeforeSequenceOpen),
State::Nok,
);
return State::Retry(space_or_tab_min_max(
@@ -147,10 +175,8 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
));
}
- if matches!(tokenizer.current, Some(b'`' | b'~')) {
- tokenizer.enter(Name::CodeFenced);
- tokenizer.enter(Name::CodeFencedFence);
- return State::Retry(StateName::CodeFencedBeforeSequenceOpen);
+ if matches!(tokenizer.current, Some(b'$' | b'`' | b'~')) {
+ return State::Retry(StateName::RawFlowBeforeSequenceOpen);
}
}
@@ -179,11 +205,35 @@ pub fn before_sequence_open(tokenizer: &mut Tokenizer) -> State {
}
}
- if let Some(b'`' | b'~') = tokenizer.current {
+ // Code (fenced).
+ if (tokenizer.parse_state.options.constructs.code_fenced
+ && matches!(tokenizer.current, Some(b'`' | b'~')))
+ // Math (flow).
+ || (tokenizer.parse_state.options.constructs.math_flow && tokenizer.current == Some(b'$'))
+ {
tokenizer.tokenize_state.marker = tokenizer.current.unwrap();
tokenizer.tokenize_state.size_c = prefix;
- tokenizer.enter(Name::CodeFencedFenceSequence);
- State::Retry(StateName::CodeFencedSequenceOpen)
+ if tokenizer.tokenize_state.marker == b'$' {
+ tokenizer.tokenize_state.token_1 = Name::MathFlow;
+ tokenizer.tokenize_state.token_2 = Name::MathFlowFence;
+ tokenizer.tokenize_state.token_3 = Name::MathFlowFenceSequence;
+ // Math (flow) does not support an `info` part: everything after the
+ // opening sequence is the `meta` part.
+ tokenizer.tokenize_state.token_5 = Name::MathFlowFenceMeta;
+ tokenizer.tokenize_state.token_6 = Name::MathFlowChunk;
+ } else {
+ tokenizer.tokenize_state.token_1 = Name::CodeFenced;
+ tokenizer.tokenize_state.token_2 = Name::CodeFencedFence;
+ tokenizer.tokenize_state.token_3 = Name::CodeFencedFenceSequence;
+ tokenizer.tokenize_state.token_4 = Name::CodeFencedFenceInfo;
+ tokenizer.tokenize_state.token_5 = Name::CodeFencedFenceMeta;
+ tokenizer.tokenize_state.token_6 = Name::CodeFlowChunk;
+ }
+
+ tokenizer.enter(tokenizer.tokenize_state.token_1.clone());
+ tokenizer.enter(tokenizer.tokenize_state.token_2.clone());
+ tokenizer.enter(tokenizer.tokenize_state.token_3.clone());
+ State::Retry(StateName::RawFlowSequenceOpen)
} else {
State::Nok
}
@@ -201,19 +251,43 @@ pub fn sequence_open(tokenizer: &mut Tokenizer) -> State {
if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
tokenizer.tokenize_state.size += 1;
tokenizer.consume();
- State::Next(StateName::CodeFencedSequenceOpen)
- } else if tokenizer.tokenize_state.size < CODE_FENCED_SEQUENCE_SIZE_MIN {
+ State::Next(StateName::RawFlowSequenceOpen)
+ }
+ // To do: constant.
+ else if tokenizer.tokenize_state.size
+ < (if tokenizer.tokenize_state.marker == b'$' {
+ 2
+ } else {
+ CODE_FENCED_SEQUENCE_SIZE_MIN
+ })
+ {
tokenizer.tokenize_state.marker = 0;
tokenizer.tokenize_state.size_c = 0;
tokenizer.tokenize_state.size = 0;
+ tokenizer.tokenize_state.token_1 = Name::Data;
+ tokenizer.tokenize_state.token_2 = Name::Data;
+ tokenizer.tokenize_state.token_3 = Name::Data;
+ tokenizer.tokenize_state.token_4 = Name::Data;
+ tokenizer.tokenize_state.token_5 = Name::Data;
+ tokenizer.tokenize_state.token_6 = Name::Data;
State::Nok
- } else if matches!(tokenizer.current, Some(b'\t' | b' ')) {
- tokenizer.exit(Name::CodeFencedFenceSequence);
- tokenizer.attempt(State::Next(StateName::CodeFencedInfoBefore), State::Nok);
- State::Retry(space_or_tab(tokenizer))
} else {
- tokenizer.exit(Name::CodeFencedFenceSequence);
- State::Retry(StateName::CodeFencedInfoBefore)
+ // Math (flow) does not support an `info` part: everything after the
+ // opening sequence is the `meta` part.
+ let next = if tokenizer.tokenize_state.marker == b'$' {
+ StateName::RawFlowMetaBefore
+ } else {
+ StateName::RawFlowInfoBefore
+ };
+
+ if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+ tokenizer.exit(tokenizer.tokenize_state.token_3.clone());
+ tokenizer.attempt(State::Next(next), State::Nok);
+ State::Retry(space_or_tab(tokenizer))
+ } else {
+ tokenizer.exit(tokenizer.tokenize_state.token_3.clone());
+ State::Retry(next)
+ }
}
}
@@ -228,17 +302,17 @@ pub fn sequence_open(tokenizer: &mut Tokenizer) -> State {
pub fn info_before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
- tokenizer.exit(Name::CodeFencedFence);
+ tokenizer.exit(tokenizer.tokenize_state.token_2.clone());
// Do not form containers.
tokenizer.concrete = true;
tokenizer.check(
- State::Next(StateName::CodeFencedAtNonLazyBreak),
- State::Next(StateName::CodeFencedAfter),
+ State::Next(StateName::RawFlowAtNonLazyBreak),
+ State::Next(StateName::RawFlowAfter),
);
State::Retry(StateName::NonLazyContinuationStart)
}
_ => {
- tokenizer.enter(Name::CodeFencedFenceInfo);
+ tokenizer.enter(tokenizer.tokenize_state.token_4.clone());
tokenizer.enter_link(
Name::Data,
Link {
@@ -247,7 +321,7 @@ pub fn info_before(tokenizer: &mut Tokenizer) -> State {
content: Content::String,
},
);
- State::Retry(StateName::CodeFencedInfo)
+ State::Retry(StateName::RawFlowInfo)
}
}
}
@@ -264,25 +338,34 @@ pub fn info(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.exit(Name::Data);
- tokenizer.exit(Name::CodeFencedFenceInfo);
- State::Retry(StateName::CodeFencedInfoBefore)
+ tokenizer.exit(tokenizer.tokenize_state.token_4.clone());
+ State::Retry(StateName::RawFlowInfoBefore)
}
Some(b'\t' | b' ') => {
tokenizer.exit(Name::Data);
- tokenizer.exit(Name::CodeFencedFenceInfo);
- tokenizer.attempt(State::Next(StateName::CodeFencedMetaBefore), State::Nok);
+ tokenizer.exit(tokenizer.tokenize_state.token_4.clone());
+ tokenizer.attempt(State::Next(StateName::RawFlowMetaBefore), State::Nok);
State::Retry(space_or_tab(tokenizer))
}
Some(byte) => {
- if tokenizer.tokenize_state.marker == byte && byte == b'`' {
+ // This looks like code (text) / math (text).
+            // Note: no reason to check for `~`, because 3 of them can’t be
+ // used as strikethrough in text.
+ if tokenizer.tokenize_state.marker == byte && matches!(byte, b'$' | b'`') {
tokenizer.concrete = false;
tokenizer.tokenize_state.marker = 0;
tokenizer.tokenize_state.size_c = 0;
tokenizer.tokenize_state.size = 0;
+ tokenizer.tokenize_state.token_1 = Name::Data;
+ tokenizer.tokenize_state.token_2 = Name::Data;
+ tokenizer.tokenize_state.token_3 = Name::Data;
+ tokenizer.tokenize_state.token_4 = Name::Data;
+ tokenizer.tokenize_state.token_5 = Name::Data;
+ tokenizer.tokenize_state.token_6 = Name::Data;
State::Nok
} else {
tokenizer.consume();
- State::Next(StateName::CodeFencedInfo)
+ State::Next(StateName::RawFlowInfo)
}
}
}
@@ -298,9 +381,9 @@ pub fn info(tokenizer: &mut Tokenizer) -> State {
/// ```
pub fn meta_before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- None | Some(b'\n') => State::Retry(StateName::CodeFencedInfoBefore),
+ None | Some(b'\n') => State::Retry(StateName::RawFlowInfoBefore),
_ => {
- tokenizer.enter(Name::CodeFencedFenceMeta);
+ tokenizer.enter(tokenizer.tokenize_state.token_5.clone());
tokenizer.enter_link(
Name::Data,
Link {
@@ -309,7 +392,7 @@ pub fn meta_before(tokenizer: &mut Tokenizer) -> State {
content: Content::String,
},
);
- State::Retry(StateName::CodeFencedMeta)
+ State::Retry(StateName::RawFlowMeta)
}
}
}
@@ -326,19 +409,28 @@ pub fn meta(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.exit(Name::Data);
- tokenizer.exit(Name::CodeFencedFenceMeta);
- State::Retry(StateName::CodeFencedInfoBefore)
+ tokenizer.exit(tokenizer.tokenize_state.token_5.clone());
+ State::Retry(StateName::RawFlowInfoBefore)
}
Some(byte) => {
- if tokenizer.tokenize_state.marker == byte && byte == b'`' {
+ // This looks like code (text) / math (text).
+            // Note: no reason to check for `~`, because 3 of them can’t be
+ // used as strikethrough in text.
+ if tokenizer.tokenize_state.marker == byte && matches!(byte, b'$' | b'`') {
tokenizer.concrete = false;
tokenizer.tokenize_state.marker = 0;
tokenizer.tokenize_state.size_c = 0;
tokenizer.tokenize_state.size = 0;
+ tokenizer.tokenize_state.token_1 = Name::Data;
+ tokenizer.tokenize_state.token_2 = Name::Data;
+ tokenizer.tokenize_state.token_3 = Name::Data;
+ tokenizer.tokenize_state.token_4 = Name::Data;
+ tokenizer.tokenize_state.token_5 = Name::Data;
+ tokenizer.tokenize_state.token_6 = Name::Data;
State::Nok
} else {
tokenizer.consume();
- State::Next(StateName::CodeFencedMeta)
+ State::Next(StateName::RawFlowMeta)
}
}
}
@@ -355,13 +447,13 @@ pub fn meta(tokenizer: &mut Tokenizer) -> State {
/// ```
pub fn at_non_lazy_break(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt(
- State::Next(StateName::CodeFencedAfter),
- State::Next(StateName::CodeFencedContentBefore),
+ State::Next(StateName::RawFlowAfter),
+ State::Next(StateName::RawFlowContentBefore),
);
tokenizer.enter(Name::LineEnding);
tokenizer.consume();
tokenizer.exit(Name::LineEnding);
- State::Next(StateName::CodeFencedCloseStart)
+ State::Next(StateName::RawFlowCloseStart)
}
/// Before closing fence, at optional whitespace.
@@ -373,11 +465,11 @@ pub fn at_non_lazy_break(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
pub fn close_start(tokenizer: &mut Tokenizer) -> State {
- tokenizer.enter(Name::CodeFencedFence);
+ tokenizer.enter(tokenizer.tokenize_state.token_2.clone());
if matches!(tokenizer.current, Some(b'\t' | b' ')) {
tokenizer.attempt(
- State::Next(StateName::CodeFencedBeforeSequenceClose),
+ State::Next(StateName::RawFlowBeforeSequenceClose),
State::Nok,
);
@@ -391,7 +483,7 @@ pub fn close_start(tokenizer: &mut Tokenizer) -> State {
},
))
} else {
- State::Retry(StateName::CodeFencedBeforeSequenceClose)
+ State::Retry(StateName::RawFlowBeforeSequenceClose)
}
}
@@ -405,8 +497,8 @@ pub fn close_start(tokenizer: &mut Tokenizer) -> State {
/// ```
pub fn before_sequence_close(tokenizer: &mut Tokenizer) -> State {
if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
- tokenizer.enter(Name::CodeFencedFenceSequence);
- State::Retry(StateName::CodeFencedSequenceClose)
+ tokenizer.enter(tokenizer.tokenize_state.token_3.clone());
+ State::Retry(StateName::RawFlowSequenceClose)
} else {
State::Nok
}
@@ -424,21 +516,19 @@ pub fn sequence_close(tokenizer: &mut Tokenizer) -> State {
if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
tokenizer.tokenize_state.size_b += 1;
tokenizer.consume();
- State::Next(StateName::CodeFencedSequenceClose)
- } else if tokenizer.tokenize_state.size_b >= CODE_FENCED_SEQUENCE_SIZE_MIN
- && tokenizer.tokenize_state.size_b >= tokenizer.tokenize_state.size
- {
+ State::Next(StateName::RawFlowSequenceClose)
+ } else if tokenizer.tokenize_state.size_b >= tokenizer.tokenize_state.size {
tokenizer.tokenize_state.size_b = 0;
- tokenizer.exit(Name::CodeFencedFenceSequence);
+ tokenizer.exit(tokenizer.tokenize_state.token_3.clone());
if matches!(tokenizer.current, Some(b'\t' | b' ')) {
tokenizer.attempt(
- State::Next(StateName::CodeFencedAfterSequenceClose),
+ State::Next(StateName::RawFlowAfterSequenceClose),
State::Nok,
);
State::Retry(space_or_tab(tokenizer))
} else {
- State::Retry(StateName::CodeFencedAfterSequenceClose)
+ State::Retry(StateName::RawFlowAfterSequenceClose)
}
} else {
tokenizer.tokenize_state.size_b = 0;
@@ -457,7 +547,7 @@ pub fn sequence_close(tokenizer: &mut Tokenizer) -> State {
pub fn sequence_close_after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
- tokenizer.exit(Name::CodeFencedFence);
+ tokenizer.exit(tokenizer.tokenize_state.token_2.clone());
State::Ok
}
_ => State::Nok,
@@ -476,7 +566,7 @@ pub fn content_before(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Name::LineEnding);
tokenizer.consume();
tokenizer.exit(Name::LineEnding);
- State::Next(StateName::CodeFencedContentStart)
+ State::Next(StateName::RawFlowContentStart)
}
/// Before code content, definitely not before a closing fence.
@@ -490,7 +580,7 @@ pub fn content_before(tokenizer: &mut Tokenizer) -> State {
pub fn content_start(tokenizer: &mut Tokenizer) -> State {
if matches!(tokenizer.current, Some(b'\t' | b' ')) {
tokenizer.attempt(
- State::Next(StateName::CodeFencedBeforeContentChunk),
+ State::Next(StateName::RawFlowBeforeContentChunk),
State::Nok,
);
State::Retry(space_or_tab_min_max(
@@ -499,7 +589,7 @@ pub fn content_start(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.size_c,
))
} else {
- State::Retry(StateName::CodeFencedBeforeContentChunk)
+ State::Retry(StateName::RawFlowBeforeContentChunk)
}
}
@@ -515,14 +605,14 @@ pub fn before_content_chunk(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.check(
- State::Next(StateName::CodeFencedAtNonLazyBreak),
- State::Next(StateName::CodeFencedAfter),
+ State::Next(StateName::RawFlowAtNonLazyBreak),
+ State::Next(StateName::RawFlowAfter),
);
State::Retry(StateName::NonLazyContinuationStart)
}
_ => {
- tokenizer.enter(Name::CodeFlowChunk);
- State::Retry(StateName::CodeFencedContentChunk)
+ tokenizer.enter(tokenizer.tokenize_state.token_6.clone());
+ State::Retry(StateName::RawFlowContentChunk)
}
}
}
@@ -538,17 +628,17 @@ pub fn before_content_chunk(tokenizer: &mut Tokenizer) -> State {
pub fn content_chunk(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
- tokenizer.exit(Name::CodeFlowChunk);
- State::Retry(StateName::CodeFencedBeforeContentChunk)
+ tokenizer.exit(tokenizer.tokenize_state.token_6.clone());
+ State::Retry(StateName::RawFlowBeforeContentChunk)
}
_ => {
tokenizer.consume();
- State::Next(StateName::CodeFencedContentChunk)
+ State::Next(StateName::RawFlowContentChunk)
}
}
}
-/// After fenced code.
+/// After raw.
///
/// ```markdown
/// | ~~~js
@@ -557,10 +647,16 @@ pub fn content_chunk(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
pub fn after(tokenizer: &mut Tokenizer) -> State {
- tokenizer.exit(Name::CodeFenced);
+ tokenizer.exit(tokenizer.tokenize_state.token_1.clone());
tokenizer.tokenize_state.marker = 0;
tokenizer.tokenize_state.size_c = 0;
tokenizer.tokenize_state.size = 0;
+ tokenizer.tokenize_state.token_1 = Name::Data;
+ tokenizer.tokenize_state.token_2 = Name::Data;
+ tokenizer.tokenize_state.token_3 = Name::Data;
+ tokenizer.tokenize_state.token_4 = Name::Data;
+ tokenizer.tokenize_state.token_5 = Name::Data;
+ tokenizer.tokenize_state.token_6 = Name::Data;
// Feel free to interrupt.
tokenizer.interrupt = false;
// No longer concrete.
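For orientation, here is a minimal usage sketch of the new construct. It assumes a crate-level `micromark_with_options` entry point and a `math_flow` switch on `Constructs`; those names are assumptions for illustration and are not taken from this diff.

```rust
// Sketch only: option and function names are assumed, not confirmed by
// this commit.
use micromark::{micromark_with_options, Constructs, Options};

fn main() {
    let options = Options {
        constructs: Constructs {
            math_flow: true,
            ..Constructs::default()
        },
        ..Options::default()
    };

    // Two dollars open a math (flow) fence; everything up to the closing
    // fence is raw content, so the backslash below is not a markdown escape.
    let html = micromark_with_options("$$\n\\frac{1}{2}\n$$", &options);

    // Per the docs above, the content should come out wrapped in `<pre>`
    // and `<code>` with the `language-math` and `math-display` classes,
    // left for client side JavaScript (KaTeX, MathJax) to render.
    println!("{html}");
}
```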
diff --git a/src/construct/raw_text.rs b/src/construct/raw_text.rs
index 7f3990d..1a4d03c 100644
--- a/src/construct/raw_text.rs
+++ b/src/construct/raw_text.rs
@@ -57,10 +57,9 @@
//! if both exist and there is also a non-space in the code, are removed.
//! Line endings, at that stage, are considered as spaces.
//!
-//! In markdown, it is possible to create code with the
-//! [code (fenced)][code_fenced] or [code (indented)][code_indented],
-//! and math with the [math (flow)][math_flow] constructs in the [flow][]
-//! content type.
+//! In markdown, it is possible to create code or math with the
+//! [raw (flow)][raw_flow] (or [code (indented)][code_indented]) constructs
+//! in the [flow][] content type.
//!
//! ## HTML
//!
@@ -74,7 +73,7 @@
//! Instead, it is recommended to use client side JavaScript with something like
//! `KaTeX` or `MathJax` to process the math.
//! For that, the math is compiled as a `<code>` element with two classes:
-//! `lang-math` and `math-inline`.
+//! `language-math` and `math-inline`.
//! Client side JavaScript can look for these classes to process them further.
//!
//! When turning markdown into HTML, each line ending in raw (text) is turned
@@ -87,9 +86,9 @@
//! Notably, GitHub currently has a really weird crappy client-side regex-based
//! thing.
//! But on your own (math-heavy?) site it can be great!
-//! Alternatively, set `options.math_text_single_dollar: false`, which prevents
-//! single dollars from being seen as math, and thus prevents normal dollars in
-//! text from being seen as math.
+//! You can set `options.math_text_single_dollar: false` to improve this, as it
+//! prevents single dollars from being seen as math, and thus prevents normal
+//! dollars in text from being seen as math.
//!
//! ## Tokens
//!
@@ -107,10 +106,12 @@
//! * [`micromark-extension-math`](https://github.com/micromark/micromark-extension-math)
//! * [*§ 6.1 Code spans* in `CommonMark`](https://spec.commonmark.org/0.30/#code-spans)
//!
+//! > 👉 **Note**: math is not specified anywhere.
+//!
//! [flow]: crate::construct::flow
//! [text]: crate::construct::text
//! [code_indented]: crate::construct::code_indented
-//! [code_fenced]: crate::construct::code_fenced
+//! [raw_flow]: crate::construct::raw_flow
//! [math_flow]: # "to do"
//! [html_code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
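A companion sketch for the raw (text) side documented above, again with assumed option names (`math_text`, `math_text_single_dollar`): it shows how turning off single-dollar math keeps ordinary dollar amounts in prose from being read as math (text).

```rust
// Sketch only: the exact option layout is an assumption; the behavior is
// the `options.math_text_single_dollar: false` setting described above.
use micromark::{micromark_with_options, Constructs, Options};

fn main() {
    let options = Options {
        constructs: Constructs {
            math_text: true,
            ..Constructs::default()
        },
        // With this off, a single `$` no longer opens math (text), so
        // "$5" stays literal while `$$a$$` still becomes inline math with
        // the `language-math math-inline` classes.
        math_text_single_dollar: false,
        ..Options::default()
    };

    let html = micromark_with_options("Pay $5 for $$a$$.", &options);
    println!("{html}");
}
```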