diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-08-26 13:29:10 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-08-26 13:29:40 +0200 |
commit | 670f1d82e01ea2394b21d7d1857f41bdc67b3fce (patch) | |
tree | d38fd96745b443dc5ece52c771fa5e39653937c4 /src/construct | |
parent | f41688c067be261279804b8ab3e04cd5d67f492f (diff) | |
download | markdown-rs-670f1d82e01ea2394b21d7d1857f41bdc67b3fce.tar.gz markdown-rs-670f1d82e01ea2394b21d7d1857f41bdc67b3fce.tar.bz2 markdown-rs-670f1d82e01ea2394b21d7d1857f41bdc67b3fce.zip |
Add support for math (flow)
Diffstat (limited to 'src/construct')
-rw-r--r-- | src/construct/code_indented.rs | 4 | ||||
-rw-r--r-- | src/construct/flow.rs | 26 | ||||
-rw-r--r-- | src/construct/mod.rs | 6 | ||||
-rw-r--r-- | src/construct/partial_non_lazy_continuation.rs | 4 | ||||
-rw-r--r-- | src/construct/raw_flow.rs (renamed from src/construct/code_fenced.rs) | 252 | ||||
-rw-r--r-- | src/construct/raw_text.rs | 19 |
6 files changed, 205 insertions, 106 deletions
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs index 5fc9446..f2644d4 100644 --- a/src/construct/code_indented.rs +++ b/src/construct/code_indented.rs @@ -18,7 +18,7 @@ //! //! In markdown, it is also possible to use [code (text)][raw_text] in the //! [text][] content type. -//! It is also possible to create code with the [code (fenced)][code_fenced] +//! It is also possible to create code with the [code (fenced)][raw_flow] //! construct. //! //! ## HTML @@ -48,7 +48,7 @@ //! //! [flow]: crate::construct::flow //! [text]: crate::construct::text -//! [code_fenced]: crate::construct::code_fenced +//! [raw_flow]: crate::construct::raw_flow //! [raw_text]: crate::construct::raw_text //! [html_code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element //! [html_pre]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-pre-element diff --git a/src/construct/flow.rs b/src/construct/flow.rs index f3c7685..3f1cd77 100644 --- a/src/construct/flow.rs +++ b/src/construct/flow.rs @@ -11,12 +11,12 @@ //! The constructs found in flow are: //! //! * [Blank line][crate::construct::blank_line] -//! * [Code (fenced)][crate::construct::code_fenced] //! * [Code (indented)][crate::construct::code_indented] //! * [Definition][crate::construct::definition] //! * [Heading (atx)][crate::construct::heading_atx] //! * [Heading (setext)][crate::construct::heading_setext] //! * [HTML (flow)][crate::construct::html_flow] +//! * [Raw (flow)][crate::construct::raw_flow] (code (fenced), math (flow)) //! * [Thematic break][crate::construct::thematic_break] use crate::event::Name; @@ -42,6 +42,15 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { ); State::Retry(StateName::HeadingAtxStart) } + Some(b'$' | b'`' | b'~') => { + tokenizer.attempt( + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), + ); + State::Retry(StateName::RawFlowStart) + } + // Note: `-` is also used in setext heading underline so it’s not + // included here. Some(b'*' | b'_') => { tokenizer.attempt( State::Next(StateName::FlowAfter), @@ -71,13 +80,6 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { ); State::Retry(StateName::DefinitionStart) } - Some(b'`' | b'~') => { - tokenizer.attempt( - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), - ); - State::Retry(StateName::CodeFencedStart) - } // Actual parsing: blank line? Indented code? Indented anything? // Also includes `-` which can be a setext heading underline or thematic break. None | Some(b'\t' | b'\n' | b' ' | b'-') => State::Retry(StateName::FlowBlankLineBefore), @@ -112,23 +114,23 @@ pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State { pub fn before_code_indented(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeCodeFenced), + State::Next(StateName::FlowBeforeRaw), ); State::Retry(StateName::CodeIndentedStart) } -/// At code (fenced). +/// At raw. /// /// ````markdown /// > | ``` /// ^ /// ```` -pub fn before_code_fenced(tokenizer: &mut Tokenizer) -> State { +pub fn before_raw(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( State::Next(StateName::FlowAfter), State::Next(StateName::FlowBeforeHtml), ); - State::Retry(StateName::CodeFencedStart) + State::Retry(StateName::RawFlowStart) } /// At html (flow). diff --git a/src/construct/mod.rs b/src/construct/mod.rs index a0065fa..9add015 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -25,7 +25,7 @@ //! thematic break. //! These things are called constructs here. //! Sometimes, there are several constructs that result in an equivalent thing. -//! For example, [code (fenced)][code_fenced] and +//! For example, [code (fenced)][raw_flow] and //! [code (indented)][code_indented] are considered different constructs. //! //! The following constructs are found in markdown (CommonMark): @@ -36,7 +36,6 @@ //! * [block quote][block_quote] //! * [character escape][character_escape] //! * [character reference][character_reference] -//! * [code (fenced)][code_fenced] //! * [code (indented)][code_indented] //! * [code (text)][raw_text] //! * [definition][] @@ -50,6 +49,7 @@ //! * [label start (link)][label_start_link] //! * [list item][list_item] //! * [paragraph][] +//! * [raw (flow)][raw_flow] (code (fenced), math (flow)) //! * [thematic break][thematic_break] //! //! > 👉 **Note**: for performance reasons, hard break (trailing) is formed by @@ -143,7 +143,6 @@ pub mod blank_line; pub mod block_quote; pub mod character_escape; pub mod character_reference; -pub mod code_fenced; pub mod code_indented; pub mod definition; pub mod document; @@ -172,6 +171,7 @@ pub mod partial_space_or_tab; pub mod partial_space_or_tab_eol; pub mod partial_title; pub mod partial_whitespace; +pub mod raw_flow; pub mod raw_text; pub mod string; pub mod text; diff --git a/src/construct/partial_non_lazy_continuation.rs b/src/construct/partial_non_lazy_continuation.rs index 35641ee..26a20dd 100644 --- a/src/construct/partial_non_lazy_continuation.rs +++ b/src/construct/partial_non_lazy_continuation.rs @@ -2,11 +2,11 @@ //! //! This is a tiny helper that [flow][] constructs can use to make sure that //! the following line is not lazy. -//! For example, [html (flow)][html_flow] and code ([fenced][code_fenced], +//! For example, [html (flow)][html_flow] and ([raw (flow)][raw_flow], //! [indented][code_indented]), stop when the next line is lazy. //! //! [flow]: crate::construct::flow -//! [code_fenced]: crate::construct::code_fenced +//! [raw_flow]: crate::construct::raw_flow //! [code_indented]: crate::construct::code_indented //! [html_flow]: crate::construct::html_flow diff --git a/src/construct/code_fenced.rs b/src/construct/raw_flow.rs index edb2d93..7eaac0c 100644 --- a/src/construct/code_fenced.rs +++ b/src/construct/raw_flow.rs @@ -1,4 +1,5 @@ -//! Code (fenced) occurs in the [flow][] content type. +//! Raw (flow) occurs in the [flow][] content type. +//! It forms code (fenced) and math (flow). //! //! ## Grammar //! @@ -6,20 +7,21 @@ //! (<small>see [construct][crate::construct] for character groups</small>): //! //! ```bnf -//! code_fenced ::= fence_open *( eol *byte ) [ eol fence_close ] +//! raw_flow ::= fence_open *( eol *byte ) [ eol fence_close ] //! -//! fence_open ::= sequence [ 1*space_or_tab info [ 1*space_or_tab meta ] ] *space_or_tab +//! ; Restriction: math (flow) does not support the `info` part. +//! fence_open ::= sequence [1*space_or_tab info [1*space_or_tab meta]] *space_or_tab //! ; Restriction: the number of markers in the closing fence sequence must be //! ; equal to or greater than the number of markers in the opening fence //! ; sequence. //! ; Restriction: the marker in the closing fence sequence must match the //! ; marker in the opening fence sequence //! fence_close ::= sequence *space_or_tab -//! sequence ::= 3*'`' | 3*'~' -//! ; Restriction: the `` ` `` character cannot occur in `info` if it is the marker. +//! sequence ::= 3*'`' | 3*'~' | 2*'$' +//! ; Restriction: the marker cannot occur in `info` if it is the `$` or `` ` `` character. //! info ::= 1*text -//! ; Restriction: the `` ` `` character cannot occur in `meta` if it is the marker. -//! meta ::= 1*text *( *space_or_tab 1*text ) +//! ; Restriction: the marker cannot occur in `meta` if it is the `$` or `` ` `` character. +//! meta ::= 1*text *(*space_or_tab 1*text) //! ``` //! //! As this construct occurs in flow, like all flow constructs, it must be @@ -27,28 +29,29 @@ //! //! The above grammar does not show how indentation (with `space_or_tab`) of //! each line is handled. -//! To parse code (fenced), let `x` be the number of `space_or_tab` characters +//! To parse raw (flow), let `x` be the number of `space_or_tab` characters //! before the opening fence sequence. //! Each line of text is then allowed (not required) to be indented with up //! to `x` spaces or tabs, which are then ignored as an indent instead of being -//! considered as part of the code. +//! considered as part of the content. //! This indent does not affect the closing fence. //! It can be indented up to a separate 3 spaces or tabs. -//! A bigger indent makes it part of the code instead of a fence. +//! A bigger indent makes it part of the content instead of a fence. //! //! The `info` and `meta` parts are interpreted as the [string][] content type. //! That means that [character escapes][character_escape] and //! [character references][character_reference] are allowed. +//! Math (flow) does not support `info`. //! //! The optional `meta` part is ignored: it is not used when parsing or //! rendering. //! //! The optional `info` part is used and is expected to specify the programming -//! language that the code is in. +//! language that the content is in. //! Which value it holds depends on what your syntax highlighter supports, if //! one is used. //! -//! In markdown, it is also possible to use [code (text)][raw_text] in the +//! In markdown, it is also possible to use [raw (text)][raw_text] in the //! [text][] content type. //! It is also possible to create code with the //! [code (indented)][code_indented] construct. @@ -60,6 +63,15 @@ //! See [*§ 4.4.3 The `pre` element*][html_pre] and the [*§ 4.5.15 The `code` //! element*][html_code] in the HTML spec for more info. //! +//! Math (flow) does not relate to HTML elements. +//! `MathML`, which is sort of like SVG but for math, exists but it doesn’t work +//! well and isn’t widely supported. +//! Instead, it is recommended to use client side JavaScript with something like +//! `KaTeX` or `MathJax` to process the math +//! For that, the math is compiled as a `<pre>`, and a `<code>` element with two +//! classes: `language-math` and `math-display`. +//! Client side JavaScript can look for these classes to process them further. +//! //! The `info` is, when rendering to HTML, typically exposed as a class. //! This behavior stems from the HTML spec ([*§ 4.5.15 The `code` //! element*][html_code]). @@ -84,6 +96,14 @@ //! Code (fenced) is more explicit, similar to code (text), and has support //! for specifying the programming language. //! +//! When authoring markdown with math, keep in mind that math doesn’t work in +//! most places. +//! Notably, GitHub currently has a really weird crappy client-side regex-based +//! thing. +//! But on your own (math-heavy?) site it can be great! +//! You can use code (fenced) with an info string of `math` to improve this, as +//! that works in many places. +//! //! ## Tokens //! //! * [`CodeFenced`][Name::CodeFenced] @@ -93,13 +113,21 @@ //! * [`CodeFencedFenceSequence`][Name::CodeFencedFenceSequence] //! * [`CodeFlowChunk`][Name::CodeFlowChunk] //! * [`LineEnding`][Name::LineEnding] +//! * [`MathFlow`][Name::MathFlow] +//! * [`MathFlowFence`][Name::MathFlowFence] +//! * [`MathFlowFenceMeta`][Name::MathFlowFenceMeta] +//! * [`MathFlowFenceSequence`][Name::MathFlowFenceSequence] +//! * [`MathFlowChunk`][Name::MathFlowChunk] //! * [`SpaceOrTab`][Name::SpaceOrTab] //! //! ## References //! //! * [`code-fenced.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/code-fenced.js) +//! * [`micromark-extension-math`](https://github.com/micromark/micromark-extension-math) //! * [*§ 4.5 Fenced code blocks* in `CommonMark`](https://spec.commonmark.org/0.30/#fenced-code-blocks) //! +//! > 👉 **Note**: math is not specified anywhere. +//! //! [flow]: crate::construct::flow //! [string]: crate::construct::string //! [text]: crate::construct::text @@ -119,7 +147,7 @@ use crate::util::{ slice::{Position, Slice}, }; -/// Start of fenced code. +/// Start of raw. /// /// ```markdown /// > | ~~~js @@ -128,12 +156,12 @@ use crate::util::{ /// | ~~~ /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { - if tokenizer.parse_state.options.constructs.code_fenced { + if tokenizer.parse_state.options.constructs.code_fenced + || tokenizer.parse_state.options.constructs.math_flow + { if matches!(tokenizer.current, Some(b'\t' | b' ')) { - tokenizer.enter(Name::CodeFenced); - tokenizer.enter(Name::CodeFencedFence); tokenizer.attempt( - State::Next(StateName::CodeFencedBeforeSequenceOpen), + State::Next(StateName::RawFlowBeforeSequenceOpen), State::Nok, ); return State::Retry(space_or_tab_min_max( @@ -147,10 +175,8 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { )); } - if matches!(tokenizer.current, Some(b'`' | b'~')) { - tokenizer.enter(Name::CodeFenced); - tokenizer.enter(Name::CodeFencedFence); - return State::Retry(StateName::CodeFencedBeforeSequenceOpen); + if matches!(tokenizer.current, Some(b'$' | b'`' | b'~')) { + return State::Retry(StateName::RawFlowBeforeSequenceOpen); } } @@ -179,11 +205,35 @@ pub fn before_sequence_open(tokenizer: &mut Tokenizer) -> State { } } - if let Some(b'`' | b'~') = tokenizer.current { + // Code (fenced). + if (tokenizer.parse_state.options.constructs.code_fenced + && matches!(tokenizer.current, Some(b'`' | b'~'))) + // Math (flow). + || (tokenizer.parse_state.options.constructs.math_flow && tokenizer.current == Some(b'$')) + { tokenizer.tokenize_state.marker = tokenizer.current.unwrap(); tokenizer.tokenize_state.size_c = prefix; - tokenizer.enter(Name::CodeFencedFenceSequence); - State::Retry(StateName::CodeFencedSequenceOpen) + if tokenizer.tokenize_state.marker == b'$' { + tokenizer.tokenize_state.token_1 = Name::MathFlow; + tokenizer.tokenize_state.token_2 = Name::MathFlowFence; + tokenizer.tokenize_state.token_3 = Name::MathFlowFenceSequence; + // Math (flow) does not support an `info` part: everything after the + // opening sequence is the `meta` part. + tokenizer.tokenize_state.token_5 = Name::MathFlowFenceMeta; + tokenizer.tokenize_state.token_6 = Name::MathFlowChunk; + } else { + tokenizer.tokenize_state.token_1 = Name::CodeFenced; + tokenizer.tokenize_state.token_2 = Name::CodeFencedFence; + tokenizer.tokenize_state.token_3 = Name::CodeFencedFenceSequence; + tokenizer.tokenize_state.token_4 = Name::CodeFencedFenceInfo; + tokenizer.tokenize_state.token_5 = Name::CodeFencedFenceMeta; + tokenizer.tokenize_state.token_6 = Name::CodeFlowChunk; + } + + tokenizer.enter(tokenizer.tokenize_state.token_1.clone()); + tokenizer.enter(tokenizer.tokenize_state.token_2.clone()); + tokenizer.enter(tokenizer.tokenize_state.token_3.clone()); + State::Retry(StateName::RawFlowSequenceOpen) } else { State::Nok } @@ -201,19 +251,43 @@ pub fn sequence_open(tokenizer: &mut Tokenizer) -> State { if tokenizer.current == Some(tokenizer.tokenize_state.marker) { tokenizer.tokenize_state.size += 1; tokenizer.consume(); - State::Next(StateName::CodeFencedSequenceOpen) - } else if tokenizer.tokenize_state.size < CODE_FENCED_SEQUENCE_SIZE_MIN { + State::Next(StateName::RawFlowSequenceOpen) + } + // To do: constant. + else if tokenizer.tokenize_state.size + < (if tokenizer.tokenize_state.marker == b'$' { + 2 + } else { + CODE_FENCED_SEQUENCE_SIZE_MIN + }) + { tokenizer.tokenize_state.marker = 0; tokenizer.tokenize_state.size_c = 0; tokenizer.tokenize_state.size = 0; + tokenizer.tokenize_state.token_1 = Name::Data; + tokenizer.tokenize_state.token_2 = Name::Data; + tokenizer.tokenize_state.token_3 = Name::Data; + tokenizer.tokenize_state.token_4 = Name::Data; + tokenizer.tokenize_state.token_5 = Name::Data; + tokenizer.tokenize_state.token_6 = Name::Data; State::Nok - } else if matches!(tokenizer.current, Some(b'\t' | b' ')) { - tokenizer.exit(Name::CodeFencedFenceSequence); - tokenizer.attempt(State::Next(StateName::CodeFencedInfoBefore), State::Nok); - State::Retry(space_or_tab(tokenizer)) } else { - tokenizer.exit(Name::CodeFencedFenceSequence); - State::Retry(StateName::CodeFencedInfoBefore) + // Math (flow) does not support an `info` part: everything after the + // opening sequence is the `meta` part. + let next = if tokenizer.tokenize_state.marker == b'$' { + StateName::RawFlowMetaBefore + } else { + StateName::RawFlowInfoBefore + }; + + if matches!(tokenizer.current, Some(b'\t' | b' ')) { + tokenizer.exit(tokenizer.tokenize_state.token_3.clone()); + tokenizer.attempt(State::Next(next), State::Nok); + State::Retry(space_or_tab(tokenizer)) + } else { + tokenizer.exit(tokenizer.tokenize_state.token_3.clone()); + State::Retry(next) + } } } @@ -228,17 +302,17 @@ pub fn sequence_open(tokenizer: &mut Tokenizer) -> State { pub fn info_before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { - tokenizer.exit(Name::CodeFencedFence); + tokenizer.exit(tokenizer.tokenize_state.token_2.clone()); // Do not form containers. tokenizer.concrete = true; tokenizer.check( - State::Next(StateName::CodeFencedAtNonLazyBreak), - State::Next(StateName::CodeFencedAfter), + State::Next(StateName::RawFlowAtNonLazyBreak), + State::Next(StateName::RawFlowAfter), ); State::Retry(StateName::NonLazyContinuationStart) } _ => { - tokenizer.enter(Name::CodeFencedFenceInfo); + tokenizer.enter(tokenizer.tokenize_state.token_4.clone()); tokenizer.enter_link( Name::Data, Link { @@ -247,7 +321,7 @@ pub fn info_before(tokenizer: &mut Tokenizer) -> State { content: Content::String, }, ); - State::Retry(StateName::CodeFencedInfo) + State::Retry(StateName::RawFlowInfo) } } } @@ -264,25 +338,34 @@ pub fn info(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { tokenizer.exit(Name::Data); - tokenizer.exit(Name::CodeFencedFenceInfo); - State::Retry(StateName::CodeFencedInfoBefore) + tokenizer.exit(tokenizer.tokenize_state.token_4.clone()); + State::Retry(StateName::RawFlowInfoBefore) } Some(b'\t' | b' ') => { tokenizer.exit(Name::Data); - tokenizer.exit(Name::CodeFencedFenceInfo); - tokenizer.attempt(State::Next(StateName::CodeFencedMetaBefore), State::Nok); + tokenizer.exit(tokenizer.tokenize_state.token_4.clone()); + tokenizer.attempt(State::Next(StateName::RawFlowMetaBefore), State::Nok); State::Retry(space_or_tab(tokenizer)) } Some(byte) => { - if tokenizer.tokenize_state.marker == byte && byte == b'`' { + // This looks like code (text) / math (text). + // Note: no reason to check for `~`, because 3 of them can‘t be + // used as strikethrough in text. + if tokenizer.tokenize_state.marker == byte && matches!(byte, b'$' | b'`') { tokenizer.concrete = false; tokenizer.tokenize_state.marker = 0; tokenizer.tokenize_state.size_c = 0; tokenizer.tokenize_state.size = 0; + tokenizer.tokenize_state.token_1 = Name::Data; + tokenizer.tokenize_state.token_2 = Name::Data; + tokenizer.tokenize_state.token_3 = Name::Data; + tokenizer.tokenize_state.token_4 = Name::Data; + tokenizer.tokenize_state.token_5 = Name::Data; + tokenizer.tokenize_state.token_6 = Name::Data; State::Nok } else { tokenizer.consume(); - State::Next(StateName::CodeFencedInfo) + State::Next(StateName::RawFlowInfo) } } } @@ -298,9 +381,9 @@ pub fn info(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn meta_before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - None | Some(b'\n') => State::Retry(StateName::CodeFencedInfoBefore), + None | Some(b'\n') => State::Retry(StateName::RawFlowInfoBefore), _ => { - tokenizer.enter(Name::CodeFencedFenceMeta); + tokenizer.enter(tokenizer.tokenize_state.token_5.clone()); tokenizer.enter_link( Name::Data, Link { @@ -309,7 +392,7 @@ pub fn meta_before(tokenizer: &mut Tokenizer) -> State { content: Content::String, }, ); - State::Retry(StateName::CodeFencedMeta) + State::Retry(StateName::RawFlowMeta) } } } @@ -326,19 +409,28 @@ pub fn meta(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { tokenizer.exit(Name::Data); - tokenizer.exit(Name::CodeFencedFenceMeta); - State::Retry(StateName::CodeFencedInfoBefore) + tokenizer.exit(tokenizer.tokenize_state.token_5.clone()); + State::Retry(StateName::RawFlowInfoBefore) } Some(byte) => { - if tokenizer.tokenize_state.marker == byte && byte == b'`' { + // This looks like code (text) / math (text). + // Note: no reason to check for `~`, because 3 of them can‘t be + // used as strikethrough in text. + if tokenizer.tokenize_state.marker == byte && matches!(byte, b'$' | b'`') { tokenizer.concrete = false; tokenizer.tokenize_state.marker = 0; tokenizer.tokenize_state.size_c = 0; tokenizer.tokenize_state.size = 0; + tokenizer.tokenize_state.token_1 = Name::Data; + tokenizer.tokenize_state.token_2 = Name::Data; + tokenizer.tokenize_state.token_3 = Name::Data; + tokenizer.tokenize_state.token_4 = Name::Data; + tokenizer.tokenize_state.token_5 = Name::Data; + tokenizer.tokenize_state.token_6 = Name::Data; State::Nok } else { tokenizer.consume(); - State::Next(StateName::CodeFencedMeta) + State::Next(StateName::RawFlowMeta) } } } @@ -355,13 +447,13 @@ pub fn meta(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn at_non_lazy_break(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - State::Next(StateName::CodeFencedAfter), - State::Next(StateName::CodeFencedContentBefore), + State::Next(StateName::RawFlowAfter), + State::Next(StateName::RawFlowContentBefore), ); tokenizer.enter(Name::LineEnding); tokenizer.consume(); tokenizer.exit(Name::LineEnding); - State::Next(StateName::CodeFencedCloseStart) + State::Next(StateName::RawFlowCloseStart) } /// Before closing fence, at optional whitespace. @@ -373,11 +465,11 @@ pub fn at_non_lazy_break(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn close_start(tokenizer: &mut Tokenizer) -> State { - tokenizer.enter(Name::CodeFencedFence); + tokenizer.enter(tokenizer.tokenize_state.token_2.clone()); if matches!(tokenizer.current, Some(b'\t' | b' ')) { tokenizer.attempt( - State::Next(StateName::CodeFencedBeforeSequenceClose), + State::Next(StateName::RawFlowBeforeSequenceClose), State::Nok, ); @@ -391,7 +483,7 @@ pub fn close_start(tokenizer: &mut Tokenizer) -> State { }, )) } else { - State::Retry(StateName::CodeFencedBeforeSequenceClose) + State::Retry(StateName::RawFlowBeforeSequenceClose) } } @@ -405,8 +497,8 @@ pub fn close_start(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn before_sequence_close(tokenizer: &mut Tokenizer) -> State { if tokenizer.current == Some(tokenizer.tokenize_state.marker) { - tokenizer.enter(Name::CodeFencedFenceSequence); - State::Retry(StateName::CodeFencedSequenceClose) + tokenizer.enter(tokenizer.tokenize_state.token_3.clone()); + State::Retry(StateName::RawFlowSequenceClose) } else { State::Nok } @@ -424,21 +516,19 @@ pub fn sequence_close(tokenizer: &mut Tokenizer) -> State { if tokenizer.current == Some(tokenizer.tokenize_state.marker) { tokenizer.tokenize_state.size_b += 1; tokenizer.consume(); - State::Next(StateName::CodeFencedSequenceClose) - } else if tokenizer.tokenize_state.size_b >= CODE_FENCED_SEQUENCE_SIZE_MIN - && tokenizer.tokenize_state.size_b >= tokenizer.tokenize_state.size - { + State::Next(StateName::RawFlowSequenceClose) + } else if tokenizer.tokenize_state.size_b >= tokenizer.tokenize_state.size { tokenizer.tokenize_state.size_b = 0; - tokenizer.exit(Name::CodeFencedFenceSequence); + tokenizer.exit(tokenizer.tokenize_state.token_3.clone()); if matches!(tokenizer.current, Some(b'\t' | b' ')) { tokenizer.attempt( - State::Next(StateName::CodeFencedAfterSequenceClose), + State::Next(StateName::RawFlowAfterSequenceClose), State::Nok, ); State::Retry(space_or_tab(tokenizer)) } else { - State::Retry(StateName::CodeFencedAfterSequenceClose) + State::Retry(StateName::RawFlowAfterSequenceClose) } } else { tokenizer.tokenize_state.size_b = 0; @@ -457,7 +547,7 @@ pub fn sequence_close(tokenizer: &mut Tokenizer) -> State { pub fn sequence_close_after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { - tokenizer.exit(Name::CodeFencedFence); + tokenizer.exit(tokenizer.tokenize_state.token_2.clone()); State::Ok } _ => State::Nok, @@ -476,7 +566,7 @@ pub fn content_before(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(Name::LineEnding); tokenizer.consume(); tokenizer.exit(Name::LineEnding); - State::Next(StateName::CodeFencedContentStart) + State::Next(StateName::RawFlowContentStart) } /// Before code content, definitely not before a closing fence. @@ -490,7 +580,7 @@ pub fn content_before(tokenizer: &mut Tokenizer) -> State { pub fn content_start(tokenizer: &mut Tokenizer) -> State { if matches!(tokenizer.current, Some(b'\t' | b' ')) { tokenizer.attempt( - State::Next(StateName::CodeFencedBeforeContentChunk), + State::Next(StateName::RawFlowBeforeContentChunk), State::Nok, ); State::Retry(space_or_tab_min_max( @@ -499,7 +589,7 @@ pub fn content_start(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.size_c, )) } else { - State::Retry(StateName::CodeFencedBeforeContentChunk) + State::Retry(StateName::RawFlowBeforeContentChunk) } } @@ -515,14 +605,14 @@ pub fn before_content_chunk(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { tokenizer.check( - State::Next(StateName::CodeFencedAtNonLazyBreak), - State::Next(StateName::CodeFencedAfter), + State::Next(StateName::RawFlowAtNonLazyBreak), + State::Next(StateName::RawFlowAfter), ); State::Retry(StateName::NonLazyContinuationStart) } _ => { - tokenizer.enter(Name::CodeFlowChunk); - State::Retry(StateName::CodeFencedContentChunk) + tokenizer.enter(tokenizer.tokenize_state.token_6.clone()); + State::Retry(StateName::RawFlowContentChunk) } } } @@ -538,17 +628,17 @@ pub fn before_content_chunk(tokenizer: &mut Tokenizer) -> State { pub fn content_chunk(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { - tokenizer.exit(Name::CodeFlowChunk); - State::Retry(StateName::CodeFencedBeforeContentChunk) + tokenizer.exit(tokenizer.tokenize_state.token_6.clone()); + State::Retry(StateName::RawFlowBeforeContentChunk) } _ => { tokenizer.consume(); - State::Next(StateName::CodeFencedContentChunk) + State::Next(StateName::RawFlowContentChunk) } } } -/// After fenced code. +/// After raw. /// /// ```markdown /// | ~~~js @@ -557,10 +647,16 @@ pub fn content_chunk(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn after(tokenizer: &mut Tokenizer) -> State { - tokenizer.exit(Name::CodeFenced); + tokenizer.exit(tokenizer.tokenize_state.token_1.clone()); tokenizer.tokenize_state.marker = 0; tokenizer.tokenize_state.size_c = 0; tokenizer.tokenize_state.size = 0; + tokenizer.tokenize_state.token_1 = Name::Data; + tokenizer.tokenize_state.token_2 = Name::Data; + tokenizer.tokenize_state.token_3 = Name::Data; + tokenizer.tokenize_state.token_4 = Name::Data; + tokenizer.tokenize_state.token_5 = Name::Data; + tokenizer.tokenize_state.token_6 = Name::Data; // Feel free to interrupt. tokenizer.interrupt = false; // No longer concrete. diff --git a/src/construct/raw_text.rs b/src/construct/raw_text.rs index 7f3990d..1a4d03c 100644 --- a/src/construct/raw_text.rs +++ b/src/construct/raw_text.rs @@ -57,10 +57,9 @@ //! if both exist and there is also a non-space in the code, are removed. //! Line endings, at that stage, are considered as spaces. //! -//! In markdown, it is possible to create code with the -//! [code (fenced)][code_fenced] or [code (indented)][code_indented], -//! and math with the [math (flow)][math_flow] constructs in the [flow][] -//! content type. +//! In markdown, it is possible to create code or math with the +//! [raw (flow)][raw_flow] (or [code (indented)][code_indented]) constructs +//! in the [flow][] content type. //! //! ## HTML //! @@ -74,7 +73,7 @@ //! Instead, it is recommended to use client side JavaScript with something like //! `KaTeX` or `MathJax` to process the math //! For that, the math is compiled as a `<code>` element with two classes: -//! `lang-math` and `math-inline`. +//! `language-math` and `math-inline`. //! Client side JavaScript can look for these classes to process them further. //! //! When turning markdown into HTML, each line ending in raw (text) is turned @@ -87,9 +86,9 @@ //! Notably, GitHub currently has a really weird crappy client-side regex-based //! thing. //! But on your own (math-heavy?) site it can be great! -//! Alternatively, set `options.math_text_single_dollar: false`, which prevents -//! single dollars from being seen as math, and thus prevents normal dollars in -//! text from being seen as math. +//! You can set `options.math_text_single_dollar: false` to improve this, as it +//! prevents single dollars from being seen as math, and thus prevents normal +//! dollars in text from being seen as math. //! //! ## Tokens //! @@ -107,10 +106,12 @@ //! * [`micromark-extension-math`](https://github.com/micromark/micromark-extension-math) //! * [*§ 6.1 Code spans* in `CommonMark`](https://spec.commonmark.org/0.30/#code-spans) //! +//! > 👉 **Note**: math is not specified anywhere. +//! //! [flow]: crate::construct::flow //! [text]: crate::construct::text //! [code_indented]: crate::construct::code_indented -//! [code_fenced]: crate::construct::code_fenced +//! [raw_flow]: crate::construct::raw_flow //! [math_flow]: # "to do" //! [html_code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element |