diff options
-rw-r--r-- | readme.md | 2 | ||||
-rw-r--r-- | src/compiler.rs | 79 | ||||
-rw-r--r-- | src/construct/code_indented.rs | 4 | ||||
-rw-r--r-- | src/construct/flow.rs | 26 | ||||
-rw-r--r-- | src/construct/mod.rs | 6 | ||||
-rw-r--r-- | src/construct/partial_non_lazy_continuation.rs | 4 | ||||
-rw-r--r-- | src/construct/raw_flow.rs (renamed from src/construct/code_fenced.rs) | 252 | ||||
-rw-r--r-- | src/construct/raw_text.rs | 19 | ||||
-rw-r--r-- | src/event.rs | 127 | ||||
-rw-r--r-- | src/lib.rs | 14 | ||||
-rw-r--r-- | src/state.rs | 72 | ||||
-rw-r--r-- | src/tokenizer.rs | 3 | ||||
-rw-r--r-- | src/util/constant.rs | 8 | ||||
-rw-r--r-- | tests/math_flow.rs | 250 | ||||
-rw-r--r-- | tests/math_text.rs | 54 |
15 files changed, 699 insertions, 221 deletions
@@ -146,7 +146,7 @@ They are not enabled by default but can be turned on with `options.constructs`. - [ ] table - [ ] tagfilter - [x] task list item -- [ ] math +- [x] math It is not a goal of this project to support lots of different extensions. It’s instead a goal to support incredibly common, somewhat standardized, diff --git a/src/compiler.rs b/src/compiler.rs index 73834a4..9057505 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -85,10 +85,10 @@ struct CompileContext<'a> { pub heading_atx_rank: Option<usize>, /// Buffer of heading (setext) text. pub heading_setext_buffer: Option<String>, - /// Whether code (flow) contains data. - pub code_flow_seen_data: Option<bool>, - /// Number of code (fenced) fenced. - pub code_fenced_fences_count: Option<usize>, + /// Whether raw (flow) (code (fenced), math (flow)) or code (indented) contains data. + pub raw_flow_seen_data: Option<bool>, + /// Number of raw (flow) fences. + pub raw_flow_fences_count: Option<usize>, /// Whether we are in code (text). pub raw_text_inside: bool, /// Whether we are in image text. 
@@ -143,8 +143,8 @@ impl<'a> CompileContext<'a> { bytes, heading_atx_rank: None, heading_setext_buffer: None, - code_flow_seen_data: None, - code_fenced_fences_count: None, + raw_flow_seen_data: None, + raw_flow_fences_count: None, raw_text_inside: false, character_reference_marker: None, list_expect_first_marker: None, @@ -329,6 +329,7 @@ fn enter(context: &mut CompileContext) { match context.events[context.index].name { Name::CodeFencedFenceInfo | Name::CodeFencedFenceMeta + | Name::MathFlowFenceMeta | Name::DefinitionLabelString | Name::DefinitionTitleString | Name::GfmFootnoteDefinitionPrefix @@ -340,7 +341,7 @@ fn enter(context: &mut CompileContext) { Name::BlockQuote => on_enter_block_quote(context), Name::CodeIndented => on_enter_code_indented(context), - Name::CodeFenced => on_enter_code_fenced(context), + Name::CodeFenced | Name::MathFlow => on_enter_raw_flow(context), Name::CodeText | Name::MathText => on_enter_raw_text(context), Name::Definition => on_enter_definition(context), Name::DefinitionDestinationString => on_enter_definition_destination_string(context), @@ -367,7 +368,9 @@ fn enter(context: &mut CompileContext) { /// Handle [`Exit`][Kind::Exit]. 
fn exit(context: &mut CompileContext) { match context.events[context.index].name { - Name::CodeFencedFenceMeta | Name::Resource => on_exit_drop(context), + Name::CodeFencedFenceMeta | Name::MathFlowFenceMeta | Name::Resource => { + on_exit_drop(context); + } Name::CharacterEscapeValue | Name::CodeTextData | Name::Data | Name::MathTextData => { on_exit_data(context); } @@ -383,10 +386,10 @@ fn exit(context: &mut CompileContext) { on_exit_character_reference_marker_hexadecimal(context); } Name::CharacterReferenceValue => on_exit_character_reference_value(context), - Name::CodeFenced | Name::CodeIndented => on_exit_code_flow(context), - Name::CodeFencedFence => on_exit_code_fenced_fence(context), - Name::CodeFencedFenceInfo => on_exit_code_fenced_fence_info(context), - Name::CodeFlowChunk => on_exit_code_flow_chunk(context), + Name::CodeFenced | Name::CodeIndented | Name::MathFlow => on_exit_raw_flow(context), + Name::CodeFencedFence | Name::MathFlowFence => on_exit_raw_flow_fence(context), + Name::CodeFencedFenceInfo => on_exit_raw_flow_fence_info(context), + Name::CodeFlowChunk | Name::MathFlowChunk => on_exit_raw_flow_chunk(context), Name::CodeText | Name::MathText => on_exit_raw_text(context), Name::Definition => on_exit_definition(context), Name::DefinitionDestinationString => on_exit_definition_destination_string(context), @@ -447,18 +450,22 @@ fn on_enter_block_quote(context: &mut CompileContext) { /// Handle [`Enter`][Kind::Enter]:[`CodeIndented`][Name::CodeIndented]. fn on_enter_code_indented(context: &mut CompileContext) { - context.code_flow_seen_data = Some(false); + context.raw_flow_seen_data = Some(false); context.line_ending_if_needed(); context.push("<pre><code>"); } -/// Handle [`Enter`][Kind::Enter]:[`CodeFenced`][Name::CodeFenced]. -fn on_enter_code_fenced(context: &mut CompileContext) { - context.code_flow_seen_data = Some(false); +/// Handle [`Enter`][Kind::Enter]:{[`CodeFenced`][Name::CodeFenced],[`MathFlow`][Name::MathFlow]}. 
+fn on_enter_raw_flow(context: &mut CompileContext) { + context.raw_flow_seen_data = Some(false); context.line_ending_if_needed(); - // Note that no `>` is used, which is added later. + // Note that no `>` is used, which is added later (due to info) context.push("<pre><code"); - context.code_fenced_fences_count = Some(0); + context.raw_flow_fences_count = Some(0); + + if context.events[context.index].name == Name::MathFlow { + context.push(" class=\"language-math math-display\""); + } } /// Handle [`Enter`][Kind::Enter]:{[`CodeText`][Name::CodeText],[`MathText`][Name::MathText]}. @@ -467,7 +474,7 @@ fn on_enter_raw_text(context: &mut CompileContext) { if !context.image_alt_inside { context.push("<code"); if context.events[context.index].name == Name::MathText { - context.push(" class=\"lang-math math-inline\""); + context.push(" class=\"language-math math-inline\""); } context.push(">"); } @@ -802,9 +809,9 @@ fn on_exit_character_reference_value(context: &mut CompileContext) { context.push(&encode(&value, context.encode_html)); } -/// Handle [`Exit`][Kind::Exit]:[`CodeFlowChunk`][Name::CodeFlowChunk]. -fn on_exit_code_flow_chunk(context: &mut CompileContext) { - context.code_flow_seen_data = Some(true); +/// Handle [`Exit`][Kind::Exit]:{[`CodeFlowChunk`][Name::CodeFlowChunk],[`MathFlowChunk`][Name::MathFlowChunk]}. +fn on_exit_raw_flow_chunk(context: &mut CompileContext) { + context.raw_flow_seen_data = Some(true); context.push(&encode( &Slice::from_position( context.bytes, @@ -816,9 +823,9 @@ fn on_exit_code_flow_chunk(context: &mut CompileContext) { )); } -/// Handle [`Exit`][Kind::Exit]:[`CodeFencedFence`][Name::CodeFencedFence]. -fn on_exit_code_fenced_fence(context: &mut CompileContext) { - let count = if let Some(count) = context.code_fenced_fences_count { +/// Handle [`Exit`][Kind::Exit]:{[`CodeFencedFence`][Name::CodeFencedFence],[`MathFlowFence`][Name::MathFlowFence]}. 
+fn on_exit_raw_flow_fence(context: &mut CompileContext) { + let count = if let Some(count) = context.raw_flow_fences_count { count } else { 0 @@ -829,31 +836,33 @@ fn on_exit_code_fenced_fence(context: &mut CompileContext) { context.slurp_one_line_ending = true; } - context.code_fenced_fences_count = Some(count + 1); + context.raw_flow_fences_count = Some(count + 1); } /// Handle [`Exit`][Kind::Exit]:[`CodeFencedFenceInfo`][Name::CodeFencedFenceInfo]. -fn on_exit_code_fenced_fence_info(context: &mut CompileContext) { +/// +/// Note: math (flow) does not support `info`. +fn on_exit_raw_flow_fence_info(context: &mut CompileContext) { let value = context.resume(); context.push(" class=\"language-"); context.push(&value); context.push("\""); } -/// Handle [`Exit`][Kind::Exit]:{[`CodeFenced`][Name::CodeFenced],[`CodeIndented`][Name::CodeIndented]}. -fn on_exit_code_flow(context: &mut CompileContext) { - // One special case is if we are inside a container, and the fenced code was +/// Handle [`Exit`][Kind::Exit]:{[`CodeFenced`][Name::CodeFenced],[`CodeIndented`][Name::CodeIndented],[`MathFlow`][Name::MathFlow]}. +fn on_exit_raw_flow(context: &mut CompileContext) { + // One special case is if we are inside a container, and the raw (flow) was // not closed (meaning it runs to the end). // In that case, the following line ending, is considered *outside* the // fenced code and block quote by micromark, but CM wants to treat that // ending as part of the code. - if let Some(count) = context.code_fenced_fences_count { + if let Some(count) = context.raw_flow_fences_count { // No closing fence. if count == 1 // In a container. 
&& !context.tight_stack.is_empty() // Empty (as the closing is right at the opening fence) - && context.events[context.index - 1].name != Name::CodeFencedFence + && !matches!(context.events[context.index - 1].name, Name::CodeFencedFence | Name::MathFlowFence) { context.line_ending(); } @@ -862,16 +871,16 @@ fn on_exit_code_flow(context: &mut CompileContext) { // But in most cases, it’s simpler: when we’ve seen some data, emit an extra // line ending when needed. if context - .code_flow_seen_data + .raw_flow_seen_data .take() - .expect("`code_flow_seen_data` must be defined") + .expect("`raw_flow_seen_data` must be defined") { context.line_ending_if_needed(); } context.push("</code></pre>"); - if let Some(count) = context.code_fenced_fences_count.take() { + if let Some(count) = context.raw_flow_fences_count.take() { if count < 2 { context.line_ending_if_needed(); } diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs index 5fc9446..f2644d4 100644 --- a/src/construct/code_indented.rs +++ b/src/construct/code_indented.rs @@ -18,7 +18,7 @@ //! //! In markdown, it is also possible to use [code (text)][raw_text] in the //! [text][] content type. -//! It is also possible to create code with the [code (fenced)][code_fenced] +//! It is also possible to create code with the [code (fenced)][raw_flow] //! construct. //! //! ## HTML @@ -48,7 +48,7 @@ //! //! [flow]: crate::construct::flow //! [text]: crate::construct::text -//! [code_fenced]: crate::construct::code_fenced +//! [raw_flow]: crate::construct::raw_flow //! [raw_text]: crate::construct::raw_text //! [html_code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element //! [html_pre]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-pre-element diff --git a/src/construct/flow.rs b/src/construct/flow.rs index f3c7685..3f1cd77 100644 --- a/src/construct/flow.rs +++ b/src/construct/flow.rs @@ -11,12 +11,12 @@ //! The constructs found in flow are: //! //! 
* [Blank line][crate::construct::blank_line] -//! * [Code (fenced)][crate::construct::code_fenced] //! * [Code (indented)][crate::construct::code_indented] //! * [Definition][crate::construct::definition] //! * [Heading (atx)][crate::construct::heading_atx] //! * [Heading (setext)][crate::construct::heading_setext] //! * [HTML (flow)][crate::construct::html_flow] +//! * [Raw (flow)][crate::construct::raw_flow] (code (fenced), math (flow)) //! * [Thematic break][crate::construct::thematic_break] use crate::event::Name; @@ -42,6 +42,15 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { ); State::Retry(StateName::HeadingAtxStart) } + Some(b'$' | b'`' | b'~') => { + tokenizer.attempt( + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeParagraph), + ); + State::Retry(StateName::RawFlowStart) + } + // Note: `-` is also used in setext heading underline so it’s not + // included here. Some(b'*' | b'_') => { tokenizer.attempt( State::Next(StateName::FlowAfter), @@ -71,13 +80,6 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { ); State::Retry(StateName::DefinitionStart) } - Some(b'`' | b'~') => { - tokenizer.attempt( - State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), - ); - State::Retry(StateName::CodeFencedStart) - } // Actual parsing: blank line? Indented code? Indented anything? // Also includes `-` which can be a setext heading underline or thematic break. None | Some(b'\t' | b'\n' | b' ' | b'-') => State::Retry(StateName::FlowBlankLineBefore), @@ -112,23 +114,23 @@ pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State { pub fn before_code_indented(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeCodeFenced), + State::Next(StateName::FlowBeforeRaw), ); State::Retry(StateName::CodeIndentedStart) } -/// At code (fenced). +/// At raw. 
/// /// ````markdown /// > | ``` /// ^ /// ```` -pub fn before_code_fenced(tokenizer: &mut Tokenizer) -> State { +pub fn before_raw(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( State::Next(StateName::FlowAfter), State::Next(StateName::FlowBeforeHtml), ); - State::Retry(StateName::CodeFencedStart) + State::Retry(StateName::RawFlowStart) } /// At html (flow). diff --git a/src/construct/mod.rs b/src/construct/mod.rs index a0065fa..9add015 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -25,7 +25,7 @@ //! thematic break. //! These things are called constructs here. //! Sometimes, there are several constructs that result in an equivalent thing. -//! For example, [code (fenced)][code_fenced] and +//! For example, [code (fenced)][raw_flow] and //! [code (indented)][code_indented] are considered different constructs. //! //! The following constructs are found in markdown (CommonMark): @@ -36,7 +36,6 @@ //! * [block quote][block_quote] //! * [character escape][character_escape] //! * [character reference][character_reference] -//! * [code (fenced)][code_fenced] //! * [code (indented)][code_indented] //! * [code (text)][raw_text] //! * [definition][] @@ -50,6 +49,7 @@ //! * [label start (link)][label_start_link] //! * [list item][list_item] //! * [paragraph][] +//! * [raw (flow)][raw_flow] (code (fenced), math (flow)) //! * [thematic break][thematic_break] //! //! 
> 👉 **Note**: for performance reasons, hard break (trailing) is formed by @@ -143,7 +143,6 @@ pub mod blank_line; pub mod block_quote; pub mod character_escape; pub mod character_reference; -pub mod code_fenced; pub mod code_indented; pub mod definition; pub mod document; @@ -172,6 +171,7 @@ pub mod partial_space_or_tab; pub mod partial_space_or_tab_eol; pub mod partial_title; pub mod partial_whitespace; +pub mod raw_flow; pub mod raw_text; pub mod string; pub mod text; diff --git a/src/construct/partial_non_lazy_continuation.rs b/src/construct/partial_non_lazy_continuation.rs index 35641ee..26a20dd 100644 --- a/src/construct/partial_non_lazy_continuation.rs +++ b/src/construct/partial_non_lazy_continuation.rs @@ -2,11 +2,11 @@ //! //! This is a tiny helper that [flow][] constructs can use to make sure that //! the following line is not lazy. -//! For example, [html (flow)][html_flow] and code ([fenced][code_fenced], +//! For example, [html (flow)][html_flow] and ([raw (flow)][raw_flow], //! [indented][code_indented]), stop when the next line is lazy. //! //! [flow]: crate::construct::flow -//! [code_fenced]: crate::construct::code_fenced +//! [raw_flow]: crate::construct::raw_flow //! [code_indented]: crate::construct::code_indented //! [html_flow]: crate::construct::html_flow diff --git a/src/construct/code_fenced.rs b/src/construct/raw_flow.rs index edb2d93..7eaac0c 100644 --- a/src/construct/code_fenced.rs +++ b/src/construct/raw_flow.rs @@ -1,4 +1,5 @@ -//! Code (fenced) occurs in the [flow][] content type. +//! Raw (flow) occurs in the [flow][] content type. +//! It forms code (fenced) and math (flow). //! //! ## Grammar //! @@ -6,20 +7,21 @@ //! (<small>see [construct][crate::construct] for character groups</small>): //! //! ```bnf -//! code_fenced ::= fence_open *( eol *byte ) [ eol fence_close ] +//! raw_flow ::= fence_open *( eol *byte ) [ eol fence_close ] //! -//! fence_open ::= sequence [ 1*space_or_tab info [ 1*space_or_tab meta ] ] *space_or_tab +//! 
; Restriction: math (flow) does not support the `info` part. +//! fence_open ::= sequence [1*space_or_tab info [1*space_or_tab meta]] *space_or_tab //! ; Restriction: the number of markers in the closing fence sequence must be //! ; equal to or greater than the number of markers in the opening fence //! ; sequence. //! ; Restriction: the marker in the closing fence sequence must match the //! ; marker in the opening fence sequence //! fence_close ::= sequence *space_or_tab -//! sequence ::= 3*'`' | 3*'~' -//! ; Restriction: the `` ` `` character cannot occur in `info` if it is the marker. +//! sequence ::= 3*'`' | 3*'~' | 2*'$' +//! ; Restriction: the marker cannot occur in `info` if it is the `$` or `` ` `` character. //! info ::= 1*text -//! ; Restriction: the `` ` `` character cannot occur in `meta` if it is the marker. -//! meta ::= 1*text *( *space_or_tab 1*text ) +//! ; Restriction: the marker cannot occur in `meta` if it is the `$` or `` ` `` character. +//! meta ::= 1*text *(*space_or_tab 1*text) //! ``` //! //! As this construct occurs in flow, like all flow constructs, it must be @@ -27,28 +29,29 @@ //! //! The above grammar does not show how indentation (with `space_or_tab`) of //! each line is handled. -//! To parse code (fenced), let `x` be the number of `space_or_tab` characters +//! To parse raw (flow), let `x` be the number of `space_or_tab` characters //! before the opening fence sequence. //! Each line of text is then allowed (not required) to be indented with up //! to `x` spaces or tabs, which are then ignored as an indent instead of being -//! considered as part of the code. +//! considered as part of the content. //! This indent does not affect the closing fence. //! It can be indented up to a separate 3 spaces or tabs. -//! A bigger indent makes it part of the code instead of a fence. +//! A bigger indent makes it part of the content instead of a fence. //! //! The `info` and `meta` parts are interpreted as the [string][] content type. //! 
That means that [character escapes][character_escape] and //! [character references][character_reference] are allowed. +//! Math (flow) does not support `info`. //! //! The optional `meta` part is ignored: it is not used when parsing or //! rendering. //! //! The optional `info` part is used and is expected to specify the programming -//! language that the code is in. +//! language that the content is in. //! Which value it holds depends on what your syntax highlighter supports, if //! one is used. //! -//! In markdown, it is also possible to use [code (text)][raw_text] in the +//! In markdown, it is also possible to use [raw (text)][raw_text] in the //! [text][] content type. //! It is also possible to create code with the //! [code (indented)][code_indented] construct. @@ -60,6 +63,15 @@ //! See [*§ 4.4.3 The `pre` element*][html_pre] and the [*§ 4.5.15 The `code` //! element*][html_code] in the HTML spec for more info. //! +//! Math (flow) does not relate to HTML elements. +//! `MathML`, which is sort of like SVG but for math, exists but it doesn’t work +//! well and isn’t widely supported. +//! Instead, it is recommended to use client side JavaScript with something like +//! `KaTeX` or `MathJax` to process the math. +//! For that, the math is compiled as a `<pre>`, and a `<code>` element with two +//! classes: `language-math` and `math-display`. +//! Client side JavaScript can look for these classes to process them further. +//! //! The `info` is, when rendering to HTML, typically exposed as a class. //! This behavior stems from the HTML spec ([*§ 4.5.15 The `code` //! element*][html_code]). @@ -84,6 +96,14 @@ //! Code (fenced) is more explicit, similar to code (text), and has support //! for specifying the programming language. //! +//! When authoring markdown with math, keep in mind that math doesn’t work in +//! most places. +//! Notably, GitHub currently has a really weird crappy client-side regex-based +//! thing. +//! But on your own (math-heavy?) 
site it can be great! +//! You can use code (fenced) with an info string of `math` to improve this, as +//! that works in many places. +//! //! ## Tokens //! //! * [`CodeFenced`][Name::CodeFenced] @@ -93,13 +113,21 @@ //! * [`CodeFencedFenceSequence`][Name::CodeFencedFenceSequence] //! * [`CodeFlowChunk`][Name::CodeFlowChunk] //! * [`LineEnding`][Name::LineEnding] +//! * [`MathFlow`][Name::MathFlow] +//! * [`MathFlowFence`][Name::MathFlowFence] +//! * [`MathFlowFenceMeta`][Name::MathFlowFenceMeta] +//! * [`MathFlowFenceSequence`][Name::MathFlowFenceSequence] +//! * [`MathFlowChunk`][Name::MathFlowChunk] //! * [`SpaceOrTab`][Name::SpaceOrTab] //! //! ## References //! //! * [`code-fenced.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/code-fenced.js) +//! * [`micromark-extension-math`](https://github.com/micromark/micromark-extension-math) //! * [*§ 4.5 Fenced code blocks* in `CommonMark`](https://spec.commonmark.org/0.30/#fenced-code-blocks) //! +//! > 👉 **Note**: math is not specified anywhere. +//! //! [flow]: crate::construct::flow //! [string]: crate::construct::string //! [text]: crate::construct::text @@ -119,7 +147,7 @@ use crate::util::{ slice::{Position, Slice}, }; -/// Start of fenced code. +/// Start of raw. 
/// /// ```markdown /// > | ~~~js @@ -128,12 +156,12 @@ use crate::util::{ /// | ~~~ /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { - if tokenizer.parse_state.options.constructs.code_fenced { + if tokenizer.parse_state.options.constructs.code_fenced + || tokenizer.parse_state.options.constructs.math_flow + { if matches!(tokenizer.current, Some(b'\t' | b' ')) { - tokenizer.enter(Name::CodeFenced); - tokenizer.enter(Name::CodeFencedFence); tokenizer.attempt( - State::Next(StateName::CodeFencedBeforeSequenceOpen), + State::Next(StateName::RawFlowBeforeSequenceOpen), State::Nok, ); return State::Retry(space_or_tab_min_max( @@ -147,10 +175,8 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { )); } - if matches!(tokenizer.current, Some(b'`' | b'~')) { - tokenizer.enter(Name::CodeFenced); - tokenizer.enter(Name::CodeFencedFence); - return State::Retry(StateName::CodeFencedBeforeSequenceOpen); + if matches!(tokenizer.current, Some(b'$' | b'`' | b'~')) { + return State::Retry(StateName::RawFlowBeforeSequenceOpen); } } @@ -179,11 +205,35 @@ pub fn before_sequence_open(tokenizer: &mut Tokenizer) -> State { } } - if let Some(b'`' | b'~') = tokenizer.current { + // Code (fenced). + if (tokenizer.parse_state.options.constructs.code_fenced + && matches!(tokenizer.current, Some(b'`' | b'~'))) + // Math (flow). + || (tokenizer.parse_state.options.constructs.math_flow && tokenizer.current == Some(b'$')) + { tokenizer.tokenize_state.marker = tokenizer.current.unwrap(); tokenizer.tokenize_state.size_c = prefix; - tokenizer.enter(Name::CodeFencedFenceSequence); - State::Retry(StateName::CodeFencedSequenceOpen) + if tokenizer.tokenize_state.marker == b'$' { + tokenizer.tokenize_state.token_1 = Name::MathFlow; + tokenizer.tokenize_state.token_2 = Name::MathFlowFence; + tokenizer.tokenize_state.token_3 = Name::MathFlowFenceSequence; + // Math (flow) does not support an `info` part: everything after the + // opening sequence is the `meta` part. 
+ tokenizer.tokenize_state.token_5 = Name::MathFlowFenceMeta; + tokenizer.tokenize_state.token_6 = Name::MathFlowChunk; + } else { + tokenizer.tokenize_state.token_1 = Name::CodeFenced; + tokenizer.tokenize_state.token_2 = Name::CodeFencedFence; + tokenizer.tokenize_state.token_3 = Name::CodeFencedFenceSequence; + tokenizer.tokenize_state.token_4 = Name::CodeFencedFenceInfo; + tokenizer.tokenize_state.token_5 = Name::CodeFencedFenceMeta; + tokenizer.tokenize_state.token_6 = Name::CodeFlowChunk; + } + + tokenizer.enter(tokenizer.tokenize_state.token_1.clone()); + tokenizer.enter(tokenizer.tokenize_state.token_2.clone()); + tokenizer.enter(tokenizer.tokenize_state.token_3.clone()); + State::Retry(StateName::RawFlowSequenceOpen) } else { State::Nok } @@ -201,19 +251,43 @@ pub fn sequence_open(tokenizer: &mut Tokenizer) -> State { if tokenizer.current == Some(tokenizer.tokenize_state.marker) { tokenizer.tokenize_state.size += 1; tokenizer.consume(); - State::Next(StateName::CodeFencedSequenceOpen) - } else if tokenizer.tokenize_state.size < CODE_FENCED_SEQUENCE_SIZE_MIN { + State::Next(StateName::RawFlowSequenceOpen) + } + // To do: constant. 
+ else if tokenizer.tokenize_state.size + < (if tokenizer.tokenize_state.marker == b'$' { + 2 + } else { + CODE_FENCED_SEQUENCE_SIZE_MIN + }) + { tokenizer.tokenize_state.marker = 0; tokenizer.tokenize_state.size_c = 0; tokenizer.tokenize_state.size = 0; + tokenizer.tokenize_state.token_1 = Name::Data; + tokenizer.tokenize_state.token_2 = Name::Data; + tokenizer.tokenize_state.token_3 = Name::Data; + tokenizer.tokenize_state.token_4 = Name::Data; + tokenizer.tokenize_state.token_5 = Name::Data; + tokenizer.tokenize_state.token_6 = Name::Data; State::Nok - } else if matches!(tokenizer.current, Some(b'\t' | b' ')) { - tokenizer.exit(Name::CodeFencedFenceSequence); - tokenizer.attempt(State::Next(StateName::CodeFencedInfoBefore), State::Nok); - State::Retry(space_or_tab(tokenizer)) } else { - tokenizer.exit(Name::CodeFencedFenceSequence); - State::Retry(StateName::CodeFencedInfoBefore) + // Math (flow) does not support an `info` part: everything after the + // opening sequence is the `meta` part. + let next = if tokenizer.tokenize_state.marker == b'$' { + StateName::RawFlowMetaBefore + } else { + StateName::RawFlowInfoBefore + }; + + if matches!(tokenizer.current, Some(b'\t' | b' ')) { + tokenizer.exit(tokenizer.tokenize_state.token_3.clone()); + tokenizer.attempt(State::Next(next), State::Nok); + State::Retry(space_or_tab(tokenizer)) + } else { + tokenizer.exit(tokenizer.tokenize_state.token_3.clone()); + State::Retry(next) + } } } @@ -228,17 +302,17 @@ pub fn sequence_open(tokenizer: &mut Tokenizer) -> State { pub fn info_before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { - tokenizer.exit(Name::CodeFencedFence); + tokenizer.exit(tokenizer.tokenize_state.token_2.clone()); // Do not form containers. 
tokenizer.concrete = true; tokenizer.check( - State::Next(StateName::CodeFencedAtNonLazyBreak), - State::Next(StateName::CodeFencedAfter), + State::Next(StateName::RawFlowAtNonLazyBreak), + State::Next(StateName::RawFlowAfter), ); State::Retry(StateName::NonLazyContinuationStart) } _ => { - tokenizer.enter(Name::CodeFencedFenceInfo); + tokenizer.enter(tokenizer.tokenize_state.token_4.clone()); tokenizer.enter_link( Name::Data, Link { @@ -247,7 +321,7 @@ pub fn info_before(tokenizer: &mut Tokenizer) -> State { content: Content::String, }, ); - State::Retry(StateName::CodeFencedInfo) + State::Retry(StateName::RawFlowInfo) } } } @@ -264,25 +338,34 @@ pub fn info(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { tokenizer.exit(Name::Data); - tokenizer.exit(Name::CodeFencedFenceInfo); - State::Retry(StateName::CodeFencedInfoBefore) + tokenizer.exit(tokenizer.tokenize_state.token_4.clone()); + State::Retry(StateName::RawFlowInfoBefore) } Some(b'\t' | b' ') => { tokenizer.exit(Name::Data); - tokenizer.exit(Name::CodeFencedFenceInfo); - tokenizer.attempt(State::Next(StateName::CodeFencedMetaBefore), State::Nok); + tokenizer.exit(tokenizer.tokenize_state.token_4.clone()); + tokenizer.attempt(State::Next(StateName::RawFlowMetaBefore), State::Nok); State::Retry(space_or_tab(tokenizer)) } Some(byte) => { - if tokenizer.tokenize_state.marker == byte && byte == b'`' { + // This looks like code (text) / math (text). + // Note: no reason to check for `~`, because 3 of them can‘t be + // used as strikethrough in text. 
+ if tokenizer.tokenize_state.marker == byte && matches!(byte, b'$' | b'`') { tokenizer.concrete = false; tokenizer.tokenize_state.marker = 0; tokenizer.tokenize_state.size_c = 0; tokenizer.tokenize_state.size = 0; + tokenizer.tokenize_state.token_1 = Name::Data; + tokenizer.tokenize_state.token_2 = Name::Data; + tokenizer.tokenize_state.token_3 = Name::Data; + tokenizer.tokenize_state.token_4 = Name::Data; + tokenizer.tokenize_state.token_5 = Name::Data; + tokenizer.tokenize_state.token_6 = Name::Data; State::Nok } else { tokenizer.consume(); - State::Next(StateName::CodeFencedInfo) + State::Next(StateName::RawFlowInfo) } } } @@ -298,9 +381,9 @@ pub fn info(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn meta_before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - None | Some(b'\n') => State::Retry(StateName::CodeFencedInfoBefore), + None | Some(b'\n') => State::Retry(StateName::RawFlowInfoBefore), _ => { - tokenizer.enter(Name::CodeFencedFenceMeta); + tokenizer.enter(tokenizer.tokenize_state.token_5.clone()); tokenizer.enter_link( Name::Data, Link { @@ -309,7 +392,7 @@ pub fn meta_before(tokenizer: &mut Tokenizer) -> State { content: Content::String, }, ); - State::Retry(StateName::CodeFencedMeta) + State::Retry(StateName::RawFlowMeta) } } } @@ -326,19 +409,28 @@ pub fn meta(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { tokenizer.exit(Name::Data); - tokenizer.exit(Name::CodeFencedFenceMeta); - State::Retry(StateName::CodeFencedInfoBefore) + tokenizer.exit(tokenizer.tokenize_state.token_5.clone()); + State::Retry(StateName::RawFlowInfoBefore) } Some(byte) => { - if tokenizer.tokenize_state.marker == byte && byte == b'`' { + // This looks like code (text) / math (text). + // Note: no reason to check for `~`, because 3 of them can‘t be + // used as strikethrough in text. 
+ if tokenizer.tokenize_state.marker == byte && matches!(byte, b'$' | b'`') { tokenizer.concrete = false; tokenizer.tokenize_state.marker = 0; tokenizer.tokenize_state.size_c = 0; tokenizer.tokenize_state.size = 0; + tokenizer.tokenize_state.token_1 = Name::Data; + tokenizer.tokenize_state.token_2 = Name::Data; + tokenizer.tokenize_state.token_3 = Name::Data; + tokenizer.tokenize_state.token_4 = Name::Data; + tokenizer.tokenize_state.token_5 = Name::Data; + tokenizer.tokenize_state.token_6 = Name::Data; State::Nok } else { tokenizer.consume(); - State::Next(StateName::CodeFencedMeta) + State::Next(StateName::RawFlowMeta) } } } @@ -355,13 +447,13 @@ pub fn meta(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn at_non_lazy_break(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( - State::Next(StateName::CodeFencedAfter), - State::Next(StateName::CodeFencedContentBefore), + State::Next(StateName::RawFlowAfter), + State::Next(StateName::RawFlowContentBefore), ); tokenizer.enter(Name::LineEnding); tokenizer.consume(); tokenizer.exit(Name::LineEnding); - State::Next(StateName::CodeFencedCloseStart) + State::Next(StateName::RawFlowCloseStart) } /// Before closing fence, at optional whitespace. 
@@ -373,11 +465,11 @@ pub fn at_non_lazy_break(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn close_start(tokenizer: &mut Tokenizer) -> State { - tokenizer.enter(Name::CodeFencedFence); + tokenizer.enter(tokenizer.tokenize_state.token_2.clone()); if matches!(tokenizer.current, Some(b'\t' | b' ')) { tokenizer.attempt( - State::Next(StateName::CodeFencedBeforeSequenceClose), + State::Next(StateName::RawFlowBeforeSequenceClose), State::Nok, ); @@ -391,7 +483,7 @@ pub fn close_start(tokenizer: &mut Tokenizer) -> State { }, )) } else { - State::Retry(StateName::CodeFencedBeforeSequenceClose) + State::Retry(StateName::RawFlowBeforeSequenceClose) } } @@ -405,8 +497,8 @@ pub fn close_start(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn before_sequence_close(tokenizer: &mut Tokenizer) -> State { if tokenizer.current == Some(tokenizer.tokenize_state.marker) { - tokenizer.enter(Name::CodeFencedFenceSequence); - State::Retry(StateName::CodeFencedSequenceClose) + tokenizer.enter(tokenizer.tokenize_state.token_3.clone()); + State::Retry(StateName::RawFlowSequenceClose) } else { State::Nok } @@ -424,21 +516,19 @@ pub fn sequence_close(tokenizer: &mut Tokenizer) -> State { if tokenizer.current == Some(tokenizer.tokenize_state.marker) { tokenizer.tokenize_state.size_b += 1; tokenizer.consume(); - State::Next(StateName::CodeFencedSequenceClose) - } else if tokenizer.tokenize_state.size_b >= CODE_FENCED_SEQUENCE_SIZE_MIN - && tokenizer.tokenize_state.size_b >= tokenizer.tokenize_state.size - { + State::Next(StateName::RawFlowSequenceClose) + } else if tokenizer.tokenize_state.size_b >= tokenizer.tokenize_state.size { tokenizer.tokenize_state.size_b = 0; - tokenizer.exit(Name::CodeFencedFenceSequence); + tokenizer.exit(tokenizer.tokenize_state.token_3.clone()); if matches!(tokenizer.current, Some(b'\t' | b' ')) { tokenizer.attempt( - State::Next(StateName::CodeFencedAfterSequenceClose), + State::Next(StateName::RawFlowAfterSequenceClose), State::Nok, ); 
State::Retry(space_or_tab(tokenizer)) } else { - State::Retry(StateName::CodeFencedAfterSequenceClose) + State::Retry(StateName::RawFlowAfterSequenceClose) } } else { tokenizer.tokenize_state.size_b = 0; @@ -457,7 +547,7 @@ pub fn sequence_close(tokenizer: &mut Tokenizer) -> State { pub fn sequence_close_after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { - tokenizer.exit(Name::CodeFencedFence); + tokenizer.exit(tokenizer.tokenize_state.token_2.clone()); State::Ok } _ => State::Nok, @@ -476,7 +566,7 @@ pub fn content_before(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(Name::LineEnding); tokenizer.consume(); tokenizer.exit(Name::LineEnding); - State::Next(StateName::CodeFencedContentStart) + State::Next(StateName::RawFlowContentStart) } /// Before code content, definitely not before a closing fence. @@ -490,7 +580,7 @@ pub fn content_before(tokenizer: &mut Tokenizer) -> State { pub fn content_start(tokenizer: &mut Tokenizer) -> State { if matches!(tokenizer.current, Some(b'\t' | b' ')) { tokenizer.attempt( - State::Next(StateName::CodeFencedBeforeContentChunk), + State::Next(StateName::RawFlowBeforeContentChunk), State::Nok, ); State::Retry(space_or_tab_min_max( @@ -499,7 +589,7 @@ pub fn content_start(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.size_c, )) } else { - State::Retry(StateName::CodeFencedBeforeContentChunk) + State::Retry(StateName::RawFlowBeforeContentChunk) } } @@ -515,14 +605,14 @@ pub fn before_content_chunk(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { tokenizer.check( - State::Next(StateName::CodeFencedAtNonLazyBreak), - State::Next(StateName::CodeFencedAfter), + State::Next(StateName::RawFlowAtNonLazyBreak), + State::Next(StateName::RawFlowAfter), ); State::Retry(StateName::NonLazyContinuationStart) } _ => { - tokenizer.enter(Name::CodeFlowChunk); - State::Retry(StateName::CodeFencedContentChunk) + 
tokenizer.enter(tokenizer.tokenize_state.token_6.clone()); + State::Retry(StateName::RawFlowContentChunk) } } } @@ -538,17 +628,17 @@ pub fn before_content_chunk(tokenizer: &mut Tokenizer) -> State { pub fn content_chunk(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { - tokenizer.exit(Name::CodeFlowChunk); - State::Retry(StateName::CodeFencedBeforeContentChunk) + tokenizer.exit(tokenizer.tokenize_state.token_6.clone()); + State::Retry(StateName::RawFlowBeforeContentChunk) } _ => { tokenizer.consume(); - State::Next(StateName::CodeFencedContentChunk) + State::Next(StateName::RawFlowContentChunk) } } } -/// After fenced code. +/// After raw. /// /// ```markdown /// | ~~~js @@ -557,10 +647,16 @@ pub fn content_chunk(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn after(tokenizer: &mut Tokenizer) -> State { - tokenizer.exit(Name::CodeFenced); + tokenizer.exit(tokenizer.tokenize_state.token_1.clone()); tokenizer.tokenize_state.marker = 0; tokenizer.tokenize_state.size_c = 0; tokenizer.tokenize_state.size = 0; + tokenizer.tokenize_state.token_1 = Name::Data; + tokenizer.tokenize_state.token_2 = Name::Data; + tokenizer.tokenize_state.token_3 = Name::Data; + tokenizer.tokenize_state.token_4 = Name::Data; + tokenizer.tokenize_state.token_5 = Name::Data; + tokenizer.tokenize_state.token_6 = Name::Data; // Feel free to interrupt. tokenizer.interrupt = false; // No longer concrete. diff --git a/src/construct/raw_text.rs b/src/construct/raw_text.rs index 7f3990d..1a4d03c 100644 --- a/src/construct/raw_text.rs +++ b/src/construct/raw_text.rs @@ -57,10 +57,9 @@ //! if both exist and there is also a non-space in the code, are removed. //! Line endings, at that stage, are considered as spaces. //! -//! In markdown, it is possible to create code with the -//! [code (fenced)][code_fenced] or [code (indented)][code_indented], -//! and math with the [math (flow)][math_flow] constructs in the [flow][] -//! content type. +//! 
In markdown, it is possible to create code or math with the +//! [raw (flow)][raw_flow] (or [code (indented)][code_indented]) constructs +//! in the [flow][] content type. //! //! ## HTML //! @@ -74,7 +73,7 @@ //! Instead, it is recommended to use client side JavaScript with something like //! `KaTeX` or `MathJax` to process the math //! For that, the math is compiled as a `<code>` element with two classes: -//! `lang-math` and `math-inline`. +//! `language-math` and `math-inline`. //! Client side JavaScript can look for these classes to process them further. //! //! When turning markdown into HTML, each line ending in raw (text) is turned @@ -87,9 +86,9 @@ //! Notably, GitHub currently has a really weird crappy client-side regex-based //! thing. //! But on your own (math-heavy?) site it can be great! -//! Alternatively, set `options.math_text_single_dollar: false`, which prevents -//! single dollars from being seen as math, and thus prevents normal dollars in -//! text from being seen as math. +//! You can set `options.math_text_single_dollar: false` to improve this, as it +//! prevents single dollars from being seen as math, and thus prevents normal +//! dollars in text from being seen as math. //! //! ## Tokens //! @@ -107,10 +106,12 @@ //! * [`micromark-extension-math`](https://github.com/micromark/micromark-extension-math) //! * [*§ 6.1 Code spans* in `CommonMark`](https://spec.commonmark.org/0.30/#code-spans) //! +//! > 👉 **Note**: math is not specified anywhere. +//! //! [flow]: crate::construct::flow //! [text]: crate::construct::text //! [code_indented]: crate::construct::code_indented -//! [code_fenced]: crate::construct::code_fenced +//! [raw_flow]: crate::construct::raw_flow //! [math_flow]: # "to do" //! 
[html_code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element diff --git a/src/event.rs b/src/event.rs index 869f2e8..3e540c0 100644 --- a/src/event.rs +++ b/src/event.rs @@ -351,7 +351,7 @@ pub enum Name { /// [`LineEnding`][Name::LineEnding], /// [`SpaceOrTab`][Name::SpaceOrTab] /// * **Construct**: - /// [`code_fenced`][crate::construct::code_fenced] + /// [`raw_flow`][crate::construct::raw_flow] /// /// ## Example /// @@ -376,7 +376,7 @@ pub enum Name { /// [`CodeFencedFenceSequence`][Name::CodeFencedFenceSequence], /// [`SpaceOrTab`][Name::SpaceOrTab] /// * **Construct**: - /// [`code_fenced`][crate::construct::code_fenced] + /// [`raw_flow`][crate::construct::raw_flow] /// /// ## Example /// @@ -397,7 +397,7 @@ pub enum Name { /// * **Content model**: /// [string content][crate::construct::string] /// * **Construct**: - /// [`code_fenced`][crate::construct::code_fenced] + /// [`raw_flow`][crate::construct::raw_flow] /// /// ## Example /// @@ -417,7 +417,7 @@ pub enum Name { /// * **Content model**: /// [string content][crate::construct::string] /// * **Construct**: - /// [`code_fenced`][crate::construct::code_fenced] + /// [`raw_flow`][crate::construct::raw_flow] /// /// ## Example /// @@ -437,7 +437,7 @@ pub enum Name { /// * **Content model**: /// void /// * **Construct**: - /// [`code_fenced`][crate::construct::code_fenced] + /// [`raw_flow`][crate::construct::raw_flow] /// /// ## Example /// @@ -459,7 +459,7 @@ pub enum Name { /// * **Content model**: /// void /// * **Construct**: - /// [`code_fenced`][crate::construct::code_fenced], + /// [`raw_flow`][crate::construct::raw_flow], /// [`code_indented`][crate::construct::code_indented] /// /// ## Example @@ -487,7 +487,7 @@ pub enum Name { /// [`LineEnding`][Name::LineEnding], /// [`SpaceOrTab`][Name::SpaceOrTab] /// * **Construct**: - /// [`code_fenced`][crate::construct::code_fenced] + /// [`raw_flow`][crate::construct::raw_flow] /// /// ## Example /// @@ -1889,6 
+1889,115 @@ pub enum Name { /// ^^^ /// ``` ListUnordered, + /// Whole math (flow). + /// + /// ## Info + /// + /// * **Context**: + /// [flow content][crate::construct::flow] + /// * **Content model**: + /// [`MathFlowFence`][Name::MathFlowFence], + /// [`MathFlowChunk`][Name::MathFlowChunk], + /// [`LineEnding`][Name::LineEnding], + /// [`SpaceOrTab`][Name::SpaceOrTab] + /// * **Construct**: + /// [`raw_flow`][crate::construct::raw_flow] + /// + /// ## Example + /// + /// ```markdown + /// > | $$ + /// ^^ + /// > | \frac{1}{2} + /// ^^^^^^^^^^^ + /// > | $$ + /// ^^ + /// ``` + MathFlow, + /// A math (flow) fence. + /// + /// ## Info + /// + /// * **Context**: + /// [`MathFlow`][Name::MathFlow] + /// * **Content model**: + /// [`MathFlowFenceMeta`][Name::MathFlowFenceMeta], + /// [`MathFlowFenceSequence`][Name::MathFlowFenceSequence], + /// [`SpaceOrTab`][Name::SpaceOrTab] + /// * **Construct**: + /// [`raw_flow`][crate::construct::raw_flow] + /// + /// ## Example + /// + /// ```markdown + /// > | $$ + /// ^^ + /// | \frac{1}{2} + /// > | $$ + /// ^^ + /// ``` + MathFlowFence, + /// A math (flow) fence meta string. + /// + /// ## Info + /// + /// * **Context**: + /// [`MathFlowFence`][Name::MathFlowFence] + /// * **Content model**: + /// [string content][crate::construct::string] + /// * **Construct**: + /// [`raw_flow`][crate::construct::raw_flow] + /// + /// ## Example + /// + /// ```markdown + /// > | $$alpha bravo + /// ^^^^^^^^^^^ + /// | \frac{1}{2} + /// | $$ + /// ``` + MathFlowFenceMeta, + /// A math (flow) fence sequence. + /// + /// ## Info + /// + /// * **Context**: + /// [`MathFlowFence`][Name::MathFlowFence] + /// * **Content model**: + /// void + /// * **Construct**: + /// [`raw_flow`][crate::construct::raw_flow] + /// + /// ## Example + /// + /// ```markdown + /// > | $$ + /// ^^ + /// | \frac{1}{2} + /// > | $$ + /// ^^ + /// ``` + MathFlowFenceSequence, + /// A math (flow) chunk. 
+ /// + /// ## Info + /// + /// * **Context**: + /// [`MathFlow`][Name::MathFlow] + /// * **Content model**: + /// void + /// * **Construct**: + /// [`raw_flow`][crate::construct::raw_flow] + /// + /// ## Example + /// + /// ```markdown + /// | $$ + /// > | \frac{1}{2} + /// ^^^^^^^^^^^ + /// | $$ + /// ``` + MathFlowChunk, /// Whole math (text). /// /// ## Info @@ -2327,7 +2436,7 @@ pub enum Name { } /// List of void events, used to make sure everything is working well. -pub const VOID_EVENTS: [Name; 55] = [ +pub const VOID_EVENTS: [Name; 57] = [ Name::AttentionSequence, Name::AutolinkEmail, Name::AutolinkMarker, @@ -2375,6 +2484,8 @@ pub const VOID_EVENTS: [Name; 55] = [ Name::LineEnding, Name::ListItemMarker, Name::ListItemValue, + Name::MathFlowFenceSequence, + Name::MathFlowChunk, Name::MathTextData, Name::MathTextSequence, Name::ReferenceMarker, @@ -272,6 +272,17 @@ pub struct Constructs { /// ^^^ /// ``` pub list_item: bool, + /// Math (flow). + /// + /// ```markdown + /// > | $$ + /// ^^ + /// > | \frac{1}{2} + /// ^^^^^^^^^^^ + /// > | $$ + /// ^^ + /// ``` + pub math_flow: bool, /// Math (text). 
/// /// ```markdown @@ -317,6 +328,7 @@ impl Default for Constructs { label_start_link: true, label_end: true, list_item: true, + math_flow: false, math_text: false, thematic_break: true, } @@ -730,7 +742,7 @@ pub struct Options { /// ..Options::default() /// } /// ), - /// "<p><code class=\"lang-math math-inline\">a</code></p>" + /// "<p><code class=\"language-math math-inline\">a</code></p>" /// ); /// /// // Pass `math_text_single_dollar: false` to turn that off: diff --git a/src/state.rs b/src/state.rs index 0c04821..a42e802 100644 --- a/src/state.rs +++ b/src/state.rs @@ -52,23 +52,23 @@ pub enum Name { CharacterReferenceNumeric, CharacterReferenceValue, - CodeFencedStart, - CodeFencedBeforeSequenceOpen, - CodeFencedSequenceOpen, - CodeFencedInfoBefore, - CodeFencedInfo, - CodeFencedMetaBefore, - CodeFencedMeta, - CodeFencedAtNonLazyBreak, - CodeFencedCloseStart, - CodeFencedBeforeSequenceClose, - CodeFencedSequenceClose, - CodeFencedAfterSequenceClose, - CodeFencedContentBefore, - CodeFencedContentStart, - CodeFencedBeforeContentChunk, - CodeFencedContentChunk, - CodeFencedAfter, + RawFlowStart, + RawFlowBeforeSequenceOpen, + RawFlowSequenceOpen, + RawFlowInfoBefore, + RawFlowInfo, + RawFlowMetaBefore, + RawFlowMeta, + RawFlowAtNonLazyBreak, + RawFlowCloseStart, + RawFlowBeforeSequenceClose, + RawFlowSequenceClose, + RawFlowAfterSequenceClose, + RawFlowContentBefore, + RawFlowContentStart, + RawFlowBeforeContentChunk, + RawFlowContentChunk, + RawFlowAfter, CodeIndentedStart, CodeIndentedAtBreak, @@ -124,7 +124,7 @@ pub enum Name { FlowStart, FlowBeforeCodeIndented, - FlowBeforeCodeFenced, + FlowBeforeRaw, FlowBeforeHtml, FlowBeforeHeadingAtx, FlowBeforeHeadingSetext, @@ -366,23 +366,23 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { Name::CharacterReferenceNumeric => construct::character_reference::numeric, Name::CharacterReferenceValue => construct::character_reference::value, - Name::CodeFencedStart => construct::code_fenced::start, - 
Name::CodeFencedBeforeSequenceOpen => construct::code_fenced::before_sequence_open, - Name::CodeFencedSequenceOpen => construct::code_fenced::sequence_open, - Name::CodeFencedInfoBefore => construct::code_fenced::info_before, - Name::CodeFencedInfo => construct::code_fenced::info, - Name::CodeFencedMetaBefore => construct::code_fenced::meta_before, - Name::CodeFencedMeta => construct::code_fenced::meta, - Name::CodeFencedAtNonLazyBreak => construct::code_fenced::at_non_lazy_break, - Name::CodeFencedCloseStart => construct::code_fenced::close_start, - Name::CodeFencedBeforeSequenceClose => construct::code_fenced::before_sequence_close, - Name::CodeFencedSequenceClose => construct::code_fenced::sequence_close, - Name::CodeFencedAfterSequenceClose => construct::code_fenced::sequence_close_after, - Name::CodeFencedContentBefore => construct::code_fenced::content_before, - Name::CodeFencedContentStart => construct::code_fenced::content_start, - Name::CodeFencedBeforeContentChunk => construct::code_fenced::before_content_chunk, - Name::CodeFencedContentChunk => construct::code_fenced::content_chunk, - Name::CodeFencedAfter => construct::code_fenced::after, + Name::RawFlowStart => construct::raw_flow::start, + Name::RawFlowBeforeSequenceOpen => construct::raw_flow::before_sequence_open, + Name::RawFlowSequenceOpen => construct::raw_flow::sequence_open, + Name::RawFlowInfoBefore => construct::raw_flow::info_before, + Name::RawFlowInfo => construct::raw_flow::info, + Name::RawFlowMetaBefore => construct::raw_flow::meta_before, + Name::RawFlowMeta => construct::raw_flow::meta, + Name::RawFlowAtNonLazyBreak => construct::raw_flow::at_non_lazy_break, + Name::RawFlowCloseStart => construct::raw_flow::close_start, + Name::RawFlowBeforeSequenceClose => construct::raw_flow::before_sequence_close, + Name::RawFlowSequenceClose => construct::raw_flow::sequence_close, + Name::RawFlowAfterSequenceClose => construct::raw_flow::sequence_close_after, + Name::RawFlowContentBefore => 
construct::raw_flow::content_before, + Name::RawFlowContentStart => construct::raw_flow::content_start, + Name::RawFlowBeforeContentChunk => construct::raw_flow::before_content_chunk, + Name::RawFlowContentChunk => construct::raw_flow::content_chunk, + Name::RawFlowAfter => construct::raw_flow::after, Name::CodeIndentedStart => construct::code_indented::start, Name::CodeIndentedAtBreak => construct::code_indented::at_break, @@ -446,7 +446,7 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { Name::FlowStart => construct::flow::start, Name::FlowBeforeCodeIndented => construct::flow::before_code_indented, - Name::FlowBeforeCodeFenced => construct::flow::before_code_fenced, + Name::FlowBeforeRaw => construct::flow::before_raw, Name::FlowBeforeHtml => construct::flow::before_html, Name::FlowBeforeHeadingAtx => construct::flow::before_heading_atx, Name::FlowBeforeHeadingSetext => construct::flow::before_heading_setext, diff --git a/src/tokenizer.rs b/src/tokenizer.rs index c6a209b..9b73836 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -256,6 +256,8 @@ pub struct TokenizeState<'a> { pub token_4: Name, /// Slot for an event name. pub token_5: Name, + /// Slot for an event name. + pub token_6: Name, } /// A tokenizer itself. @@ -364,6 +366,7 @@ impl<'a> Tokenizer<'a> { token_3: Name::Data, token_4: Name::Data, token_5: Name::Data, + token_6: Name::Data, }, map: EditMap::new(), interrupt: false, diff --git a/src/util/constant.rs b/src/util/constant.rs index 0c82378..f397f38 100644 --- a/src/util/constant.rs +++ b/src/util/constant.rs @@ -1,7 +1,7 @@ //! Constants needed to parse markdown. //! //! Most of these constants are magic numbers, such as the number of markers -//! needed to parse [code (fenced)][code_fenced] +//! needed to parse [code (fenced)][raw_flow] //! ([`CODE_FENCED_SEQUENCE_SIZE_MIN`][]) or the max number of allowed markers //! in a [heading (atx)][heading_atx] //! ([`HEADING_ATX_OPENING_FENCE_SIZE_MAX`][]). @@ -11,7 +11,7 @@ //! 
([`HTML_RAW_NAMES`][]), or the list of named character references //! ([`CHARACTER_REFERENCES`][]). //! -//! [code_fenced]: crate::construct::code_fenced +//! [raw_flow]: crate::construct::raw_flow //! [heading_atx]: crate::construct::heading_atx //! [html_flow]: crate::construct::html_flow @@ -60,11 +60,11 @@ pub const CHARACTER_REFERENCE_HEXADECIMAL_SIZE_MAX: usize = 6; /// [character_reference]: crate::construct::character_reference pub const CHARACTER_REFERENCE_NAMED_SIZE_MAX: usize = 31; -/// The number of markers needed for [code (fenced)][code_fenced] to form. +/// The number of markers needed for [code (fenced)][raw_flow] to form. /// /// Like many things in markdown, the number is `3`. /// -/// [code_fenced]: crate::construct::code_fenced +/// [raw_flow]: crate::construct::raw_flow pub const CODE_FENCED_SEQUENCE_SIZE_MIN: usize = 3; /// The number of markers needed for [frontmatter][] to form. diff --git a/tests/math_flow.rs b/tests/math_flow.rs new file mode 100644 index 0000000..5d161f6 --- /dev/null +++ b/tests/math_flow.rs @@ -0,0 +1,250 @@ +extern crate micromark; +use micromark::{micromark, micromark_with_options, Constructs, Options}; +use pretty_assertions::assert_eq; + +#[test] +fn math_flow() { + let math = Options { + constructs: Constructs { + math_text: true, + math_flow: true, + ..Constructs::default() + }, + ..Options::default() + }; + + assert_eq!( + micromark("$$\na\n$$"), + "<p>$$\na\n$$</p>", + "should not support math (flow) by default" + ); + + assert_eq!( + micromark_with_options("$$\na\n$$", &math), + "<pre><code class=\"language-math math-display\">a\n</code></pre>", + "should support math (flow) if enabled" + ); + + assert_eq!( + micromark_with_options("$$\n<\n >\n$$", &math), + "<pre><code class=\"language-math math-display\"><\n >\n</code></pre>", + "should support math (flow)" + ); + + assert_eq!( + micromark_with_options("$\nfoo\n$", &math), + "<p><code class=\"language-math math-inline\">foo</code></p>", + "should not support 
math (flow) w/ less than two markers" + ); + + assert_eq!( + micromark_with_options("$$$\naaa\n$$\n$$$$", &math), + "<pre><code class=\"language-math math-display\">aaa\n$$\n</code></pre>", + "should support a closing sequence longer, but not shorter than, the opening" + ); + + assert_eq!( + micromark_with_options("$$", &math), + "<pre><code class=\"language-math math-display\"></code></pre>\n", + "should support an eof right after an opening sequence" + ); + + assert_eq!( + micromark_with_options("$$$\n\n$$\naaa\n", &math), + "<pre><code class=\"language-math math-display\">\n$$\naaa\n</code></pre>\n", + "should support an eof somewhere in content" + ); + + assert_eq!( + micromark_with_options("> $$\n> aaa\n\nbbb", &math), + "<blockquote>\n<pre><code class=\"language-math math-display\">aaa\n</code></pre>\n</blockquote>\n<p>bbb</p>", + "should support no closing sequence in a block quote" + ); + + assert_eq!( + micromark_with_options("$$\n\n \n$$", &math), + "<pre><code class=\"language-math math-display\">\n \n</code></pre>", + "should support blank lines in math (flow)" + ); + + assert_eq!( + micromark_with_options("$$\n$$", &math), + "<pre><code class=\"language-math math-display\"></code></pre>", + "should support empty math (flow)" + ); + + assert_eq!( + micromark_with_options(" $$\n aaa\naaa\n$$", &math), + "<pre><code class=\"language-math math-display\">aaa\naaa\n</code></pre>", + "should remove up to one space from the content if the opening sequence is indented w/ 1 space" + ); + + assert_eq!( + micromark_with_options(" $$\naaa\n aaa\naaa\n $$", &math), + "<pre><code class=\"language-math math-display\">aaa\naaa\naaa\n</code></pre>", + "should remove up to two space from the content if the opening sequence is indented w/ 2 spaces" + ); + + assert_eq!( + micromark_with_options(" $$\n aaa\n aaa\n aaa\n $$", &math), + "<pre><code class=\"language-math math-display\">aaa\n aaa\naaa\n</code></pre>", + "should remove up to three space from the content if the 
opening sequence is indented w/ 3 spaces" + ); + + assert_eq!( + micromark_with_options(" $$\n aaa\n $$", &math), + "<pre><code>$$\naaa\n$$\n</code></pre>", + "should not support indenteding the opening sequence w/ 4 spaces" + ); + + assert_eq!( + micromark_with_options("$$\naaa\n $$", &math), + "<pre><code class=\"language-math math-display\">aaa\n</code></pre>", + "should support an indented closing sequence" + ); + + assert_eq!( + micromark_with_options(" $$\naaa\n $$", &math), + "<pre><code class=\"language-math math-display\">aaa\n</code></pre>", + "should support a differently indented closing sequence than the opening sequence" + ); + + assert_eq!( + micromark_with_options("$$\naaa\n $$\n", &math), + "<pre><code class=\"language-math math-display\">aaa\n $$\n</code></pre>\n", + "should not support an indented closing sequence w/ 4 spaces" + ); + + assert_eq!( + micromark_with_options("$$ $$\naaa", &math), + "<p><code class=\"language-math math-inline\"> </code>\naaa</p>", + "should not support dollars in the opening fence after the opening sequence" + ); + + assert_eq!( + micromark_with_options("$$$\naaa\n$$$ $$\n", &math), + "<pre><code class=\"language-math math-display\">aaa\n$$$ $$\n</code></pre>\n", + "should not support spaces in the closing sequence" + ); + + assert_eq!( + micromark_with_options("foo\n$$\nbar\n$$\nbaz", &math), + "<p>foo</p>\n<pre><code class=\"language-math math-display\">bar\n</code></pre>\n<p>baz</p>", + "should support interrupting paragraphs" + ); + + assert_eq!( + micromark_with_options("foo\n---\n$$\nbar\n$$\n# baz", &math), + "<h2>foo</h2>\n<pre><code class=\"language-math math-display\">bar\n</code></pre>\n<h1>baz</h1>", + "should support interrupting other content" + ); + + assert_eq!( + micromark_with_options("$$ruby\ndef foo(x)\n return 3\nend\n$$", &math), + "<pre><code class=\"language-math math-display\">def foo(x)\n return 3\nend\n</code></pre>", + "should not support an “info” string (1)" + ); + + assert_eq!( + 
micromark_with_options("$$$;\n$$$", &math), + "<pre><code class=\"language-math math-display\"></code></pre>", + "should not support an “info” string (2)" + ); + + assert_eq!( + micromark_with_options("$$ ruby startline=3 `%@#`\ndef foo(x)\n return 3\nend\n$$$$", &math), + "<pre><code class=\"language-math math-display\">def foo(x)\n return 3\nend\n</code></pre>", + "should not support an “info” string (3)" + ); + + assert_eq!( + micromark_with_options("$$ aa $$\nfoo", &math), + "<p><code class=\"language-math math-inline\">aa</code>\nfoo</p>", + "should not support dollars in the meta string" + ); + + assert_eq!( + micromark_with_options("$$\n$$ aaa\n$$", &math), + "<pre><code class=\"language-math math-display\">$$ aaa\n</code></pre>", + "should not support meta string on closing sequences" + ); + + // Our own: + assert_eq!( + micromark_with_options("$$ ", &math), + "<pre><code class=\"language-math math-display\"></code></pre>\n", + "should support an eof after whitespace, after the start fence sequence" + ); + + assert_eq!( + micromark_with_options("$$ js\nalert(1)\n$$", &math), + "<pre><code class=\"language-math math-display\">alert(1)\n</code></pre>", + "should support whitespace between the sequence and the meta string" + ); + + assert_eq!( + micromark_with_options("$$js", &math), + "<pre><code class=\"language-math math-display\"></code></pre>\n", + "should support an eof after the meta string" + ); + + assert_eq!( + micromark_with_options("$$ js \nalert(1)\n$$", &math), + "<pre><code class=\"language-math math-display\">alert(1)\n</code></pre>", + "should support whitespace after the meta string" + ); + + assert_eq!( + micromark_with_options("$$\n ", &math), + "<pre><code class=\"language-math math-display\"> \n</code></pre>\n", + "should support an eof after whitespace in content" + ); + + assert_eq!( + micromark_with_options(" $$\n ", &math), + "<pre><code class=\"language-math math-display\"></code></pre>\n", + "should support an eof in the prefix, in 
content" + ); + + assert_eq!( + micromark_with_options("$$j\\+s©", &math), + "<pre><code class=\"language-math math-display\"></code></pre>\n", + "should support character escapes and character references in meta strings" + ); + + assert_eq!( + micromark_with_options("$$a\\&b\0c", &math), + "<pre><code class=\"language-math math-display\"></code></pre>\n", + "should support dangerous characters in meta strings" + ); + + assert_eq!( + micromark_with_options(" $$\naaa\n $$", &math), + "<pre><code class=\"language-math math-display\">aaa\n $$\n</code></pre>\n", + "should not support a closing sequence w/ too much indent, regardless of opening sequence (1)" + ); + + assert_eq!( + micromark_with_options("> $$\n>\n>\n>\n\na", &math), + "<blockquote>\n<pre><code class=\"language-math math-display\">\n\n\n</code></pre>\n</blockquote>\n<p>a</p>", + "should not support a closing sequence w/ too much indent, regardless of opening sequence (2)" + ); + + assert_eq!( + micromark_with_options("> $$a\nb", &math), + "<blockquote>\n<pre><code class=\"language-math math-display\"></code></pre>\n</blockquote>\n<p>b</p>", + "should not support lazyness (1)" + ); + + assert_eq!( + micromark_with_options("> a\n$$b", &math), + "<blockquote>\n<p>a</p>\n</blockquote>\n<pre><code class=\"language-math math-display\"></code></pre>\n", + "should not support lazyness (2)" + ); + + assert_eq!( + micromark_with_options("> $$a\n$$", &math), + "<blockquote>\n<pre><code class=\"language-math math-display\"></code></pre>\n</blockquote>\n<pre><code class=\"language-math math-display\"></code></pre>\n", + "should not support lazyness (3)" + ); +} diff --git a/tests/math_text.rs b/tests/math_text.rs index d0e7589..4fe0288 100644 --- a/tests/math_text.rs +++ b/tests/math_text.rs @@ -7,7 +7,7 @@ fn math_text() { let math = Options { constructs: Constructs { math_text: true, - // To do: enable `math_flow`. 
+ math_flow: true, ..Constructs::default() }, ..Options::default() @@ -21,7 +21,7 @@ fn math_text() { assert_eq!( micromark_with_options("$foo$ $$bar$$", &math), - "<p><code class=\"lang-math math-inline\">foo</code> <code class=\"lang-math math-inline\">bar</code></p>", + "<p><code class=\"language-math math-inline\">foo</code> <code class=\"language-math math-inline\">bar</code></p>", "should support math (text) if enabled" ); @@ -33,103 +33,97 @@ fn math_text() { ..math.clone() } ), - "<p>$foo$ <code class=\"lang-math math-inline\">bar</code></p>", + "<p>$foo$ <code class=\"language-math math-inline\">bar</code></p>", "should not support math (text) w/ a single dollar, w/ `math_text_single_dollar: false`" ); - // assert_eq!( - // micromark_with_options("$foo$", &math), - // "<p><code class=\"lang-math math-inline\">foo</code></p>", - // "should support math (text)" - // ); - assert_eq!( micromark_with_options("$$ foo $ bar $$", &math), - "<p><code class=\"lang-math math-inline\">foo $ bar</code></p>", + "<p><code class=\"language-math math-inline\">foo $ bar</code></p>", "should support math (text) w/ more dollars" ); assert_eq!( micromark_with_options("$ $$ $", &math), - "<p><code class=\"lang-math math-inline\">$$</code></p>", + "<p><code class=\"language-math math-inline\">$$</code></p>", "should support math (text) w/ fences inside, and padding" ); assert_eq!( micromark_with_options("$ $$ $", &math), - "<p><code class=\"lang-math math-inline\"> $$ </code></p>", + "<p><code class=\"language-math math-inline\"> $$ </code></p>", "should support math (text) w/ extra padding" ); assert_eq!( micromark_with_options("$ a$", &math), - "<p><code class=\"lang-math math-inline\"> a</code></p>", + "<p><code class=\"language-math math-inline\"> a</code></p>", "should support math (text) w/ unbalanced padding" ); assert_eq!( micromark_with_options("$\u{a0}b\u{a0}$", &math), - "<p><code class=\"lang-math math-inline\">\u{a0}b\u{a0}</code></p>", + "<p><code 
class=\"language-math math-inline\">\u{a0}b\u{a0}</code></p>", "should support math (text) w/ non-padding whitespace" ); assert_eq!( micromark_with_options("$ $\n$ $", &math), - "<p><code class=\"lang-math math-inline\"> </code>\n<code class=\"lang-math math-inline\"> </code></p>", + "<p><code class=\"language-math math-inline\"> </code>\n<code class=\"language-math math-inline\"> </code></p>", "should support math (text) w/o data" ); assert_eq!( - micromark_with_options("$$\nfoo\nbar \nbaz\n$$", &math), - "<p><code class=\"lang-math math-inline\">foo bar baz</code></p>", + micromark_with_options("$\nfoo\nbar \nbaz\n$", &math), + "<p><code class=\"language-math math-inline\">foo bar baz</code></p>", "should support math (text) w/o line endings (1)" ); assert_eq!( - micromark_with_options("$$\nfoo \n$$", &math), - "<p><code class=\"lang-math math-inline\">foo </code></p>", + micromark_with_options("$\nfoo \n$", &math), + "<p><code class=\"language-math math-inline\">foo </code></p>", "should support math (text) w/o line endings (2)" ); assert_eq!( micromark_with_options("$foo bar \nbaz$", &math), - "<p><code class=\"lang-math math-inline\">foo bar baz</code></p>", + "<p><code class=\"language-math math-inline\">foo bar baz</code></p>", "should not support whitespace collapsing" ); assert_eq!( micromark_with_options("$foo\\$bar$", &math), - "<p><code class=\"lang-math math-inline\">foo\\</code>bar$</p>", + "<p><code class=\"language-math math-inline\">foo\\</code>bar$</p>", "should not support character escapes" ); assert_eq!( micromark_with_options("$$foo$bar$$", &math), - "<p><code class=\"lang-math math-inline\">foo$bar</code></p>", + "<p><code class=\"language-math math-inline\">foo$bar</code></p>", "should support more dollars" ); assert_eq!( micromark_with_options("$ foo $$ bar $", &math), - "<p><code class=\"lang-math math-inline\">foo $$ bar</code></p>", + "<p><code class=\"language-math math-inline\">foo $$ bar</code></p>", "should support less dollars" ); 
assert_eq!( micromark_with_options("*foo$*$", &math), - "<p>*foo<code class=\"lang-math math-inline\">*</code></p>", + "<p>*foo<code class=\"language-math math-inline\">*</code></p>", "should precede over emphasis" ); assert_eq!( micromark_with_options("[not a $link](/foo$)", &math), - "<p>[not a <code class=\"lang-math math-inline\">link](/foo</code>)</p>", + "<p>[not a <code class=\"language-math math-inline\">link](/foo</code>)</p>", "should precede over links" ); assert_eq!( micromark_with_options("$<a href=\"$\">$", &math), - "<p><code class=\"lang-math math-inline\"><a href="</code>">$</p>", + "<p><code class=\"language-math math-inline\"><a href="</code>">$</p>", "should have same precedence as HTML (1)" ); @@ -148,7 +142,7 @@ fn math_text() { assert_eq!( micromark_with_options("$<http://foo.bar.$baz>$", &math), - "<p><code class=\"lang-math math-inline\"><http://foo.bar.</code>baz>$</p>", + "<p><code class=\"language-math math-inline\"><http://foo.bar.</code>baz>$</p>", "should have same precedence as autolinks (1)" ); @@ -172,19 +166,19 @@ fn math_text() { assert_eq!( micromark_with_options("$foo$$bar$$", &math), - "<p>$foo<code class=\"lang-math math-inline\">bar</code></p>", + "<p>$foo<code class=\"language-math math-inline\">bar</code></p>", "should not support no closing fence (2)" ); assert_eq!( micromark_with_options("$foo\t\tbar$", &math), - "<p><code class=\"lang-math math-inline\">foo\t\tbar</code></p>", + "<p><code class=\"language-math math-inline\">foo\t\tbar</code></p>", "should support tabs in code" ); assert_eq!( micromark_with_options("\\$$x$", &math), - "<p>$<code class=\"lang-math math-inline\">x</code></p>", + "<p>$<code class=\"language-math math-inline\">x</code></p>", "should support an escaped initial dollar" ); } |