Diffstat
-rw-r--r--  src/compiler.rs                          |   3
-rw-r--r--  src/construct/flow.rs                    |   8
-rw-r--r--  src/construct/mdx_esm.rs                 | 224
-rw-r--r--  src/construct/mod.rs                     |   2
-rw-r--r--  src/construct/partial_mdx_expression.rs  |  98
-rw-r--r--  src/construct/partial_mdx_jsx.rs         |   3
-rw-r--r--  src/event.rs                             |  42
-rw-r--r--  src/lib.rs                               | 180
-rw-r--r--  src/state.rs                             |  20
-rw-r--r--  src/tokenizer.rs                         |   4
-rw-r--r--  src/util/mdx_collect.rs                  |  70
-rw-r--r--  src/util/mod.rs                          |   1
12 files changed, 628 insertions, 27 deletions
diff --git a/src/compiler.rs b/src/compiler.rs
index d1ac774..eaa15ee 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -364,6 +364,7 @@ fn enter(context: &mut CompileContext) {
         | Name::HeadingAtxText
         | Name::HeadingSetextText
         | Name::Label
+        | Name::MdxEsm
         | Name::MdxFlowExpression
         | Name::MdxTextExpression
         | Name::MdxJsxFlowTag
@@ -412,7 +413,7 @@ fn exit(context: &mut CompileContext) {
         | Name::Resource => {
             on_exit_drop(context);
         }
-        Name::MdxFlowExpression | Name::MdxJsxFlowTag => on_exit_drop_slurp(context),
+        Name::MdxEsm | Name::MdxFlowExpression | Name::MdxJsxFlowTag => on_exit_drop_slurp(context),
         Name::CharacterEscapeValue | Name::CodeTextData | Name::Data | Name::MathTextData => {
             on_exit_data(context);
         }
diff --git a/src/construct/flow.rs b/src/construct/flow.rs
index 08e0466..d6a79d8 100644
--- a/src/construct/flow.rs
+++ b/src/construct/flow.rs
@@ -15,6 +15,7 @@
 //! *   [Heading (atx)][crate::construct::heading_atx]
 //! *   [Heading (setext)][crate::construct::heading_setext]
 //! *   [HTML (flow)][crate::construct::html_flow]
+//! *   [MDX esm][crate::construct::mdx_esm]
 //! *   [MDX expression (flow)][crate::construct::mdx_expression_flow]
 //! *   [MDX JSX (flow)][crate::construct::mdx_jsx_flow]
 //! *   [Raw (flow)][crate::construct::raw_flow] (code (fenced), math (flow))
@@ -66,6 +67,13 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
             );
             State::Retry(StateName::HtmlFlowStart)
         }
+        Some(b'e' | b'i') => {
+            tokenizer.attempt(
+                State::Next(StateName::FlowAfter),
+                State::Next(StateName::FlowBeforeContent),
+            );
+            State::Retry(StateName::MdxEsmStart)
+        }
         Some(b'{') => {
             tokenizer.attempt(
                 State::Next(StateName::FlowAfter),
diff --git a/src/construct/mdx_esm.rs b/src/construct/mdx_esm.rs
new file mode 100644
index 0000000..53f8beb
--- /dev/null
+++ b/src/construct/mdx_esm.rs
@@ -0,0 +1,224 @@
+//! MDX ESM occurs in the [flow][] content type.
+//!
+//! ## Grammar
+//!
+//! MDX ESM forms with the following BNF
+//! (<small>see [construct][crate::construct] for character groups</small>):
+//!
+//! ```bnf
+//! mdx_esm ::= word *line *(eol *line)
+//!
+//! word ::= 'e' 'x' 'p' 'o' 'r' 't' | 'i' 'm' 'p' 'o' 'r' 't'
+//! ```
+//!
+//! This construct must be followed by a blank line or eof (end of file).
+//! It can include blank lines if [`MdxEsmParse`][crate::MdxEsmParse] passed in
+//! `options.mdx_esm_parse` allows it.
+//!
+//! ## Tokens
+//!
+//! *   [`LineEnding`][Name::LineEnding]
+//! *   [`MdxEsm`][Name::MdxEsm]
+//! *   [`MdxEsmData`][Name::MdxEsmData]
+//!
+//! ## References
+//!
+//! *   [`syntax.js` in `micromark-extension-mdxjs-esm`](https://github.com/micromark/micromark-extension-mdxjs-esm/blob/main/dev/lib/syntax.js)
+//! *   [`mdxjs.com`](https://mdxjs.com)
+//!
+//! [flow]: crate::construct::flow
+
+use crate::event::Name;
+use crate::state::{Name as StateName, State};
+use crate::tokenizer::Tokenizer;
+use crate::util::{
+    mdx_collect::{collect, place_to_point},
+    slice::Slice,
+};
+use crate::MdxSignal;
+use alloc::format;
+
+/// Start of MDX ESM.
+///
+/// ```markdown
+/// > | import a from 'b'
+///     ^
+/// ```
+pub fn start(tokenizer: &mut Tokenizer) -> State {
+    // If it’s turned on.
+    if tokenizer.parse_state.options.constructs.mdx_esm
+        // If there is a gnostic parser.
+        && tokenizer.parse_state.options.mdx_esm_parse.is_some()
+        // When not interrupting.
+        && !tokenizer.interrupt
+        // Only at the start of a line, not at whitespace or in a container.
+        && tokenizer.point.column == 1
+        && matches!(tokenizer.current, Some(b'e' | b'i'))
+    {
+        // Place where keyword starts.
+        tokenizer.tokenize_state.start = tokenizer.point.index;
+        tokenizer.enter(Name::MdxEsm);
+        tokenizer.enter(Name::MdxEsmData);
+        tokenizer.consume();
+        State::Next(StateName::MdxEsmWord)
+    } else {
+        State::Nok
+    }
+}
+
+/// In keyword.
+///
+/// ```markdown
+/// > | import a from 'b'
+///     ^^^^^^
+/// ```
+pub fn word(tokenizer: &mut Tokenizer) -> State {
+    if matches!(tokenizer.current, Some(b'a'..=b'z')) {
+        tokenizer.consume();
+        State::Next(StateName::MdxEsmWord)
+    } else {
+        let slice = Slice::from_indices(
+            tokenizer.parse_state.bytes,
+            tokenizer.tokenize_state.start,
+            tokenizer.point.index,
+        );
+
+        if matches!(slice.as_str(), "export" | "import") && tokenizer.current == Some(b' ') {
+            tokenizer.concrete = true;
+            tokenizer.tokenize_state.start = tokenizer.events.len() - 1;
+            tokenizer.consume();
+            State::Next(StateName::MdxEsmInside)
+        } else {
+            tokenizer.tokenize_state.start = 0;
+            State::Nok
+        }
+    }
+}
+
+/// In data.
+///
+/// ```markdown
+/// > | import a from 'b'
+///      ^
+/// ```
+pub fn inside(tokenizer: &mut Tokenizer) -> State {
+    match tokenizer.current {
+        None | Some(b'\n') => {
+            tokenizer.exit(Name::MdxEsmData);
+            State::Retry(StateName::MdxEsmLineStart)
+        }
+        _ => {
+            tokenizer.consume();
+            State::Next(StateName::MdxEsmInside)
+        }
+    }
+}
+
+/// At start of line.
+///
+/// ```markdown
+///   | import a from 'b'
+/// > | export {a}
+///     ^
+/// ```
+pub fn line_start(tokenizer: &mut Tokenizer) -> State {
+    match tokenizer.current {
+        None => State::Retry(StateName::MdxEsmAtEnd),
+        Some(b'\n') => {
+            tokenizer.check(
+                State::Next(StateName::MdxEsmAtEnd),
+                State::Next(StateName::MdxEsmContinuationStart),
+            );
+            State::Retry(StateName::MdxEsmBlankLineBefore)
+        }
+        _ => {
+            tokenizer.enter(Name::MdxEsmData);
+            tokenizer.consume();
+            State::Next(StateName::MdxEsmInside)
+        }
+    }
+}
+
+/// At start of line that continues.
+///
+/// ```markdown
+///   | import a from 'b'
+/// > | export {a}
+///     ^
+/// ```
+pub fn continuation_start(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.enter(Name::LineEnding);
+    tokenizer.consume();
+    tokenizer.exit(Name::LineEnding);
+    State::Next(StateName::MdxEsmLineStart)
+}
+
+/// At start of a potentially blank line.
+///
+/// ```markdown
+///   | import a from 'b'
+/// > | export {a}
+///     ^
+/// ```
+pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.enter(Name::LineEnding);
+    tokenizer.consume();
+    tokenizer.exit(Name::LineEnding);
+    State::Next(StateName::BlankLineStart)
+}
+
+/// At end of line (blank or eof).
+///
+/// ```markdown
+/// > | import a from 'b'
+///                      ^
+/// ```
+pub fn at_end(tokenizer: &mut Tokenizer) -> State {
+    let result = parse_esm(tokenizer);
+
+    // Done!
+    if matches!(result, State::Ok) {
+        tokenizer.concrete = false;
+        tokenizer.exit(Name::MdxEsm);
+    }
+
+    result
+}
+
+/// Parse ESM with a given function.
+fn parse_esm(tokenizer: &mut Tokenizer) -> State {
+    // We can `unwrap` because we don’t parse if this is `None`.
+    let parse = tokenizer
+        .parse_state
+        .options
+        .mdx_esm_parse
+        .as_ref()
+        .unwrap();
+
+    // Collect the body of the ESM and positional info for each run of it.
+    let result = collect(
+        tokenizer,
+        tokenizer.tokenize_state.start,
+        &[Name::MdxEsmData, Name::LineEnding],
+    );
+
+    // Parse and handle what was signaled back.
+    match parse(&result.value) {
+        MdxSignal::Ok => State::Ok,
+        MdxSignal::Error(message, place) => {
+            let point = place_to_point(&result, place);
+            State::Error(format!("{}:{}: {}", point.line, point.column, message))
+        }
+        MdxSignal::Eof(message) => {
+            if tokenizer.current == None {
+                State::Error(format!(
+                    "{}:{}: {}",
+                    tokenizer.point.line, tokenizer.point.column, message
+                ))
+            } else {
+                tokenizer.tokenize_state.mdx_last_parse_error = Some(message);
+                State::Retry(StateName::MdxEsmContinuationStart)
+            }
+        }
+    }
+}
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index ae6facf..88f3050 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -66,6 +66,7 @@
 //! *   [gfm task list item check][gfm_task_list_item_check]
 //! *   [mdx expression (flow)][mdx_expression_flow]
 //! *   [mdx expression (text)][mdx_expression_text]
+//! *   [mdx esm][mdx_esm]
 //! *   [mdx jsx (flow)][mdx_jsx_flow]
 //! *   [mdx jsx (text)][mdx_jsx_text]
 //!
@@ -169,6 +170,7 @@ pub mod label_end;
 pub mod label_start_image;
 pub mod label_start_link;
 pub mod list_item;
+pub mod mdx_esm;
 pub mod mdx_expression_flow;
 pub mod mdx_expression_text;
 pub mod mdx_jsx_flow;
diff --git a/src/construct/partial_mdx_expression.rs b/src/construct/partial_mdx_expression.rs
index 31a9af8..3ebd0f0 100644
--- a/src/construct/partial_mdx_expression.rs
+++ b/src/construct/partial_mdx_expression.rs
@@ -14,7 +14,6 @@
 //! ## Tokens
 //!
 //! *   [`LineEnding`][Name::LineEnding]
-//! *   [`SpaceOrTab`][Name::SpaceOrTab]
 //! *   [`MdxExpressionMarker`][Name::MdxExpressionMarker]
 //! *   [`MdxExpressionData`][Name::MdxExpressionData]
 //!
@@ -61,7 +60,12 @@ use crate::construct::partial_space_or_tab::space_or_tab_min_max;
 use crate::event::Name;
 use crate::state::{Name as StateName, State};
 use crate::tokenizer::Tokenizer;
-use alloc::format;
+use crate::util::{
+    constant::TAB_SIZE,
+    mdx_collect::{collect, place_to_point},
+};
+use crate::{MdxExpressionKind, MdxExpressionParse, MdxSignal};
+use alloc::{format, string::ToString};
 
 /// Start of an MDX expression.
 ///
@@ -75,6 +79,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
     tokenizer.enter(Name::MdxExpressionMarker);
     tokenizer.consume();
     tokenizer.exit(Name::MdxExpressionMarker);
+    tokenizer.tokenize_state.start = tokenizer.events.len() - 1;
     State::Next(StateName::MdxExpressionBefore)
 }
 
@@ -88,8 +93,10 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None => {
             State::Error(format!(
-                "{}:{}: Unexpected end of file in expression, expected a corresponding closing brace for `{{`",
-                tokenizer.point.line, tokenizer.point.column
+                "{}:{}: {}",
+                tokenizer.point.line, tokenizer.point.column,
+                tokenizer.tokenize_state.mdx_last_parse_error.take()
+                    .unwrap_or_else(|| "Unexpected end of file in expression, expected a corresponding closing brace for `{`".to_string())
             ))
         }
         Some(b'\n') => {
@@ -97,24 +104,26 @@
             tokenizer.consume();
             tokenizer.exit(Name::LineEnding);
             State::Next(StateName::MdxExpressionEolAfter)
-        },
+        }
         Some(b'}') if tokenizer.tokenize_state.size == 0 => {
-            if tokenizer.tokenize_state.token_1 == Name::MdxJsxTagAttributeValueExpression && !tokenizer.tokenize_state.seen {
-                State::Error(format!(
-                    "{}:{}: Unexpected empty expression, expected a value between braces",
-                    tokenizer.point.line, tokenizer.point.column
-                ))
+            let state = if let Some(ref parse) = tokenizer.parse_state.options.mdx_expression_parse
+            {
+                parse_expression(tokenizer, parse)
             } else {
-                tokenizer.tokenize_state.seen = false;
+                State::Ok
+            };
+
+            if state == State::Ok {
+                tokenizer.tokenize_state.start = 0;
                 tokenizer.enter(Name::MdxExpressionMarker);
                 tokenizer.consume();
                 tokenizer.exit(Name::MdxExpressionMarker);
                 tokenizer.exit(tokenizer.tokenize_state.token_1.clone());
-                State::Ok
             }
-        },
+
+            state
+        }
         Some(_) => {
-            tokenizer.tokenize_state.seen = true;
             tokenizer.enter(Name::MdxExpressionData);
             State::Retry(StateName::MdxExpressionInside)
         }
@@ -134,8 +143,10 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {
         tokenizer.exit(Name::MdxExpressionData);
         State::Retry(StateName::MdxExpressionBefore)
     } else {
-        // To do: don’t count if gnostic.
-        if tokenizer.current == Some(b'{') {
+        // Don’t count if gnostic.
+        if tokenizer.current == Some(b'{')
+            && tokenizer.parse_state.options.mdx_expression_parse.is_none()
+        {
             tokenizer.tokenize_state.size += 1;
         } else if tokenizer.current == Some(b'}') {
             tokenizer.tokenize_state.size -= 1;
@@ -165,9 +176,60 @@ pub fn eol_after(tokenizer: &mut Tokenizer) -> State {
         ))
     } else if matches!(tokenizer.current, Some(b'\t' | b' ')) {
         tokenizer.attempt(State::Next(StateName::MdxExpressionBefore), State::Nok);
-        // To do: use `start_column` + constants.tabSize for max space to eat.
-        State::Next(space_or_tab_min_max(tokenizer, 0, usize::MAX))
+        // Idea: investigate if we’d need to use more complex stripping.
+        // Take this example:
+        //
+        // ```markdown
+        // > aaa <b c={`
+        // >    d
+        // > `} /> eee
+        // ```
+        //
+        // Currently, the “paragraph” starts at `> | aaa`, so for the next line
+        // here we split it into `>␠|␠␠␠␠|␠d` (prefix, this indent here,
+        // expression data).
+        // The intention above is likely for the split to be as `>␠␠|␠␠␠␠|d`,
+        // which is impossible, but we can mimic it with `>␠|␠␠␠␠␠|d`.
+        //
+        // To improve the situation, we could take `tokenizer.line_start` at
+        // the start of the expression and move past whitespace.
+        // For future lines, we’d move at most to
+        // `line_start_shifted.column + 4`.
+        State::Retry(space_or_tab_min_max(tokenizer, 0, TAB_SIZE))
     } else {
         State::Retry(StateName::MdxExpressionBefore)
     }
 }
+
+/// Parse an expression with a given function.
+fn parse_expression(tokenizer: &mut Tokenizer, parse: &MdxExpressionParse) -> State {
+    // Collect the body of the expression and positional info for each run of it.
+    let result = collect(
+        tokenizer,
+        tokenizer.tokenize_state.start,
+        &[Name::MdxExpressionData, Name::LineEnding],
+    );
+
+    // Turn the name of the expression into a kind.
+    let kind = match tokenizer.tokenize_state.token_1 {
+        Name::MdxFlowExpression | Name::MdxTextExpression => MdxExpressionKind::Expression,
+        Name::MdxJsxTagAttributeExpression => MdxExpressionKind::AttributeExpression,
+        Name::MdxJsxTagAttributeValueExpression => MdxExpressionKind::AttributeValueExpression,
+        _ => unreachable!("cannot handle unknown expression name"),
+    };
+
+    // Parse and handle what was signaled back.
+    match parse(&result.value, kind) {
+        MdxSignal::Ok => State::Ok,
+        MdxSignal::Error(message, place) => {
+            let point = place_to_point(&result, place);
+            State::Error(format!("{}:{}: {}", point.line, point.column, message))
+        }
+        MdxSignal::Eof(message) => {
+            tokenizer.tokenize_state.mdx_last_parse_error = Some(message);
+            tokenizer.enter(Name::MdxExpressionData);
+            tokenizer.consume();
+            State::Next(StateName::MdxExpressionInside)
+        }
+    }
+}
diff --git a/src/construct/partial_mdx_jsx.rs b/src/construct/partial_mdx_jsx.rs
index 9177b5b..e49a8e0 100644
--- a/src/construct/partial_mdx_jsx.rs
+++ b/src/construct/partial_mdx_jsx.rs
@@ -611,8 +611,6 @@ pub fn attribute_before(tokenizer: &mut Tokenizer) -> State {
         Some(b'>') => State::Retry(StateName::MdxJsxTagEnd),
         // Attribute expression.
         Some(b'{') => {
-            // To do: force `spread: true` if gnostic.
-            // To do: pass `start_point` if gnostic.
             tokenizer.tokenize_state.token_2 = tokenizer.tokenize_state.token_1.clone();
             tokenizer.tokenize_state.token_1 = Name::MdxJsxTagAttributeExpression;
             tokenizer.attempt(
@@ -886,7 +884,6 @@ pub fn attribute_value_before(tokenizer: &mut Tokenizer) -> State {
         }
         // Attribute value expression.
         Some(b'{') => {
-            // To do: pass `start_point` if gnostic.
            tokenizer.tokenize_state.token_2 = tokenizer.tokenize_state.token_1.clone();
             tokenizer.tokenize_state.token_1 = Name::MdxJsxTagAttributeValueExpression;
             tokenizer.attempt(
diff --git a/src/event.rs b/src/event.rs
index a2626ee..b3fa9ae 100644
--- a/src/event.rs
+++ b/src/event.rs
@@ -2391,6 +2391,45 @@ pub enum Name {
     /// ^ ^
     /// ```
     MathTextSequence,
+    /// MDX extension: ESM.
+    ///
+    /// ## Info
+    ///
+    /// *   **Context**:
+    ///     [flow content][crate::construct::flow]
+    /// *   **Content model**:
+    ///     void
+    ///     [`MdxEsmData`][Name::MdxEsmData],
+    ///     [`SpaceOrTab`][Name::SpaceOrTab],
+    ///     [`LineEnding`][Name::LineEnding]
+    /// *   **Construct**:
+    ///     [`mdx_esm`][crate::construct::mdx_esm]
+    ///
+    /// ## Example
+    ///
+    /// ```markdown
+    /// > | import a from 'b'
+    ///     ^^^^^^^^^^^^^^^^^
+    /// ```
+    MdxEsm,
+    /// MDX extension: ESM data.
+    ///
+    /// ## Info
+    ///
+    /// *   **Context**:
+    ///     [`MdxEsm`][Name::MdxEsm]
+    /// *   **Content model**:
+    ///     void
+    /// *   **Construct**:
+    ///     [`mdx_esm`][crate::construct::mdx_esm]
+    ///
+    /// ## Example
+    ///
+    /// ```markdown
+    /// > | import a from 'b'
+    ///     ^^^^^^^^^^^^^^^^^
+    /// ```
+    MdxEsmData,
     /// MDX extension: expression marker.
     ///
     /// ## Info
@@ -3336,7 +3375,7 @@ pub enum Name {
 }
 
 /// List of void events, used to make sure everything is working well.
-pub const VOID_EVENTS: [Name; 75] = [
+pub const VOID_EVENTS: [Name; 76] = [
     Name::AttentionSequence,
     Name::AutolinkEmail,
     Name::AutolinkMarker,
@@ -3391,6 +3430,7 @@ pub const VOID_EVENTS: [Name; 75] = [
     Name::MathFlowChunk,
     Name::MathTextData,
     Name::MathTextSequence,
+    Name::MdxEsmData,
     Name::MdxExpressionMarker,
     Name::MdxExpressionData,
     Name::MdxJsxTagMarker,
diff --git a/src/lib.rs b/src/lib.rs
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -20,7 +20,7 @@ mod util;
 
 use crate::compiler::compile;
 use crate::parser::parse;
-use alloc::string::String;
+use alloc::{boxed::Box, fmt, string::String};
 
 /// Type of line endings in markdown.
 #[derive(Clone, Debug, Default, Eq, PartialEq)]
@@ -79,6 +79,71 @@ impl LineEnding {
     }
 }
 
+/// Signal used as feedback when parsing MDX esm/expressions.
+#[derive(Clone, Debug)]
+pub enum MdxSignal {
+    /// A syntax error.
+    ///
+    /// `micromark-rs` will crash with error message `String`, and convert the
+    /// `usize` (byte offset into `&str` passed to `MdxExpressionParse` or
+    /// `MdxEsmParse`) to where it happened in the whole document.
+    ///
+    /// ## Examples
+    ///
+    /// ```rust ignore
+    /// MdxSignal::Error("Unexpected `\"`, expected identifier".to_string(), 1)
+    /// ```
+    Error(String, usize),
+    /// An error at the end of the (partial?) expression.
+    ///
+    /// `micromark-rs` will either crash with error message `String` if it
+    /// doesn’t have any more text, or it will try again later when more text
+    /// is available.
+    ///
+    /// ## Examples
+    ///
+    /// ```rust ignore
+    /// MdxSignal::Eof("Unexpected end of file in string literal".to_string())
+    /// ```
+    Eof(String),
+    /// Done, successfully.
+    ///
+    /// `micromark-rs` knows that this is the end of a valid expression/esm and
+    /// continues with markdown.
+    ///
+    /// ## Examples
+    ///
+    /// ```rust ignore
+    /// MdxSignal::Ok
+    /// ```
+    Ok,
+}
+
+/// Expression kind.
+#[derive(Clone, Debug)]
+pub enum MdxExpressionKind {
+    /// Kind of expressions in prose: `# {Math.PI}` and `{Math.PI}`.
+    Expression,
+    /// Kind of expressions as attributes: `<a {...b}>`.
+    AttributeExpression,
+    /// Kind of expressions as attribute values: `<a b={c}>`.
+    AttributeValueExpression,
+}
+
+/// Signature of a function that parses expressions.
+///
+/// Can be passed as `mdx_expression_parse` in [`Options`][] to support
+/// expressions according to a certain grammar (typically, a programming
+/// language).
+pub type MdxExpressionParse = dyn Fn(&str, MdxExpressionKind) -> MdxSignal;
+
+/// Signature of a function that parses ESM.
+///
+/// Can be passed as `mdx_esm_parse` in [`Options`][] to support
+/// ESM according to a certain grammar (typically, a programming
+/// language).
+pub type MdxEsmParse = dyn Fn(&str) -> MdxSignal;
+
 /// Control which constructs are enabled.
 ///
 /// Not all constructs can be configured.
@@ -301,12 +366,28 @@
     /// ^^^
     /// ```
     pub math_text: bool,
+    /// MDX: ESM.
+    ///
+    /// ```markdown
+    /// > | import a from 'b'
+    ///     ^^^^^^^^^^^^^^^^^
+    /// ```
+    ///
+    /// > 👉 **Note**: you *must* pass [`options.mdx_esm_parse`][MdxEsmParse]
+    /// > too.
+    /// > Otherwise, this option has no effect.
+    pub mdx_esm: bool,
     /// MDX: expression (flow).
     ///
     /// ```markdown
     /// > | {Math.PI}
     ///     ^^^^^^^^^
     /// ```
+    ///
+    /// > 👉 **Note**: you *can* pass
+    /// > [`options.mdx_expression_parse`][MdxExpressionParse]
+    /// > to parse expressions according to a certain grammar (typically, a
+    /// > programming language).
     pub mdx_expression_flow: bool,
     /// MDX: expression (text).
     ///
@@ -314,6 +395,11 @@
     /// > | a {Math.PI} c
     ///       ^^^^^^^^^
     /// ```
+    ///
+    /// > 👉 **Note**: you *can* pass
+    /// > [`options.mdx_expression_parse`][MdxExpressionParse]
+    /// > to parse expressions according to a certain grammar (typically, a
+    /// > programming language).
     pub mdx_expression_text: bool,
     /// MDX: JSX (flow).
     ///
@@ -321,6 +407,11 @@
     /// > | <Component />
     ///     ^^^^^^^^^^^^^
     /// ```
+    ///
+    /// > 👉 **Note**: you *can* pass
+    /// > [`options.mdx_expression_parse`][MdxExpressionParse]
+    /// > to parse expressions in JSX according to a certain grammar
+    /// > (typically, a programming language).
     pub mdx_jsx_flow: bool,
     /// MDX: JSX (text).
     ///
@@ -328,6 +419,11 @@
     /// > | a <Component /> c
     ///       ^^^^^^^^^^^^^
     /// ```
+    ///
+    /// > 👉 **Note**: you *can* pass
+    /// > [`options.mdx_expression_parse`][MdxExpressionParse]
+    /// > to parse expressions in JSX according to a certain grammar
+    /// > (typically, a programming language).
     pub mdx_jsx_text: bool,
     /// Thematic break.
     ///
@@ -370,6 +466,7 @@ impl Default for Constructs {
             list_item: true,
             math_flow: false,
             math_text: false,
+            mdx_esm: false,
             mdx_expression_flow: false,
             mdx_expression_text: false,
             mdx_jsx_flow: false,
@@ -405,6 +502,13 @@ impl Constructs {
     /// This turns on `CommonMark`, turns off some conflicting constructs
     /// (autolinks, code (indented), html), and turns on MDX (JSX,
     /// expressions, ESM).
+    ///
+    /// > 👉 **Note**: you *must* pass [`options.mdx_esm_parse`][MdxEsmParse]
+    /// > to support ESM.
+    /// > You *can* pass
+    /// > [`options.mdx_expression_parse`][MdxExpressionParse]
+    /// > to parse expressions according to a certain grammar (typically, a
+    /// > programming language).
     #[must_use]
     pub fn mdx() -> Self {
         Self {
@@ -412,6 +516,7 @@ impl Constructs {
             code_indented: false,
             html_flow: false,
             html_text: false,
+            mdx_esm: true,
             mdx_expression_flow: true,
             mdx_expression_text: true,
             mdx_jsx_flow: true,
@@ -423,8 +528,8 @@
 
 /// Configuration (optional).
 #[allow(clippy::struct_excessive_bools)]
-#[derive(Clone, Debug)]
 pub struct Options {
+    // Note: when adding fields, don’t forget to add them to `fmt::Debug` below.
     /// Whether to allow (dangerous) HTML.
     /// The default is `false`, you can turn it on to `true` for trusted
     /// content.
@@ -913,6 +1018,75 @@ pub struct Options {
     /// # }
     /// ```
     pub math_text_single_dollar: bool,
+
+    /// Function to parse expressions with.
+    ///
+    /// This can be used to parse expressions with a parser.
+    /// It can be used to support arbitrary programming languages within
+    /// expressions.
+    ///
+    /// For an example that adds support for JavaScript with SWC, see
+    /// `tests/test_utils/mod.rs`.
+    pub mdx_expression_parse: Option<Box<MdxExpressionParse>>,
+
+    /// Function to parse ESM with.
+    ///
+    /// This can be used to parse ESM with a parser.
+    /// It can be used to support arbitrary programming languages within
+    /// ESM; note, however, that the keywords (`export`, `import`) are currently
+    /// hardcoded and JavaScript-specific.
+    ///
+    /// For an example that adds support for JavaScript with SWC, see
+    /// `tests/test_utils/mod.rs`.
+    pub mdx_esm_parse: Option<Box<MdxEsmParse>>,
+    // Note: when adding fields, don’t forget to add them to `fmt::Debug` below.
+}
+
+impl fmt::Debug for Options {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Options")
+            .field("allow_dangerous_html", &self.allow_dangerous_html)
+            .field("allow_dangerous_protocol", &self.allow_dangerous_protocol)
+            .field("constructs", &self.constructs)
+            .field("default_line_ending", &self.default_line_ending)
+            .field("gfm_footnote_label", &self.gfm_footnote_label)
+            .field(
+                "gfm_footnote_label_tag_name",
+                &self.gfm_footnote_label_tag_name,
+            )
+            .field(
+                "gfm_footnote_label_attributes",
+                &self.gfm_footnote_label_attributes,
+            )
+            .field("gfm_footnote_back_label", &self.gfm_footnote_back_label)
+            .field(
+                "gfm_footnote_clobber_prefix",
+                &self.gfm_footnote_clobber_prefix,
+            )
+            .field(
+                "gfm_strikethrough_single_tilde",
+                &self.gfm_strikethrough_single_tilde,
+            )
+            .field("gfm_tagfilter", &self.gfm_tagfilter)
+            .field("math_text_single_dollar", &self.math_text_single_dollar)
+            .field(
+                "mdx_expression_parse",
+                if self.mdx_expression_parse.is_none() {
+                    &"None"
+                } else {
+                    &"Some([Function])"
+                },
+            )
+            .field(
+                "mdx_esm_parse",
+                if self.mdx_esm_parse.is_none() {
+                    &"None"
+                } else {
+                    &"Some([Function])"
+                },
+            )
+            .finish()
+    }
 }
 
 impl Default for Options {
@@ -931,6 +1105,8 @@
             gfm_strikethrough_single_tilde: true,
             gfm_tagfilter: false,
             math_text_single_dollar: true,
+            mdx_expression_parse: None,
+            mdx_esm_parse: None,
         }
     }
 }
diff --git a/src/state.rs b/src/state.rs
index 896761e..1cc2720 100644
--- a/src/state.rs
+++ b/src/state.rs
@@ -344,6 +344,14 @@
     ListItemContBlank,
     ListItemContFilled,
 
+    MdxEsmStart,
+    MdxEsmWord,
+    MdxEsmInside,
+    MdxEsmLineStart,
+    MdxEsmBlankLineBefore,
+    MdxEsmContinuationStart,
+    MdxEsmAtEnd,
+
     MdxExpressionTextStart,
     MdxExpressionTextAfter,
 
@@ -356,8 +364,6 @@
     MdxExpressionBefore,
     MdxExpressionInside,
     MdxExpressionEolAfter,
-    MdxJsxAttributeValueExpressionAfter,
-    MdxJsxAttributeExpressionAfter,
 
     MdxJsxFlowStart,
     MdxJsxFlowBefore,
@@ -385,6 +391,7 @@
     MdxJsxLocalNameAfter,
     MdxJsxAttributeBefore,
     MdxJsxSelfClosing,
+    MdxJsxAttributeExpressionAfter,
     MdxJsxAttributePrimaryName,
     MdxJsxAttributePrimaryNameAfter,
     MdxJsxAttributeLocalNameBefore,
@@ -393,6 +400,7 @@
     MdxJsxAttributeValueBefore,
     MdxJsxAttributeValueQuotedStart,
     MdxJsxAttributeValueQuoted,
+    MdxJsxAttributeValueExpressionAfter,
 
     NonLazyContinuationStart,
     NonLazyContinuationAfter,
@@ -822,6 +830,14 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
         Name::ListItemContBlank => construct::list_item::cont_blank,
         Name::ListItemContFilled => construct::list_item::cont_filled,
 
+        Name::MdxEsmStart => construct::mdx_esm::start,
+        Name::MdxEsmWord => construct::mdx_esm::word,
+        Name::MdxEsmInside => construct::mdx_esm::inside,
+        Name::MdxEsmLineStart => construct::mdx_esm::line_start,
+        Name::MdxEsmBlankLineBefore => construct::mdx_esm::blank_line_before,
+        Name::MdxEsmContinuationStart => construct::mdx_esm::continuation_start,
+        Name::MdxEsmAtEnd => construct::mdx_esm::at_end,
+
         Name::MdxExpressionStart => construct::partial_mdx_expression::start,
         Name::MdxExpressionBefore => construct::partial_mdx_expression::before,
         Name::MdxExpressionInside => construct::partial_mdx_expression::inside,
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 8441f7e..84d3d6d 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -227,6 +227,9 @@ pub struct TokenizeState<'a> {
     /// List of defined GFM footnote definition identifiers.
     pub gfm_footnote_definitions: Vec<String>,
 
+    // Last error message provided at an EOF of an expression.
+    pub mdx_last_parse_error: Option<String>,
+
     /// Whether to connect events.
     pub connect: bool,
     /// Marker.
@@ -343,6 +346,7 @@ impl<'a> Tokenizer<'a> {
             document_at_first_paragraph_of_list_item: false,
             definitions: vec![],
             gfm_footnote_definitions: vec![],
+            mdx_last_parse_error: None,
             end: 0,
             label_starts: vec![],
             label_starts_loose: vec![],
diff --git a/src/util/mdx_collect.rs b/src/util/mdx_collect.rs
new file mode 100644
index 0000000..73ead51
--- /dev/null
+++ b/src/util/mdx_collect.rs
@@ -0,0 +1,70 @@
+//! Collect info for MDX.
+
+use crate::event::{Kind, Name, Point};
+use crate::tokenizer::Tokenizer;
+use crate::util::slice::{Position, Slice};
+use alloc::{string::String, vec, vec::Vec};
+
+pub type Location<'a> = (usize, &'a Point);
+
+pub struct Result<'a> {
+    pub start: &'a Point,
+    pub value: String,
+    pub locations: Vec<Location<'a>>,
+}
+
+pub fn collect<'a>(tokenizer: &'a Tokenizer, from: usize, names: &[Name]) -> Result<'a> {
+    let mut result = Result {
+        start: &tokenizer.events[from].point,
+        value: String::new(),
+        locations: vec![],
+    };
+    let mut index = from;
+    let mut acc = 0;
+
+    while index < tokenizer.events.len() {
+        if tokenizer.events[index].kind == Kind::Enter
+            && names.contains(&tokenizer.events[index].name)
+        {
+            // Include virtual spaces.
+            let value = Slice::from_position(
+                tokenizer.parse_state.bytes,
+                &Position {
+                    start: &tokenizer.events[index].point,
+                    end: &tokenizer.events[index + 1].point,
+                },
+            )
+            .serialize();
+            acc += value.len();
+            result.locations.push((acc, &tokenizer.events[index].point));
+            result.value.push_str(&value);
+        }
+
+        index += 1;
+    }
+
+    result
+}
+
+// Turn an index of `result.value` into a point in the whole document.
+pub fn place_to_point(result: &Result, place: usize) -> Point {
+    let mut index = 0;
+    let mut point = result.start;
+    let mut rest = place;
+
+    while index < result.locations.len() {
+        point = result.locations[index].1;
+
+        if result.locations[index].0 > place {
+            break;
+        }
+
+        rest = place - result.locations[index].0;
+        index += 1;
+    }
+
+    let mut point = point.clone();
+    point.column += rest;
+    point.index += rest;
+    point
+}
diff --git a/src/util/mod.rs b/src/util/mod.rs
index 2ea372c..6281356 100644
--- a/src/util/mod.rs
+++ b/src/util/mod.rs
@@ -6,6 +6,7 @@ pub mod decode_character_reference;
 pub mod edit_map;
 pub mod encode;
 pub mod gfm_tagfilter;
+pub mod mdx_collect;
 pub mod normalize_identifier;
 pub mod sanitize_uri;
 pub mod skip;
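
Not part of the change above: a minimal consumer-side sketch of how the new `mdx_esm_parse` and `mdx_expression_parse` options could be wired up. It assumes the crate is consumed under its `micromark` name; the `mdx_options` helper and the permissive parser functions are hypothetical placeholders for a real JavaScript parser (such as the SWC-based one referenced in `tests/test_utils/mod.rs`).

```rust
use micromark::{Constructs, MdxExpressionKind, MdxSignal, Options};

/// Hypothetical ESM "parser": accept every `import`/`export` block.
/// A real integration would parse `_value` as JavaScript and return
/// `MdxSignal::Error(message, offset)` for syntax errors, or
/// `MdxSignal::Eof(message)` when more input could still complete it.
fn parse_esm(_value: &str) -> MdxSignal {
    MdxSignal::Ok
}

/// Hypothetical expression "parser": accept every expression.
/// `_kind` distinguishes prose expressions (`{Math.PI}`), attribute
/// expressions (`<a {...b}>`), and attribute value expressions (`<a b={c}>`).
fn parse_expression(_value: &str, _kind: MdxExpressionKind) -> MdxSignal {
    MdxSignal::Ok
}

/// Build `Options` that enable the MDX constructs added in this change.
fn mdx_options() -> Options {
    Options {
        // Turn on JSX, expressions, and ESM.
        constructs: Constructs::mdx(),
        // Gnostic ESM parser: without this, `constructs.mdx_esm` has no effect.
        mdx_esm_parse: Some(Box::new(parse_esm)),
        // Optional gnostic expression parser.
        mdx_expression_parse: Some(Box::new(parse_expression)),
        // Keep every other option at its default.
        ..Options::default()
    }
}
```

The resulting `Options` would then be handed to the crate's compile entry point (for example `micromark_with_options(value, &options)`).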