From fe618ff6e38ec0ed4da72a3935fd9ea64ee1cef5 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Mon, 19 Sep 2022 11:17:26 +0200 Subject: Add support for parsing MDX ESM, expressions This commit adds support for hooks that let a user integrate another parser with `micromark-rs`, to parse ESM and expressions according to a particular grammar (such as a programming language, typically JavaScript). For an example integrating with SWC, see `tests/test_utils/mod.rs`. The integration occurs with two functions passed in `options`: `mdx_expression_parse` and `mdx_esm_parse`. They can signal back to micromark when they are successful, whether there is an error at the end (in which case micromark will try to parse more), or whether there is a syntax error (in which case micromark will crash). --- Cargo.toml | 5 +- src/compiler.rs | 3 +- src/construct/flow.rs | 8 + src/construct/mdx_esm.rs | 224 +++++++++++++++++ src/construct/mod.rs | 2 + src/construct/partial_mdx_expression.rs | 98 ++++++-- src/construct/partial_mdx_jsx.rs | 3 - src/event.rs | 42 +++- src/lib.rs | 180 +++++++++++++- src/state.rs | 20 +- src/tokenizer.rs | 4 + src/util/mdx_collect.rs | 70 ++++++ src/util/mod.rs | 1 + tests/code_indented.rs | 6 +- tests/gfm_table.rs | 6 +- tests/math_text.rs | 14 +- tests/mdx_esm.rs | 241 ++++++++++++++++++ tests/mdx_expression_flow.rs | 281 ++++++++++----------- tests/mdx_expression_text.rs | 422 ++++++++++++++++---------------- tests/mdx_jsx_text.rs | 96 ++++---- tests/mdx_swc.rs | 47 ++++ tests/test_utils/mod.rs | 245 ++++++++++++++++++ 22 files changed, 1573 insertions(+), 445 deletions(-) create mode 100644 src/construct/mdx_esm.rs create mode 100644 src/util/mdx_collect.rs create mode 100644 tests/mdx_esm.rs create mode 100644 tests/mdx_swc.rs create mode 100644 tests/test_utils/mod.rs diff --git a/Cargo.toml b/Cargo.toml index 53b2a62..16c0622 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,8 +24,11 @@ unicode-id = { version = "0.3", features = ["no_std"] } 
[dev-dependencies] env_logger = "0.9" -criterion = "0.3" +criterion = "0.4" pretty_assertions = "1" +swc_common = "0.28" +swc_ecma_parser = "0.119" +swc_ecma_ast = "0.91" [build-dependencies] regex = "1" diff --git a/src/compiler.rs b/src/compiler.rs index d1ac774..eaa15ee 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -364,6 +364,7 @@ fn enter(context: &mut CompileContext) { | Name::HeadingAtxText | Name::HeadingSetextText | Name::Label + | Name::MdxEsm | Name::MdxFlowExpression | Name::MdxTextExpression | Name::MdxJsxFlowTag @@ -412,7 +413,7 @@ fn exit(context: &mut CompileContext) { | Name::Resource => { on_exit_drop(context); } - Name::MdxFlowExpression | Name::MdxJsxFlowTag => on_exit_drop_slurp(context), + Name::MdxEsm | Name::MdxFlowExpression | Name::MdxJsxFlowTag => on_exit_drop_slurp(context), Name::CharacterEscapeValue | Name::CodeTextData | Name::Data | Name::MathTextData => { on_exit_data(context); } diff --git a/src/construct/flow.rs b/src/construct/flow.rs index 08e0466..d6a79d8 100644 --- a/src/construct/flow.rs +++ b/src/construct/flow.rs @@ -15,6 +15,7 @@ //! * [Heading (atx)][crate::construct::heading_atx] //! * [Heading (setext)][crate::construct::heading_setext] //! * [HTML (flow)][crate::construct::html_flow] +//! * [MDX esm][crate::construct::mdx_esm] //! * [MDX expression (flow)][crate::construct::mdx_expression_flow] //! * [MDX JSX (flow)][crate::construct::mdx_jsx_flow] //! 
* [Raw (flow)][crate::construct::raw_flow] (code (fenced), math (flow)) @@ -66,6 +67,13 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { ); State::Retry(StateName::HtmlFlowStart) } + Some(b'e' | b'i') => { + tokenizer.attempt( + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeContent), + ); + State::Retry(StateName::MdxEsmStart) + } Some(b'{') => { tokenizer.attempt( State::Next(StateName::FlowAfter), diff --git a/src/construct/mdx_esm.rs b/src/construct/mdx_esm.rs new file mode 100644 index 0000000..53f8beb --- /dev/null +++ b/src/construct/mdx_esm.rs @@ -0,0 +1,224 @@ +//! MDX ESM occurs in the [flow][] content type. +//! +//! ## Grammar +//! +//! MDX expression (flow) forms with the following BNF +//! (see [construct][crate::construct] for character groups): +//! +//! ```bnf +//! mdx_esm ::= word *line *(eol *line) +//! +//! word ::= 'e' 'x' 'p' 'o' 'r' 't' | 'i' 'm' 'p' 'o' 'r' 't' +//! ``` +//! +//! This construct must be followed by a blank line or eof (end of file). +//! It can include blank lines if [`MdxEsmParse`][crate::MdxEsmParse] passed in +//! `options.mdx_esm_parse` allows it. +//! +//! ## Tokens +//! +//! * [`LineEnding`][Name::LineEnding] +//! * [`MdxEsm`][Name::MdxEsm] +//! * [`MdxEsmData`][Name::MdxEsmData] +//! +//! ## References +//! +//! * [`syntax.js` in `micromark-extension-mdxjs-esm`](https://github.com/micromark/micromark-extension-mdxjs-esm/blob/main/dev/lib/syntax.js) +//! * [`mdxjs.com`](https://mdxjs.com) +//! +//! [flow]: crate::construct::flow + +use crate::event::Name; +use crate::state::{Name as StateName, State}; +use crate::tokenizer::Tokenizer; +use crate::util::{ + mdx_collect::{collect, place_to_point}, + slice::Slice, +}; +use crate::MdxSignal; +use alloc::format; + +/// Start of MDX ESM. +/// +/// ```markdown +/// > | import a from 'b' +/// ^ +/// ``` +pub fn start(tokenizer: &mut Tokenizer) -> State { + // If it’s turned on. 
+ if tokenizer.parse_state.options.constructs.mdx_esm + // If there is a gnostic parser. + && tokenizer.parse_state.options.mdx_esm_parse.is_some() + // When not interrupting. + && !tokenizer.interrupt + // Only at the start of a line, not at whitespace or in a container. + && tokenizer.point.column == 1 + && matches!(tokenizer.current, Some(b'e' | b'i')) + { + // Place where keyword starts. + tokenizer.tokenize_state.start = tokenizer.point.index; + tokenizer.enter(Name::MdxEsm); + tokenizer.enter(Name::MdxEsmData); + tokenizer.consume(); + State::Next(StateName::MdxEsmWord) + } else { + State::Nok + } +} + +/// In keyword. +/// +/// ```markdown +/// > | import a from 'b' +/// ^^^^^^ +/// ``` +pub fn word(tokenizer: &mut Tokenizer) -> State { + if matches!(tokenizer.current, Some(b'a'..=b'z')) { + tokenizer.consume(); + State::Next(StateName::MdxEsmWord) + } else { + let slice = Slice::from_indices( + tokenizer.parse_state.bytes, + tokenizer.tokenize_state.start, + tokenizer.point.index, + ); + + if matches!(slice.as_str(), "export" | "import") && tokenizer.current == Some(b' ') { + tokenizer.concrete = true; + tokenizer.tokenize_state.start = tokenizer.events.len() - 1; + tokenizer.consume(); + State::Next(StateName::MdxEsmInside) + } else { + tokenizer.tokenize_state.start = 0; + State::Nok + } + } +} + +/// In data. +/// +/// ```markdown +/// > | import a from 'b' +/// ^ +/// ``` +pub fn inside(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + None | Some(b'\n') => { + tokenizer.exit(Name::MdxEsmData); + State::Retry(StateName::MdxEsmLineStart) + } + _ => { + tokenizer.consume(); + State::Next(StateName::MdxEsmInside) + } + } +} + +/// At start of line. 
+/// +/// ```markdown +/// | import a from 'b' +/// > | export {a} +/// ^ +/// ``` +pub fn line_start(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + None => State::Retry(StateName::MdxEsmAtEnd), + Some(b'\n') => { + tokenizer.check( + State::Next(StateName::MdxEsmAtEnd), + State::Next(StateName::MdxEsmContinuationStart), + ); + State::Retry(StateName::MdxEsmBlankLineBefore) + } + _ => { + tokenizer.enter(Name::MdxEsmData); + tokenizer.consume(); + State::Next(StateName::MdxEsmInside) + } + } +} + +/// At start of line that continues. +/// +/// ```markdown +/// | import a from 'b' +/// > | export {a} +/// ^ +/// ``` +pub fn continuation_start(tokenizer: &mut Tokenizer) -> State { + tokenizer.enter(Name::LineEnding); + tokenizer.consume(); + tokenizer.exit(Name::LineEnding); + State::Next(StateName::MdxEsmLineStart) +} + +/// At start of a potentially blank line. +/// +/// ```markdown +/// | import a from 'b' +/// > | export {a} +/// ^ +/// ``` +pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State { + tokenizer.enter(Name::LineEnding); + tokenizer.consume(); + tokenizer.exit(Name::LineEnding); + State::Next(StateName::BlankLineStart) +} + +/// At end of line (blank or eof). +/// +/// ```markdown +/// > | import a from 'b' +/// ^ +/// ``` +pub fn at_end(tokenizer: &mut Tokenizer) -> State { + let result = parse_esm(tokenizer); + + // Done!. + if matches!(result, State::Ok) { + tokenizer.concrete = false; + tokenizer.exit(Name::MdxEsm); + } + + result +} + +/// Parse ESM with a given function. +fn parse_esm(tokenizer: &mut Tokenizer) -> State { + // We can `unwrap` because we don’t parse if this is `None`. + let parse = tokenizer + .parse_state + .options + .mdx_esm_parse + .as_ref() + .unwrap(); + + // Collect the body of the ESM and positional info for each run of it. + let result = collect( + tokenizer, + tokenizer.tokenize_state.start, + &[Name::MdxEsmData, Name::LineEnding], + ); + + // Parse and handle what was signaled back. 
+ match parse(&result.value) { + MdxSignal::Ok => State::Ok, + MdxSignal::Error(message, place) => { + let point = place_to_point(&result, place); + State::Error(format!("{}:{}: {}", point.line, point.column, message)) + } + MdxSignal::Eof(message) => { + if tokenizer.current == None { + State::Error(format!( + "{}:{}: {}", + tokenizer.point.line, tokenizer.point.column, message + )) + } else { + tokenizer.tokenize_state.mdx_last_parse_error = Some(message); + State::Retry(StateName::MdxEsmContinuationStart) + } + } + } +} diff --git a/src/construct/mod.rs b/src/construct/mod.rs index ae6facf..88f3050 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -66,6 +66,7 @@ //! * [gfm task list item check][gfm_task_list_item_check] //! * [mdx expression (flow)][mdx_expression_flow] //! * [mdx expression (text)][mdx_expression_text] +//! * [mdx esm][mdx_esm] //! * [mdx jsx (flow)][mdx_jsx_flow] //! * [mdx jsx (text)][mdx_jsx_text] //! @@ -169,6 +170,7 @@ pub mod label_end; pub mod label_start_image; pub mod label_start_link; pub mod list_item; +pub mod mdx_esm; pub mod mdx_expression_flow; pub mod mdx_expression_text; pub mod mdx_jsx_flow; diff --git a/src/construct/partial_mdx_expression.rs b/src/construct/partial_mdx_expression.rs index 31a9af8..3ebd0f0 100644 --- a/src/construct/partial_mdx_expression.rs +++ b/src/construct/partial_mdx_expression.rs @@ -14,7 +14,6 @@ //! ## Tokens //! //! * [`LineEnding`][Name::LineEnding] -//! * [`SpaceOrTab`][Name::SpaceOrTab] //! * [`MdxExpressionMarker`][Name::MdxExpressionMarker] //! * [`MdxExpressionData`][Name::MdxExpressionData] //! 
@@ -61,7 +60,12 @@ use crate::construct::partial_space_or_tab::space_or_tab_min_max; use crate::event::Name; use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; -use alloc::format; +use crate::util::{ + constant::TAB_SIZE, + mdx_collect::{collect, place_to_point}, +}; +use crate::{MdxExpressionKind, MdxExpressionParse, MdxSignal}; +use alloc::{format, string::ToString}; /// Start of an MDX expression. /// @@ -75,6 +79,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(Name::MdxExpressionMarker); tokenizer.consume(); tokenizer.exit(Name::MdxExpressionMarker); + tokenizer.tokenize_state.start = tokenizer.events.len() - 1; State::Next(StateName::MdxExpressionBefore) } @@ -88,8 +93,10 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => { State::Error(format!( - "{}:{}: Unexpected end of file in expression, expected a corresponding closing brace for `{{`", - tokenizer.point.line, tokenizer.point.column + "{}:{}: {}", + tokenizer.point.line, tokenizer.point.column, + tokenizer.tokenize_state.mdx_last_parse_error.take() + .unwrap_or_else(|| "Unexpected end of file in expression, expected a corresponding closing brace for `{`".to_string()) )) } Some(b'\n') => { @@ -97,24 +104,26 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { tokenizer.consume(); tokenizer.exit(Name::LineEnding); State::Next(StateName::MdxExpressionEolAfter) - }, + } Some(b'}') if tokenizer.tokenize_state.size == 0 => { - if tokenizer.tokenize_state.token_1 == Name::MdxJsxTagAttributeValueExpression && !tokenizer.tokenize_state.seen { - State::Error(format!( - "{}:{}: Unexpected empty expression, expected a value between braces", - tokenizer.point.line, tokenizer.point.column - )) + let state = if let Some(ref parse) = tokenizer.parse_state.options.mdx_expression_parse + { + parse_expression(tokenizer, parse) } else { - tokenizer.tokenize_state.seen = false; + State::Ok + }; + + if state == State::Ok { + 
tokenizer.tokenize_state.start = 0; tokenizer.enter(Name::MdxExpressionMarker); tokenizer.consume(); tokenizer.exit(Name::MdxExpressionMarker); tokenizer.exit(tokenizer.tokenize_state.token_1.clone()); - State::Ok } - }, + + state + } Some(_) => { - tokenizer.tokenize_state.seen = true; tokenizer.enter(Name::MdxExpressionData); State::Retry(StateName::MdxExpressionInside) } @@ -134,8 +143,10 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State { tokenizer.exit(Name::MdxExpressionData); State::Retry(StateName::MdxExpressionBefore) } else { - // To do: don’t count if gnostic. - if tokenizer.current == Some(b'{') { + // Don’t count if gnostic. + if tokenizer.current == Some(b'{') + && tokenizer.parse_state.options.mdx_expression_parse.is_none() + { tokenizer.tokenize_state.size += 1; } else if tokenizer.current == Some(b'}') { tokenizer.tokenize_state.size -= 1; @@ -165,9 +176,60 @@ pub fn eol_after(tokenizer: &mut Tokenizer) -> State { )) } else if matches!(tokenizer.current, Some(b'\t' | b' ')) { tokenizer.attempt(State::Next(StateName::MdxExpressionBefore), State::Nok); - // To do: use `start_column` + constants.tabSize for max space to eat. - State::Next(space_or_tab_min_max(tokenizer, 0, usize::MAX)) + // Idea: investigate if we’d need to use more complex stripping. + // Take this example: + // + // ```markdown + // > aaa d + // > `} /> eee + // ``` + // + // Currently, the “paragraph” starts at `> | aaa`, so for the next line + // here we split it into `>␠|␠␠␠␠|␠d` (prefix, this indent here, + // expression data). + // The intention above is likely for the split to be as `>␠␠|␠␠␠␠|d`, + // which is impossible, but we can mimick it with `>␠|␠␠␠␠␠|d`. + // + // To improve the situation, we could take `tokenizer.line_start` at + // the start of the expression and move past whitespace. + // For future lines, we’d move at most to + // `line_start_shifted.column + 4`. 
+ State::Retry(space_or_tab_min_max(tokenizer, 0, TAB_SIZE)) } else { State::Retry(StateName::MdxExpressionBefore) } } + +/// Parse an expression with a given function. +fn parse_expression(tokenizer: &mut Tokenizer, parse: &MdxExpressionParse) -> State { + // Collect the body of the expression and positional info for each run of it. + let result = collect( + tokenizer, + tokenizer.tokenize_state.start, + &[Name::MdxExpressionData, Name::LineEnding], + ); + + // Turn the name of the expression into a kind. + let kind = match tokenizer.tokenize_state.token_1 { + Name::MdxFlowExpression | Name::MdxTextExpression => MdxExpressionKind::Expression, + Name::MdxJsxTagAttributeExpression => MdxExpressionKind::AttributeExpression, + Name::MdxJsxTagAttributeValueExpression => MdxExpressionKind::AttributeValueExpression, + _ => unreachable!("cannot handle unknown expression name"), + }; + + // Parse and handle what was signaled back. + match parse(&result.value, kind) { + MdxSignal::Ok => State::Ok, + MdxSignal::Error(message, place) => { + let point = place_to_point(&result, place); + State::Error(format!("{}:{}: {}", point.line, point.column, message)) + } + MdxSignal::Eof(message) => { + tokenizer.tokenize_state.mdx_last_parse_error = Some(message); + tokenizer.enter(Name::MdxExpressionData); + tokenizer.consume(); + State::Next(StateName::MdxExpressionInside) + } + } +} diff --git a/src/construct/partial_mdx_jsx.rs b/src/construct/partial_mdx_jsx.rs index 9177b5b..e49a8e0 100644 --- a/src/construct/partial_mdx_jsx.rs +++ b/src/construct/partial_mdx_jsx.rs @@ -611,8 +611,6 @@ pub fn attribute_before(tokenizer: &mut Tokenizer) -> State { Some(b'>') => State::Retry(StateName::MdxJsxTagEnd), // Attribute expression. Some(b'{') => { - // To do: force `spread: true` if gnostic. - // To do: pass `start_point` if gnostic. 
tokenizer.tokenize_state.token_2 = tokenizer.tokenize_state.token_1.clone(); tokenizer.tokenize_state.token_1 = Name::MdxJsxTagAttributeExpression; tokenizer.attempt( @@ -886,7 +884,6 @@ pub fn attribute_value_before(tokenizer: &mut Tokenizer) -> State { } // Attribute value expression. Some(b'{') => { - // To do: pass `start_point` if gnostic. tokenizer.tokenize_state.token_2 = tokenizer.tokenize_state.token_1.clone(); tokenizer.tokenize_state.token_1 = Name::MdxJsxTagAttributeValueExpression; tokenizer.attempt( diff --git a/src/event.rs b/src/event.rs index a2626ee..b3fa9ae 100644 --- a/src/event.rs +++ b/src/event.rs @@ -2391,6 +2391,45 @@ pub enum Name { /// ^ ^ /// ``` MathTextSequence, + /// MDX extension: ESM. + /// + /// ## Info + /// + /// * **Context**: + /// [flow content][crate::construct::flow] + /// * **Content model**: + /// void + /// [`MdxEsmData`][Name::MdxEsmData], + /// [`SpaceOrTab`][Name::SpaceOrTab], + /// [`LineEnding`][Name::LineEnding] + /// * **Construct**: + /// [`mdx_esm`][crate::construct::mdx_esm] + /// + /// ## Example + /// + /// ```markdown + /// > | import a from 'b' + /// ^^^^^^^^^^^^^^^^^ + /// ``` + MdxEsm, + /// MDX extension: ESM data. + /// + /// ## Info + /// + /// * **Context**: + /// [`MdxEsm`][Name::MdxEsm] + /// * **Content model**: + /// void + /// * **Construct**: + /// [`mdx_esm`][crate::construct::mdx_esm] + /// + /// ## Example + /// + /// ```markdown + /// > | import a from 'b' + /// ^^^^^^^^^^^^^^^^^ + /// ``` + MdxEsmData, /// MDX extension: expression marker. /// /// ## Info @@ -3336,7 +3375,7 @@ pub enum Name { } /// List of void events, used to make sure everything is working well. 
-pub const VOID_EVENTS: [Name; 75] = [ +pub const VOID_EVENTS: [Name; 76] = [ Name::AttentionSequence, Name::AutolinkEmail, Name::AutolinkMarker, @@ -3391,6 +3430,7 @@ pub const VOID_EVENTS: [Name; 75] = [ Name::MathFlowChunk, Name::MathTextData, Name::MathTextSequence, + Name::MdxEsmData, Name::MdxExpressionMarker, Name::MdxExpressionData, Name::MdxJsxTagMarker, diff --git a/src/lib.rs b/src/lib.rs index 47a125f..0b1a571 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,7 +20,7 @@ mod util; use crate::compiler::compile; use crate::parser::parse; -use alloc::string::String; +use alloc::{boxed::Box, fmt, string::String}; /// Type of line endings in markdown. #[derive(Clone, Debug, Default, Eq, PartialEq)] @@ -79,6 +79,71 @@ impl LineEnding { } } +/// Signal used as feedback when parsing MDX esm/expressions. +#[derive(Clone, Debug)] +pub enum MdxSignal { + /// A syntax error. + /// + /// `micromark-rs` will crash with error message `String`, and convert the + /// `usize` (byte offset into `&str` passed to `MdxExpressionParse` or + /// `MdxEsmParse`) to where it happened in the whole document. + /// + /// ## Examples + /// + /// ```rust ignore + /// MdxSignal::Error("Unexpected `\"`, expected identifier".to_string(), 1) + /// ``` + Error(String, usize), + /// An error at the end of the (partial?) expression. + /// + /// `micromark-rs` will either crash with error message `String` if it + /// doesn’t have any more text, or it will try again later when more text + /// is available. + /// + /// ## Examples + /// + /// ```rust ignore + /// MdxSignal::Eof("Unexpected end of file in string literal".to_string()) + /// ``` + Eof(String), + /// Done, successfully. + /// + /// `micromark-rs` knows that this is the end of a valid expression/esm and + /// continues with markdown. + /// + /// ## Examples + /// + /// ```rust ignore + /// MdxSignal::Ok + /// ``` + Ok, +} + +/// Expression kind. 
+#[derive(Clone, Debug)] +pub enum MdxExpressionKind { + /// Kind of expressions in prose: `# {Math.PI}` and `{Math.PI}`. + Expression, + /// Kind of expressions as attributes: `<a {...b}>` + AttributeExpression, + /// Kind of expressions as attribute values: `<a b={c}>`. + AttributeValueExpression, +} + +/// Signature of a function that parses expressions. +/// +/// Can be passed as `mdx_expression_parse` in [`Options`][] to support +/// expressions according to a certain grammar (typically, a programming +/// language). +pub type MdxExpressionParse = dyn Fn(&str, MdxExpressionKind) -> MdxSignal; + +/// Signature of a function that parses ESM. +/// +/// Can be passed as `mdx_esm_parse` in [`Options`][] to support +/// ESM according to a certain grammar (typically, a programming +/// language). +pub type MdxEsmParse = dyn Fn(&str) -> MdxSignal; + /// Control which constructs are enabled. /// /// Not all constructs can be configured. @@ -301,12 +366,28 @@ pub struct Constructs { /// ^^^ /// ``` pub math_text: bool, + /// MDX: ESM. + /// + /// ```markdown + /// > | import a from 'b' + /// ^^^^^^^^^^^^^^^^^ + /// ``` + /// + /// > 👉 **Note**: you *must* pass [`options.mdx_esm_parse`][MdxEsmParse] + /// > too. + /// > Otherwise, this option has no effect. + pub mdx_esm: bool, /// MDX: expression (flow). /// /// ```markdown /// > | {Math.PI} /// ^^^^^^^^^ /// ``` + /// + /// > 👉 **Note**: you *can* pass + /// > [`options.mdx_expression_parse`][MdxExpressionParse] + /// > to parse expressions according to a certain grammar (typically, a + /// > programming language). pub mdx_expression_flow: bool, /// MDX: expression (text). /// @@ -314,6 +395,11 @@ pub struct Constructs { /// > | a {Math.PI} c /// ^^^^^^^^^ /// ``` + /// + /// > 👉 **Note**: you *can* pass + /// > [`options.mdx_expression_parse`][MdxExpressionParse] + /// > to parse expressions according to a certain grammar (typically, a + /// > programming language). pub mdx_expression_text: bool, /// MDX: JSX (flow). 
/// @@ -321,6 +407,11 @@ pub struct Constructs { /// > | /// ^^^^^^^^^^^^^ /// ``` + /// + /// > 👉 **Note**: you *can* pass + /// > [`options.mdx_expression_parse`][MdxExpressionParse] + /// > to parse expressions in JSX according to a certain grammar + /// > (typically, a programming language). pub mdx_jsx_flow: bool, /// MDX: JSX (text). /// @@ -328,6 +419,11 @@ pub struct Constructs { /// > | a c /// ^^^^^^^^^^^^^ /// ``` + /// + /// > 👉 **Note**: you *can* pass + /// > [`options.mdx_expression_parse`][MdxExpressionParse] + /// > to parse expressions in JSX according to a certain grammar + /// > (typically, a programming language). pub mdx_jsx_text: bool, /// Thematic break. /// @@ -370,6 +466,7 @@ impl Default for Constructs { list_item: true, math_flow: false, math_text: false, + mdx_esm: false, mdx_expression_flow: false, mdx_expression_text: false, mdx_jsx_flow: false, @@ -405,6 +502,13 @@ impl Constructs { /// This turns on `CommonMark`, turns off some conflicting constructs /// (autolinks, code (indented), html), and turns on MDX (JSX, /// expressions, ESM). + /// + /// > 👉 **Note**: you *must* pass [`options.mdx_esm_parse`][MdxEsmParse] + /// > to support ESM. + /// > You *can* pass + /// > [`options.mdx_expression_parse`][MdxExpressionParse] + /// > to parse expressions according to a certain grammar (typically, a + /// > programming language). #[must_use] pub fn mdx() -> Self { Self { @@ -412,6 +516,7 @@ impl Constructs { code_indented: false, html_flow: false, html_text: false, + mdx_esm: true, mdx_expression_flow: true, mdx_expression_text: true, mdx_jsx_flow: true, @@ -423,8 +528,8 @@ impl Constructs { /// Configuration (optional). #[allow(clippy::struct_excessive_bools)] -#[derive(Clone, Debug)] pub struct Options { + // Note: when adding fields, don’t forget to add them to `fmt::Debug` below. /// Whether to allow (dangerous) HTML. /// The default is `false`, you can turn it on to `true` for trusted /// content. 
@@ -913,6 +1018,75 @@ pub struct Options { /// # } /// ``` pub math_text_single_dollar: bool, + + /// Function to parse expressions with. + /// + /// This can be used to parse expressions with a parser. + /// It can be used to add support for arbitrary programming languages within + /// expressions. + /// + /// For an example that adds support for JavaScript with SWC, see + /// `tests/test_utils/mod.rs`. + pub mdx_expression_parse: Option<Box<MdxExpressionParse>>, + + /// Function to parse ESM with. + /// + /// This can be used to parse ESM with a parser. + /// It can be used to add support for arbitrary programming languages within + /// ESM, however, the keywords (`export`, `import`) are currently hardcoded + /// JavaScript-specific. + /// + /// For an example that adds support for JavaScript with SWC, see + /// `tests/test_utils/mod.rs`. + pub mdx_esm_parse: Option<Box<MdxEsmParse>>, + // Note: when adding fields, don’t forget to add them to `fmt::Debug` below. +} + +impl fmt::Debug for Options { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Options") + .field("allow_dangerous_html", &self.allow_dangerous_html) + .field("allow_dangerous_protocol", &self.allow_dangerous_protocol) + .field("constructs", &self.constructs) + .field("default_line_ending", &self.default_line_ending) + .field("gfm_footnote_label", &self.gfm_footnote_label) + .field( + "gfm_footnote_label_tag_name", + &self.gfm_footnote_label_tag_name, + ) + .field( + "gfm_footnote_label_attributes", + &self.gfm_footnote_label_attributes, + ) + .field("gfm_footnote_back_label", &self.gfm_footnote_back_label) + .field( + "gfm_footnote_clobber_prefix", + &self.gfm_footnote_clobber_prefix, + ) + .field( + "gfm_strikethrough_single_tilde", + &self.gfm_strikethrough_single_tilde, + ) + .field("gfm_tagfilter", &self.gfm_tagfilter) + .field("math_text_single_dollar", &self.math_text_single_dollar) + .field( + "mdx_expression_parse", + if self.mdx_expression_parse.is_none() { + &"None" + } else { + &"Some([Function])" + }, + ) 
+ .field( + "mdx_esm_parse", + if self.mdx_esm_parse.is_none() { + &"None" + } else { + &"Some([Function])" + }, + ) + .finish() + } } impl Default for Options { @@ -931,6 +1105,8 @@ impl Default for Options { gfm_strikethrough_single_tilde: true, gfm_tagfilter: false, math_text_single_dollar: true, + mdx_expression_parse: None, + mdx_esm_parse: None, } } } diff --git a/src/state.rs b/src/state.rs index 896761e..1cc2720 100644 --- a/src/state.rs +++ b/src/state.rs @@ -344,6 +344,14 @@ pub enum Name { ListItemContBlank, ListItemContFilled, + MdxEsmStart, + MdxEsmWord, + MdxEsmInside, + MdxEsmLineStart, + MdxEsmBlankLineBefore, + MdxEsmContinuationStart, + MdxEsmAtEnd, + MdxExpressionTextStart, MdxExpressionTextAfter, @@ -356,8 +364,6 @@ pub enum Name { MdxExpressionBefore, MdxExpressionInside, MdxExpressionEolAfter, - MdxJsxAttributeValueExpressionAfter, - MdxJsxAttributeExpressionAfter, MdxJsxFlowStart, MdxJsxFlowBefore, @@ -385,6 +391,7 @@ pub enum Name { MdxJsxLocalNameAfter, MdxJsxAttributeBefore, MdxJsxSelfClosing, + MdxJsxAttributeExpressionAfter, MdxJsxAttributePrimaryName, MdxJsxAttributePrimaryNameAfter, MdxJsxAttributeLocalNameBefore, @@ -393,6 +400,7 @@ pub enum Name { MdxJsxAttributeValueBefore, MdxJsxAttributeValueQuotedStart, MdxJsxAttributeValueQuoted, + MdxJsxAttributeValueExpressionAfter, NonLazyContinuationStart, NonLazyContinuationAfter, @@ -822,6 +830,14 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { Name::ListItemContBlank => construct::list_item::cont_blank, Name::ListItemContFilled => construct::list_item::cont_filled, + Name::MdxEsmStart => construct::mdx_esm::start, + Name::MdxEsmWord => construct::mdx_esm::word, + Name::MdxEsmInside => construct::mdx_esm::inside, + Name::MdxEsmLineStart => construct::mdx_esm::line_start, + Name::MdxEsmBlankLineBefore => construct::mdx_esm::blank_line_before, + Name::MdxEsmContinuationStart => construct::mdx_esm::continuation_start, + Name::MdxEsmAtEnd => construct::mdx_esm::at_end, + 
Name::MdxExpressionStart => construct::partial_mdx_expression::start, Name::MdxExpressionBefore => construct::partial_mdx_expression::before, Name::MdxExpressionInside => construct::partial_mdx_expression::inside, diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 8441f7e..84d3d6d 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -227,6 +227,9 @@ pub struct TokenizeState<'a> { /// List of defined GFM footnote definition identifiers. pub gfm_footnote_definitions: Vec<String>, + // Last error message provided at an EOF of an expression. + pub mdx_last_parse_error: Option<String>, + /// Whether to connect events. pub connect: bool, /// Marker. @@ -343,6 +346,7 @@ impl<'a> Tokenizer<'a> { document_at_first_paragraph_of_list_item: false, definitions: vec![], gfm_footnote_definitions: vec![], + mdx_last_parse_error: None, end: 0, label_starts: vec![], label_starts_loose: vec![], diff --git a/src/util/mdx_collect.rs b/src/util/mdx_collect.rs new file mode 100644 index 0000000..73ead51 --- /dev/null +++ b/src/util/mdx_collect.rs @@ -0,0 +1,70 @@ +//! Collect info for MDX. + +use crate::event::{Kind, Name, Point}; +use crate::tokenizer::Tokenizer; +use crate::util::slice::{Position, Slice}; +use alloc::{string::String, vec, vec::Vec}; + +pub type Location<'a> = (usize, &'a Point); + +pub struct Result<'a> { + pub start: &'a Point, + pub value: String, + pub locations: Vec<Location<'a>>, +} + +pub fn collect<'a>(tokenizer: &'a Tokenizer, from: usize, names: &[Name]) -> Result<'a> { + let mut result = Result { + start: &tokenizer.events[from].point, + value: String::new(), + locations: vec![], + }; + let mut index = from; + let mut acc = 0; + + while index < tokenizer.events.len() { + if tokenizer.events[index].kind == Kind::Enter + && names.contains(&tokenizer.events[index].name) + { + // Include virtual spaces. 
+ let value = Slice::from_position( + tokenizer.parse_state.bytes, + &Position { + start: &tokenizer.events[index].point, + end: &tokenizer.events[index + 1].point, + }, + ) + .serialize(); + acc += value.len(); + result.locations.push((acc, &tokenizer.events[index].point)); + result.value.push_str(&value); + } + + index += 1; + } + + result +} + +// Turn an index of `result.value` into a point in the whole document. +pub fn place_to_point(result: &Result, place: usize) -> Point { + let mut index = 0; + let mut point = result.start; + let mut rest = place; + + while index < result.locations.len() { + point = result.locations[index].1; + + if result.locations[index].0 > place { + break; + } + + rest = place - result.locations[index].0; + index += 1; + } + + let mut point = point.clone(); + point.column += rest; + point.index += rest; + point +} diff --git a/src/util/mod.rs b/src/util/mod.rs index 2ea372c..6281356 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -6,6 +6,7 @@ pub mod decode_character_reference; pub mod edit_map; pub mod encode; pub mod gfm_tagfilter; +pub mod mdx_collect; pub mod normalize_identifier; pub mod sanitize_uri; pub mod skip; diff --git a/tests/code_indented.rs b/tests/code_indented.rs index 29d8909..bf39fa3 100644 --- a/tests/code_indented.rs +++ b/tests/code_indented.rs @@ -167,7 +167,11 @@ fn code_indented() -> Result<(), String> { "a ", &Options { allow_dangerous_html: true, - ..off.clone() + constructs: Constructs { + code_indented: false, + ..Constructs::default() + }, + ..Options::default() } )?, "

a

", diff --git a/tests/gfm_table.rs b/tests/gfm_table.rs index b7f884a..8c46a30 100644 --- a/tests/gfm_table.rs +++ b/tests/gfm_table.rs @@ -1037,7 +1037,8 @@ bar "###, &Options { allow_dangerous_html: true, - ..gfm.clone() + constructs: Constructs::gfm(), + ..Options::default() } )?, r###"

Grave accents

@@ -1345,7 +1346,8 @@ b "###, &Options { allow_dangerous_html: true, - ..gfm.clone() + constructs: Constructs::gfm(), + ..Options::default() } )?, r###"

Blank line

diff --git a/tests/math_text.rs b/tests/math_text.rs index dced393..7b53268 100644 --- a/tests/math_text.rs +++ b/tests/math_text.rs @@ -30,7 +30,12 @@ fn math_text() -> Result<(), String> { "$foo$ $$bar$$", &Options { math_text_single_dollar: false, - ..math.clone() + constructs: Constructs { + math_text: true, + math_flow: true, + ..Constructs::default() + }, + ..Options::default() } )?, "

$foo$ bar

", @@ -133,7 +138,12 @@ fn math_text() -> Result<(), String> { &Options { allow_dangerous_html: true, allow_dangerous_protocol: true, - ..math.clone() + constructs: Constructs { + math_text: true, + math_flow: true, + ..Constructs::default() + }, + ..Options::default() } )?, "

$

", diff --git a/tests/mdx_esm.rs b/tests/mdx_esm.rs new file mode 100644 index 0000000..f1ea122 --- /dev/null +++ b/tests/mdx_esm.rs @@ -0,0 +1,241 @@ +extern crate micromark; +mod test_utils; +use micromark::{micromark_with_options, Constructs, Options}; +use pretty_assertions::assert_eq; +use test_utils::{parse_esm, parse_expression}; + +#[test] +fn mdx_esm() -> Result<(), String> { + let swc = Options { + constructs: Constructs::mdx(), + mdx_esm_parse: Some(Box::new(parse_esm)), + mdx_expression_parse: Some(Box::new(parse_expression)), + ..Options::default() + }; + + assert_eq!( + micromark_with_options("import a from 'b'\n\nc", &swc)?, + "

c

", + "should support an import" + ); + + assert_eq!( + micromark_with_options("export default a\n\nb", &swc)?, + "

b

", + "should support an export" + ); + + assert_eq!( + micromark_with_options("impossible", &swc)?, + "

impossible

", + "should not support other keywords (`impossible`)" + ); + + assert_eq!( + micromark_with_options("exporting", &swc)?, + "

exporting

", + "should not support other keywords (`exporting`)" + ); + + assert_eq!( + micromark_with_options("import.", &swc)?, + "

import.

", + "should not support a non-whitespace after the keyword" + ); + + assert_eq!( + micromark_with_options("import('a')", &swc)?, + "

import('a')

", + "should not support a non-whitespace after the keyword (import-as-a-function)" + ); + + assert_eq!( + micromark_with_options(" import a from 'b'\n export default c", &swc)?, + "

import a from 'b'\nexport default c

", + "should not support an indent" + ); + + assert_eq!( + micromark_with_options("- import a from 'b'\n> export default c", &swc)?, + "
    \n
  • import a from 'b'
  • \n
\n
\n

export default c

\n
", + "should not support keywords in containers" + ); + + assert_eq!( + micromark_with_options("import a from 'b'\nexport default c", &swc)?, + "", + "should support imports and exports in the same “block”" + ); + + assert_eq!( + micromark_with_options("import a from 'b'\n\nexport default c", &swc)?, + "", + "should support imports and exports in separate “blocks”" + ); + + assert_eq!( + micromark_with_options("a\n\nimport a from 'b'\n\nb\n\nexport default c", &swc)?, + "

a

\n

b

\n", + "should support imports and exports in between other constructs" + ); + + assert_eq!( + micromark_with_options("a\nimport a from 'b'\n\nb\nexport default c", &swc)?, + "

a\nimport a from 'b'

\n

b\nexport default c

", + "should not support import/exports when interrupting paragraphs" + ); + + assert_eq!( + micromark_with_options("import a", &swc).err().unwrap(), + "1:9: Could not parse esm with swc: Expected ',', got ''", + "should crash on invalid import/exports (1)" + ); + + assert_eq!( + micromark_with_options("import 1/1", &swc).err().unwrap(), + "1:9: Could not parse esm with swc: Expected 'from', got 'numeric literal (1, 1)'", + "should crash on invalid import/exports (2)" + ); + + assert_eq!( + micromark_with_options("export {\n a\n} from 'b'\n\nc", &swc)?, + "

c

", + "should support line endings in import/exports" + ); + + assert_eq!( + micromark_with_options("export {\n\n a\n\n} from 'b'\n\nc", &swc)?, + "

c

", + "should support blank lines in import/exports" + ); + + assert_eq!( + micromark_with_options("import a from 'b'\n*md*?", &swc) + .err() + .unwrap(), + "2:6: Could not parse esm with swc: Unexpected token `?`. Expected this, import, async, function, [ for array literal, { for object literal, @ for decorator, function, class, null, true, false, number, bigint, string, regexp, ` for template literal, (, or an identifier", + "should crash on markdown after import/export w/o blank line" + ); + + assert_eq!( + micromark_with_options("export var a = 1\n// b\n/* c */\n\nd", &swc)?, + "

d

", + "should support comments in “blocks”" + ); + + assert_eq!( + micromark_with_options("export var a = 1\nvar b\n\nc", &swc) + .err() + .unwrap(), + "2:1: Unexpected statement in code: only import/exports are supported", + "should crash on other statements in “blocks”" + ); + + assert_eq!( + micromark_with_options("import ('a')\n\nb", &swc) + .err() + .unwrap(), + "1:1: Unexpected statement in code: only import/exports are supported", + "should crash on import-as-a-function with a space `import (x)`" + ); + + assert_eq!( + micromark_with_options("import a from 'b'\nexport {a}\n\nc", &swc)?, + "

c

", + "should support a reexport from another import" + ); + + assert_eq!( + micromark_with_options("import a from 'b';\nexport {a};\n\nc", &swc)?, + "

c

", + "should support a reexport from another import w/ semicolons" + ); + + assert_eq!( + micromark_with_options("import a from 'b'\nexport {a as default}\n\nc", &swc)?, + "

c

", + "should support a reexport default from another import" + ); + + assert_eq!( + micromark_with_options("export var a = () => ", &swc)?, + "", + "should support JSX by default" + ); + + assert_eq!( + micromark_with_options("export {a}\n", &swc)?, + "", + "should support EOF after EOL" + ); + + assert_eq!( + micromark_with_options("import a from 'b'\n\nexport {a}\n\nc", &swc)?, + "

c

", + "should support a reexport from another esm block (1)" + ); + + assert_eq!( + micromark_with_options("import a from 'b'\n\nexport {a}\n\n# c", &swc)?, + "

c

", + "should support a reexport from another esm block (2)" + ); + + let cases = vec![ + ("default", "import a from \"b\""), + ("whole", "import * as a from \"b\""), + ("destructuring", "import {a} from \"b\""), + ("destructuring and rename", "import {a as b} from \"c\""), + ("default and destructuring", "import a, {b as c} from \"d\""), + ("default and whole", "import a, * as b from \"c\""), + ("side-effects", "import \"a\""), + ]; + + for case in cases { + assert_eq!( + micromark_with_options(case.1, &swc)?, + "", + "should support imports: {}", + case.0 + ); + } + + let cases = vec![ + ("var", "export var a = \"\""), + ("const", "export const a = \"\""), + ("let", "export let a = \"\""), + ("multiple", "export var a, b"), + ("multiple w/ assignment", "export var a = \"a\", b = \"b\""), + ("function", "export function a() {}"), + ("class", "export class a {}"), + ("destructuring", "export var {a} = {}"), + ("rename destructuring", "export var {a: b} = {}"), + ("array destructuring", "export var [a] = []"), + ("default", "export default a = 1"), + ("default function", "export default function a() {}"), + ("default class", "export default class a {}"), + ("aggregate", "export * from \"a\""), + ("whole reexport", "export * as a from \"b\""), + ("reexport destructuring", "export {a} from \"b\""), + ( + "reexport destructuring w rename", + "export {a as b} from \"c\"", + ), + ("reexport as a default whole", "export {default} from \"b\""), + ( + "reexport default and non-default", + "export {default as a, b} from \"c\"", + ), + ]; + + for case in cases { + assert_eq!( + micromark_with_options(case.1, &swc)?, + "", + "should support exports: {}", + case.0 + ); + } + + Ok(()) +} diff --git a/tests/mdx_expression_flow.rs b/tests/mdx_expression_flow.rs index 2a66a9d..81a31a7 100644 --- a/tests/mdx_expression_flow.rs +++ b/tests/mdx_expression_flow.rs @@ -1,6 +1,8 @@ extern crate micromark; +mod test_utils; use micromark::{micromark_with_options, Constructs, Options}; use 
pretty_assertions::assert_eq; +use test_utils::{parse_esm, parse_expression}; #[test] fn mdx_expression_flow_agnostic() -> Result<(), String> { @@ -82,153 +84,132 @@ fn mdx_expression_flow_agnostic() -> Result<(), String> { Ok(()) } -// To do: swc. -// #[test] -// fn mdx_expression_flow_gnostic() -> Result<(), String> { -// assert_eq!( -// micromark_with_options("{a}", &swc), -// "", -// "should support an expression" -// ); - -// assert_eq!( -// micromark_with_options("{}", &swc)?, -// "", -// "should support an empty expression" -// ); - -// // To do: errors. -// // t.throws( -// // () => { -// // micromark_with_options("{a", &swc); -// // }, -// // /Unexpected end of file in expression, expected a corresponding closing brace for `{`/, -// // "should crash if no closing brace is found (1)" -// // ); - -// // To do: errors. -// // t.throws( -// // () => { -// // micromark_with_options("{b { c }", &swc); -// // }, -// // /Could not parse expression with swc: Unexpected content after expression/, -// // "should crash if no closing brace is found (2)" -// // ); - -// assert_eq!( -// micromark_with_options("{\n}\na", &swc)?, -// "

a

", -// "should support a line ending in an expression" -// ); - -// assert_eq!( -// micromark_with_options("{ a } \t\nb", &swc)?, -// "

b

", -// "should support expressions followed by spaces" -// ); - -// assert_eq!( -// micromark_with_options(" { a }\nb", &swc)?, -// "

b

", -// "should support expressions preceded by spaces" -// ); - -// assert_eq!( -// micromark_with_options(" {`\n a\n `}", &swc)?, -// "", -// "should support indented expressions" -// ); - -// assert_eq!( -// micromark_with_options("a{(b)}c", &swc)?, -// "

ac

", -// "should support expressions padded w/ parens" -// ); - -// assert_eq!( -// micromark_with_options("a{/* b */ ( (c) /* d */ + (e) )}f", &swc)?, -// "

af

", -// "should support expressions padded w/ parens and comments" -// ); - -// Ok(()) -// } - -// To do: move to JSX, actually test spread in expressions? -// To do: swc. -// #[test] -// fn mdx_expression_spread() -> Result<(), String> { -// // To do: errors. -// // t.throws( -// // () => { -// // micromark_with_options("a {b} c", &swc); -// // }, -// // /Unexpected `Property` in code: only spread elements are supported/, -// // "should crash if not a spread" -// // ); - -// // To do: errors. -// // t.throws( -// // () => { -// // micromark_with_options("a {...?} c", &swc); -// // }, -// // /Could not parse expression with swc: Unexpected token/, -// // "should crash on an incorrect spread" -// // ); - -// // To do: errors. -// // t.throws( -// // () => { -// // micromark_with_options("a {...b,c} d", &swc); -// // }, -// // /Unexpected extra content in spread: only a single spread is supported/, -// // "should crash if a spread and other things" -// // ); - -// assert_eq!( -// micromark_with_options("a {} b", &swc)?, -// "

a b

", -// "should support an empty spread" -// ); - -// // To do: errors. -// // t.throws( -// // () => { -// // micromark_with_options("a {} b", &swc); -// // }, -// // /Unexpected empty expression/, -// // "should crash on an empty spread w/ `allowEmpty: false`" -// // ); - -// // To do: errors. -// // t.throws( -// // () => { -// // micromark_with_options("{a=b}", &swc); -// // }, -// // /Could not parse expression with swc: Shorthand property assignments are valid only in destructuring patterns/, -// // "should crash if not a spread w/ `allowEmpty`" -// // ); - -// assert_eq!( -// micromark_with_options("a {/* b */} c", &swc)?, -// "

a c

", -// "should support a comment spread" -// ); - -// // To do: errors. -// // t.throws( -// // () => { -// // micromark_with_options("a {/* b */} c", &swc); -// // }, -// // /Unexpected empty expression/, -// // "should crash on a comment spread w/ `allowEmpty: false`" -// // ); - -// assert_eq!( -// micromark_with_options("a {...b} c", &swc)?, -// "

a c

", -// "should support a spread" -// ); - -// Ok(()) -// } +#[test] +fn mdx_expression_flow_gnostic() -> Result<(), String> { + let swc = Options { + constructs: Constructs::mdx(), + mdx_esm_parse: Some(Box::new(parse_esm)), + mdx_expression_parse: Some(Box::new(parse_expression)), + ..Options::default() + }; + + assert_eq!( + micromark_with_options("{a}", &swc)?, + "", + "should support an expression" + ); + + assert_eq!( + micromark_with_options("{}", &swc)?, + "", + "should support an empty expression" + ); + + assert_eq!( + micromark_with_options("{a", &swc).err().unwrap(), + "1:3: Unexpected end of file in expression, expected a corresponding closing brace for `{`", + "should crash if no closing brace is found (1)" + ); + + assert_eq!( + micromark_with_options("{b { c }", &swc).err().unwrap(), + "1:4: Could not parse expression with swc: Unexpected content after expression", + "should crash if no closing brace is found (2)" + ); + + assert_eq!( + micromark_with_options("{\n}\na", &swc)?, + "

a

", + "should support a line ending in an expression" + ); + + assert_eq!( + micromark_with_options("{ a } \t\nb", &swc)?, + "

b

", + "should support expressions followed by spaces" + ); + + assert_eq!( + micromark_with_options(" { a }\nb", &swc)?, + "

b

", + "should support expressions preceded by spaces" + ); + + assert_eq!( + micromark_with_options(" {`\n a\n `}", &swc)?, + "", + "should support indented expressions" + ); + + assert_eq!( + micromark_with_options("a{(b)}c", &swc)?, + "

ac

", + "should support expressions padded w/ parens" + ); + + assert_eq!( + micromark_with_options("a{/* b */ ( (c) /* d */ + (e) )}f", &swc)?, + "

af

", + "should support expressions padded w/ parens and comments" + ); + + Ok(()) +} + +#[test] +fn mdx_expression_spread() -> Result<(), String> { + let swc = Options { + constructs: Constructs::mdx(), + mdx_esm_parse: Some(Box::new(parse_esm)), + mdx_expression_parse: Some(Box::new(parse_expression)), + ..Options::default() + }; + + assert_eq!( + micromark_with_options("
", &swc)?, + "", + "should support spreads for attribute expression" + ); + + assert_eq!( + micromark_with_options("", &swc).err().unwrap(), + "1:5: Expected a single spread value, such as `...x`", + "should crash if not a spread" + ); + + assert_eq!( + micromark_with_options("", &swc).err().unwrap(), + "1:13: Could not parse expression with swc: Unexpected token `?`. Expected this, import, async, function, [ for array literal, { for object literal, @ for decorator, function, class, null, true, false, number, bigint, string, regexp, ` for template literal, (, or an identifier", + "should crash on an incorrect spread" + ); + + assert_eq!( + micromark_with_options("", &swc) + .err() + .unwrap(), + "1:5: Expected a single spread value, such as `...x`", + "should crash if a spread and other things" + ); + + assert_eq!( + micromark_with_options("", &swc).err().unwrap(), + "1:5: Expected a single spread value, such as `...x`", + "should crash on an empty spread" + ); + + assert_eq!( + micromark_with_options("", &swc).err().unwrap(), + "1:12: Could not parse expression with swc: assignment property is invalid syntax", + "should crash if not an identifier" + ); + + assert_eq!( + micromark_with_options("", &swc) + .err() + .unwrap(), + "1:5: Expected a single spread value, such as `...x`", + "should crash on a comment spread" + ); + + Ok(()) +} diff --git a/tests/mdx_expression_text.rs b/tests/mdx_expression_text.rs index b42faf2..3a48965 100644 --- a/tests/mdx_expression_text.rs +++ b/tests/mdx_expression_text.rs @@ -1,147 +1,144 @@ extern crate micromark; +mod test_utils; use micromark::{micromark_with_options, Constructs, Options}; use pretty_assertions::assert_eq; +use test_utils::{parse_esm, parse_expression}; -// To do: swc. -// #[test] -// fn mdx_expression_text_gnostic_core() -> Result<(), String> { -// assert_eq!( -// micromark_with_options("a {} b", &swc)?, -// "

a b

", -// "should support an empty expression (1)" -// ); - -// assert_eq!( -// micromark_with_options("a { \t\r\n} b", &swc)?, -// "

a b

", -// "should support an empty expression (2)" -// ); - -// assert_eq!( -// micromark_with_options("a {/**/} b", &swc)?, -// "

a b

", -// "should support a multiline comment (1)" -// ); - -// assert_eq!( -// micromark_with_options("a { /*\n*/\t} b", &swc)?, -// "

a b

", -// "should support a multiline comment (2)" -// ); - -// assert_eq!( -// micromark_with_options("a {/*b*//*c*/} d", &swc)?, -// "

a d

", -// "should support a multiline comment (3)" -// ); - -// assert_eq!( -// micromark_with_options("a {b/*c*/} d", &swc)?, -// "

a d

", -// "should support a multiline comment (4)" -// ); - -// assert_eq!( -// micromark_with_options("a {/*b*/c} d", &swc)?, -// "

a d

", -// "should support a multiline comment (4)" -// ); - -// // To do: errors. -// // t.throws( -// // () => { -// // micromark_with_options("a {//} b", &swc); -// // }, -// // /Could not parse expression with swc: Unexpected token/, -// // "should crash on an incorrect line comment (1)" -// // ); - -// // To do: errors. -// // t.throws( -// // () => { -// // micromark_with_options("a { // b } c", &swc); -// // }, -// // /Could not parse expression with swc: Unexpected token/, -// // "should crash on an incorrect line comment (2)" -// // ); - -// assert_eq!( -// micromark_with_options("a {//\n} b", &swc)?, -// "

a b

", -// "should support a line comment followed by a line ending" -// ); - -// assert_eq!( -// micromark_with_options("a {// b\nd} d", &swc)?, -// "

a d

", -// "should support a line comment followed by a line ending and an expression" -// ); - -// assert_eq!( -// micromark_with_options("a {b// c\n} d", &swc)?, -// "

a d

", -// "should support an expression followed by a line comment and a line ending" -// ); - -// assert_eq!( -// micromark_with_options("a {/*b*/ // c\n} d", &swc)?, -// "

a d

", -// "should support comments (1)" -// ); - -// assert_eq!( -// micromark_with_options("a {b.c} d", &swc)?, -// "

a d

", -// "should support expression statements (1)" -// ); - -// assert_eq!( -// micromark_with_options("a {1 + 1} b", &swc)?, -// "

a b

", -// "should support expression statements (2)" -// ); - -// assert_eq!( -// micromark_with_options("a {function () {}} b", &swc)?, -// "

a b

", -// "should support expression statements (3)" -// ); - -// // To do: errors. -// // t.throws( -// // () => { -// // micromark_with_options("a {var b = \"c\"} d", &swc); -// // }, -// // /Could not parse expression with swc: Unexpected token/, -// // "should crash on non-expressions" -// // ); - -// assert_eq!( -// micromark_with_options("> a {\n> b} c", &swc)?, -// "
\n

a c

\n
", -// "should support expressions in containers" -// ); - -// // To do: errors. -// // t.throws( -// // () => { -// // micromark_with_options("> a {\n> b<} c", &swc); -// // }, -// // /Could not parse expression with swc: Unexpected token/, -// // "should crash on incorrect expressions in containers (1)" -// // ); - -// // To do: errors. -// // t.throws( -// // () => { -// // micromark_with_options("> a {\n> b\n> c} d", &swc); -// // }, -// // /Could not parse expression with swc: Unexpected content after expression/, -// // "should crash on incorrect expressions in containers (2)" -// // ); - -// Ok(()) -// } +#[test] +fn mdx_expression_text_gnostic_core() -> Result<(), String> { + let swc = Options { + constructs: Constructs::mdx(), + mdx_esm_parse: Some(Box::new(parse_esm)), + mdx_expression_parse: Some(Box::new(parse_expression)), + ..Options::default() + }; + + assert_eq!( + micromark_with_options("a {} b", &swc)?, + "

a b

", + "should support an empty expression (1)" + ); + + assert_eq!( + micromark_with_options("a { \t\r\n} b", &swc)?, + "

a b

", + "should support an empty expression (2)" + ); + + assert_eq!( + micromark_with_options("a {/**/} b", &swc)?, + "

a b

", + "should support a multiline comment (1)" + ); + + assert_eq!( + micromark_with_options("a { /*\n*/\t} b", &swc)?, + "

a b

", + "should support a multiline comment (2)" + ); + + assert_eq!( + micromark_with_options("a {/*b*//*c*/} d", &swc)?, + "

a d

", + "should support a multiline comment (3)" + ); + + assert_eq!( + micromark_with_options("a {b/*c*/} d", &swc)?, + "

a d

", + "should support a multiline comment (4)" + ); + + assert_eq!( + micromark_with_options("a {/*b*/c} d", &swc)?, + "

a d

", + "should support a multiline comment (4)" + ); + + assert_eq!( + micromark_with_options("a {//} b", &swc).err().unwrap(), + "1:4: Could not parse expression with swc: Unexpected eof", + "should crash on an incorrect line comment (1)" + ); + + assert_eq!( + micromark_with_options("a { // b } c", &swc).err().unwrap(), + "1:4: Could not parse expression with swc: Unexpected eof", + "should crash on an incorrect line comment (2)" + ); + + assert_eq!( + micromark_with_options("a {//\n} b", &swc)?, + "

a b

", + "should support a line comment followed by a line ending" + ); + + assert_eq!( + micromark_with_options("a {// b\nd} d", &swc)?, + "

a d

", + "should support a line comment followed by a line ending and an expression" + ); + + assert_eq!( + micromark_with_options("a {b// c\n} d", &swc)?, + "

a d

", + "should support an expression followed by a line comment and a line ending" + ); + + assert_eq!( + micromark_with_options("a {/*b*/ // c\n} d", &swc)?, + "

a d

", + "should support comments (1)" + ); + + assert_eq!( + micromark_with_options("a {b.c} d", &swc)?, + "

a d

", + "should support expression statements (1)" + ); + + assert_eq!( + micromark_with_options("a {1 + 1} b", &swc)?, + "

a b

", + "should support expression statements (2)" + ); + + assert_eq!( + micromark_with_options("a {function () {}} b", &swc)?, + "

a b

", + "should support expression statements (3)" + ); + + assert_eq!( + micromark_with_options("a {var b = \"c\"} d", &swc).err().unwrap(), + "1:7: Could not parse expression with swc: Unexpected token `var`. Expected this, import, async, function, [ for array literal, { for object literal, @ for decorator, function, class, null, true, false, number, bigint, string, regexp, ` for template literal, (, or an identifier", + "should crash on non-expressions" + ); + + assert_eq!( + micromark_with_options("> a {\n> b} c", &swc)?, + "
\n

a c

\n
", + "should support expressions in containers" + ); + + assert_eq!( + micromark_with_options("> a {\n> b<} c", &swc) + .err() + .unwrap(), + "2:8: Could not parse expression with swc: Unexpected eof", + "should crash on incorrect expressions in containers (1)" + ); + + assert_eq!( + micromark_with_options("> a {\n> b\n> c} d", &swc) + .err() + .unwrap(), + "3:3: Could not parse expression with swc: Unexpected content after expression", + "should crash on incorrect expressions in containers (2)" + ); + + Ok(()) +} #[test] fn mdx_expression_text_agnostic() -> Result<(), String> { @@ -197,77 +194,74 @@ fn mdx_expression_text_agnostic() -> Result<(), String> { Ok(()) } -// // To do: swc. -// #[test] -// fn mdx_expression_text_gnostic() -> Result<(), String> { -// assert_eq!( -// micromark_with_options("a {b} c", &swc)?, -// "

a c

", -// "should support an expression" -// ); - -// // To do: errors. -// // t.throws( -// // () => { -// // micromark_with_options("a {??} b", &swc); -// // }, -// // /Could not parse expression with swc: Unexpected token/, -// // "should crash on an incorrect expression" -// // ); - -// assert_eq!( -// micromark_with_options("a {} b", &swc)?, -// "

a b

", -// "should support an empty expression" -// ); - -// // To do: errors. -// // t.throws( -// // () => { -// // micromark_with_options("a {b c", &swc); -// // }, -// // /Unexpected end of file in expression, expected a corresponding closing brace for `{`/, -// // "should crash if no closing brace is found (1)" -// // ); - -// // To do: errors. -// // t.throws( -// // () => { -// // micromark_with_options("a {b { c } d", &swc); -// // }, -// // /Could not parse expression with swc: Unexpected content after expression/, -// // "should crash if no closing brace is found (2)" -// // ); - -// assert_eq!( -// micromark_with_options("a {\n} b", &swc)?, -// "

a b

", -// "should support a line ending in an expression" -// ); - -// assert_eq!( -// micromark_with_options("a } b", &swc)?, -// "

a } b

", -// "should support just a closing brace" -// ); - -// assert_eq!( -// micromark_with_options("{ a } b", &swc)?, -// "

b

", -// "should support expressions as the first thing when following by other things" -// ); - -// assert_eq!( -// micromark_with_options("a { /* { */ } b", &swc)?, -// "

a b

", -// "should support an unbalanced opening brace (if JS permits)" -// ); - -// assert_eq!( -// micromark_with_options("a { /* } */ } b", &swc)?, -// "

a b

", -// "should support an unbalanced closing brace (if JS permits)" -// ); - -// Ok(()) -// } +#[test] +fn mdx_expression_text_gnostic() -> Result<(), String> { + let swc = Options { + constructs: Constructs::mdx(), + mdx_esm_parse: Some(Box::new(parse_esm)), + mdx_expression_parse: Some(Box::new(parse_expression)), + ..Options::default() + }; + + assert_eq!( + micromark_with_options("a {b} c", &swc)?, + "

a c

", + "should support an expression" + ); + + assert_eq!( + micromark_with_options("a {??} b", &swc).err().unwrap(), + "1:9: Could not parse expression with swc: Unexpected eof", + "should crash on an incorrect expression" + ); + + assert_eq!( + micromark_with_options("a {} b", &swc)?, + "

a b

", + "should support an empty expression" + ); + + assert_eq!( + micromark_with_options("a {b c", &swc).err().unwrap(), + "1:7: Unexpected end of file in expression, expected a corresponding closing brace for `{`", + "should crash if no closing brace is found (1)" + ); + + assert_eq!( + micromark_with_options("a {b { c } d", &swc).err().unwrap(), + "1:6: Could not parse expression with swc: Unexpected content after expression", + "should crash if no closing brace is found (2)" + ); + + assert_eq!( + micromark_with_options("a {\n} b", &swc)?, + "

a b

", + "should support a line ending in an expression" + ); + + assert_eq!( + micromark_with_options("a } b", &swc)?, + "

a } b

", + "should support just a closing brace" + ); + + assert_eq!( + micromark_with_options("{ a } b", &swc)?, + "

b

", + "should support expressions as the first thing when following by other things" + ); + + assert_eq!( + micromark_with_options("a { /* { */ } b", &swc)?, + "

a b

", + "should support an unbalanced opening brace (if JS permits)" + ); + + assert_eq!( + micromark_with_options("a { /* } */ } b", &swc)?, + "

a b

", + "should support an unbalanced closing brace (if JS permits)" + ); + + Ok(()) +} diff --git a/tests/mdx_jsx_text.rs b/tests/mdx_jsx_text.rs index cf507ee..be76d6f 100644 --- a/tests/mdx_jsx_text.rs +++ b/tests/mdx_jsx_text.rs @@ -1,6 +1,8 @@ extern crate micromark; +mod test_utils; use micromark::{micromark_with_options, Constructs, Options}; use pretty_assertions::assert_eq; +use test_utils::{parse_esm, parse_expression}; #[test] fn mdx_jsx_text_core() -> Result<(), String> { @@ -84,99 +86,97 @@ fn mdx_jsx_text_agnosic() -> Result<(), String> { #[test] fn mdx_jsx_text_gnostic() -> Result<(), String> { - let mdx = Options { + let swc = Options { constructs: Constructs::mdx(), + mdx_esm_parse: Some(Box::new(parse_esm)), + mdx_expression_parse: Some(Box::new(parse_expression)), ..Options::default() }; assert_eq!( - micromark_with_options("a c", &mdx)?, + micromark_with_options("a c", &swc)?, "

a c

", "should support a self-closing element" ); assert_eq!( - micromark_with_options("a c d", &mdx)?, + micromark_with_options("a c d", &swc)?, "

a c d

", "should support a closed element" ); assert_eq!( - micromark_with_options("a c", &mdx)?, + micromark_with_options("a c", &swc)?, "

a c

", "should support an unclosed element" ); assert_eq!( - micromark_with_options("a d", &mdx)?, + micromark_with_options("a d", &swc)?, "

a d

", "should support an attribute expression" ); assert_eq!( - micromark_with_options("a f", &mdx)?, + micromark_with_options("a f", &swc)?, "

a f

", "should support more complex attribute expression (1)" ); assert_eq!( - micromark_with_options("a d", &mdx)?, + micromark_with_options("a d", &swc)?, "

a d

", "should support more complex attribute expression (2)" ); assert_eq!( - micromark_with_options("a d", &mdx)?, + micromark_with_options("a d", &swc)?, "

a d

", "should support an attribute value expression" ); assert_eq!( - micromark_with_options("a d", &mdx) + micromark_with_options("a d", &swc) .err() .unwrap(), - "1:9: Unexpected empty expression, expected a value between braces", + "1:15: Could not parse expression with swc: Unexpected eof", "should crash on an empty attribute value expression" ); - // To do: swc. - // assert_eq!( - // micromark_with_options("a c", &swc) - // .err() - // .unwrap(), - // "Could not parse expression with acorn: Unexpected token", - // "should crash on a non-spread attribute expression" - // ); - - // To do: swc. - // assert_eq!( - // micromark_with_options("a d", &swc) - // .err() - // .unwrap(), - // "Could not parse expression with acorn: Unexpected token", - // "should crash on invalid JS in an attribute value expression" - // ); - - // To do: swc. - // assert_eq!( - // micromark_with_options("a c", &swc) - // .err() - // .unwrap(), - // "Could not parse expression with acorn: Unexpected token", - // "should crash on invalid JS in an attribute expression" - // ); - - // To do: swc. - // assert_eq!( - // micromark_with_options("a f", &swc) - // .err() - // .unwrap(), - // "Unexpected `ExpressionStatement` in code: expected an object spread", - // "should crash on invalid JS in an attribute expression (2)" - // ); - - assert_eq!( - micromark_with_options("a } /> f", &mdx)?, + assert_eq!( + micromark_with_options("a c", &swc) + .err() + .unwrap(), + "1:18: Could not parse expression with swc: Expected ',', got '}'", + "should crash on a non-spread attribute expression" + ); + + assert_eq!( + micromark_with_options("a d", &swc) + .err() + .unwrap(), + "1:16: Could not parse expression with swc: Unexpected token `?`. 
Expected this, import, async, function, [ for array literal, { for object literal, @ for decorator, function, class, null, true, false, number, bigint, string, regexp, ` for template literal, (, or an identifier", + "should crash on invalid JS in an attribute value expression" + ); + + assert_eq!( + micromark_with_options("a c", &swc) + .err() + .unwrap(), + "1:14: Could not parse expression with swc: Unexpected token `?`. Expected identifier, string literal, numeric literal or [ for the computed key", + "should crash on invalid JS in an attribute expression" + ); + + assert_eq!( + micromark_with_options("a f", &swc) + .err() + .unwrap(), + "1:6: Expected a single spread value, such as `...x`", + "should crash on invalid JS in an attribute expression (2)" + ); + + assert_eq!( + micromark_with_options("a } /> f", &swc)?, "

a f

", "should support parenthesized expressions" ); diff --git a/tests/mdx_swc.rs b/tests/mdx_swc.rs new file mode 100644 index 0000000..c9a2a61 --- /dev/null +++ b/tests/mdx_swc.rs @@ -0,0 +1,47 @@ +extern crate micromark; +mod test_utils; +use micromark::{micromark_with_options, Constructs, Options}; +use pretty_assertions::assert_eq; +use test_utils::{parse_esm, parse_expression}; + +#[test] +fn mdx_swc() -> Result<(), String> { + let mdx = Options { + constructs: Constructs::mdx(), + mdx_esm_parse: Some(Box::new(parse_esm)), + mdx_expression_parse: Some(Box::new(parse_expression)), + ..Options::default() + }; + + assert_eq!( + micromark_with_options("{'}'}", &mdx)?, + "", + "should support JavaScript-aware flow expressions w/ `mdx_expression_parse`" + ); + + assert_eq!( + micromark_with_options("a {'}'} b", &mdx)?, + "

a b

", + "should support JavaScript-aware text expressions w/ `mdx_expression_parse`" + ); + + assert_eq!( + micromark_with_options("
", &mdx)?, + "", + "should support JavaScript-aware attribute expressions w/ `mdx_expression_parse`" + ); + + assert_eq!( + micromark_with_options("", &mdx)?, + "", + "should support JavaScript-aware attribute value expressions w/ `mdx_expression_parse`" + ); + + assert_eq!( + micromark_with_options("import a from 'b'\n\nexport {a}\n\n# c", &mdx)?, + "

c

", + "should support JavaScript-aware ESM w/ `mdx_esm_parse`" + ); + + Ok(()) +} diff --git a/tests/test_utils/mod.rs b/tests/test_utils/mod.rs new file mode 100644 index 0000000..10b9643 --- /dev/null +++ b/tests/test_utils/mod.rs @@ -0,0 +1,245 @@ +extern crate micromark; +extern crate swc_common; +extern crate swc_ecma_ast; +extern crate swc_ecma_parser; +use micromark::{MdxExpressionKind, MdxSignal}; +use swc_common::{source_map::Pos, BytePos, FileName, SourceFile, Spanned}; +use swc_ecma_ast::{EsVersion, Expr, Module}; +use swc_ecma_parser::{ + error::Error as SwcError, parse_file_as_expr, parse_file_as_module, EsConfig, Syntax, +}; + +/// Parse ESM in MDX with SWC. +pub fn parse_esm(value: &str) -> MdxSignal { + let (file, syntax, version) = create_config(value.to_string()); + let mut errors = vec![]; + let result = parse_file_as_module(&file, syntax, version, None, &mut errors); + + match result { + Err(error) => swc_error_to_signal(&error, value.len(), 0, "esm"), + Ok(tree) => { + if errors.is_empty() { + check_esm_ast(tree) + } else { + if errors.len() > 1 { + println!("parse_esm: todo: multiple errors? {:?}", errors); + } + swc_error_to_signal(&errors[0], value.len(), 0, "esm") + } + } + } +} + +/// Parse expressions in MDX with SWC. +pub fn parse_expression(value: &str, kind: MdxExpressionKind) -> MdxSignal { + // Empty expressions are OK. + if matches!(kind, MdxExpressionKind::Expression) + && matches!(whitespace_and_comments(0, value), MdxSignal::Ok) + { + return MdxSignal::Ok; + } + + // For attribute expression, a spread is needed, for which we have to prefix + // and suffix the input. + // See `check_expression_ast` for how the AST is verified. 
+ let (prefix, suffix) = if matches!(kind, MdxExpressionKind::AttributeExpression) { + ("({", "})") + } else { + ("", "") + }; + + let (file, syntax, version) = create_config(format!("{}{}{}", prefix, value, suffix)); + let mut errors = vec![]; + let result = parse_file_as_expr(&file, syntax, version, None, &mut errors); + + match result { + Err(error) => swc_error_to_signal(&error, value.len(), prefix.len(), "expression"), + Ok(tree) => { + if errors.is_empty() { + let place = fix_swc_position(tree.span().hi.to_usize(), prefix.len()); + let result = check_expression_ast(tree, kind); + if matches!(result, MdxSignal::Ok) { + whitespace_and_comments(place, value) + } else { + result + } + } else { + if errors.len() > 1 { + unreachable!("parse_expression: todo: multiple errors? {:?}", errors); + } + swc_error_to_signal(&errors[0], value.len(), prefix.len(), "expression") + } + } + } +} + +/// Check that the resulting AST of ESM is OK. +/// +/// This checks that only module declarations (import/exports) are used, not +/// statements. +fn check_esm_ast(tree: Module) -> MdxSignal { + let mut index = 0; + while index < tree.body.len() { + let node = &tree.body[index]; + + if !node.is_module_decl() { + let place = fix_swc_position(node.span().hi.to_usize(), 0); + return MdxSignal::Error( + "Unexpected statement in code: only import/exports are supported".to_string(), + place, + ); + } + + index += 1; + } + + MdxSignal::Ok +} + +/// Check that the resulting AST of an expressions is OK. +/// +/// This checks that attribute expressions are the expected spread. 
+fn check_expression_ast(tree: Box<Expr>, kind: MdxExpressionKind) -> MdxSignal {
+    if matches!(kind, MdxExpressionKind::AttributeExpression)
+        && tree
+            .unwrap_parens()
+            .as_object()
+            .and_then(|object| {
+                if object.props.len() == 1 {
+                    object.props[0].as_spread()
+                } else {
+                    None
+                }
+            })
+            .is_none()
+    {
+        MdxSignal::Error(
+            "Expected a single spread value, such as `...x`".to_string(),
+            0,
+        )
+    } else {
+        MdxSignal::Ok
+    }
+}
+
+/// Turn an SWC error into an `MdxSignal`.
+///
+/// * If the error happens at or after `value_len`, yields `MdxSignal::Eof`
+/// * Else, yields `MdxSignal::Error`.
+fn swc_error_to_signal(
+    error: &SwcError,
+    value_len: usize,
+    prefix_len: usize,
+    name: &str,
+) -> MdxSignal {
+    let message = error.kind().msg().to_string();
+    let place = fix_swc_position(error.span().hi.to_usize(), prefix_len);
+    let message = format!("Could not parse {} with swc: {}", name, message);
+
+    if place >= value_len {
+        MdxSignal::Eof(message)
+    } else {
+        MdxSignal::Error(message, place)
+    }
+}
+
+/// Move past JavaScript whitespace (well, actually ASCII whitespace) and
+/// comments.
+///
+/// This is needed because for expressions, we use an API that parses up to
+/// a valid expression, but there may be more expressions after it, which we
+/// don’t allow.
+fn whitespace_and_comments(mut index: usize, value: &str) -> MdxSignal {
+    let bytes = value.as_bytes();
+    let len = bytes.len();
+    let mut in_multiline = false;
+    let mut in_line = false;
+
+    while index < len {
+        // In a multiline comment: `/* a */`.
+        if in_multiline {
+            if index + 1 < len && bytes[index] == b'*' && bytes[index + 1] == b'/' {
+                index += 1;
+                in_multiline = false;
+            }
+        }
+        // In a line comment: `// a`.
+        else if in_line {
+            if index + 1 < len && bytes[index] == b'\r' && bytes[index + 1] == b'\n' {
+                index += 1;
+                in_line = false;
+            } else if bytes[index] == b'\r' || bytes[index] == b'\n' {
+                in_line = false;
+            }
+        }
+        // Not in a comment, opening a multiline comment: `/* a */`.
+        else if index + 1 < len && bytes[index] == b'/' && bytes[index + 1] == b'*' {
+            index += 1;
+            in_multiline = true;
+        }
+        // Not in a comment, opening a line comment: `// a`.
+        else if index + 1 < len && bytes[index] == b'/' && bytes[index + 1] == b'/' {
+            index += 1;
+            in_line = true;
+        }
+        // Outside comment, whitespace.
+        else if bytes[index].is_ascii_whitespace() {
+            // Fine!
+        }
+        // Outside comment, not whitespace.
+        else {
+            return MdxSignal::Error(
+                "Could not parse expression with swc: Unexpected content after expression"
+                    .to_string(),
+                index,
+            );
+        }
+
+        index += 1;
+    }
+
+    if in_multiline {
+        MdxSignal::Error(
+            "Could not parse expression with swc: Unexpected unclosed multiline comment, expected closing: `*/`".to_string(),
+            index,
+        )
+    } else if in_line {
+        // EOF instead of EOL is specifically not allowed, because that would
+        // mean the closing brace is on the commented-out line
+        MdxSignal::Error(
+            "Could not parse expression with swc: Unexpected unclosed line comment, expected line ending: `\\n`".to_string(),
+            index,
+        )
+    } else {
+        MdxSignal::Ok
+    }
+}
+
+/// Create configuration for SWC, shared between ESM and expressions.
+///
+/// This enables modern JavaScript (ES2022) + JSX.
+fn create_config(source: String) -> (SourceFile, Syntax, EsVersion) {
+    (
+        // File.
+        SourceFile::new(
+            FileName::Anon,
+            false,
+            FileName::Anon,
+            source,
+            BytePos::from_usize(1),
+        ),
+        // Syntax.
+        Syntax::Es(EsConfig {
+            jsx: true,
+            ..EsConfig::default()
+        }),
+        // Version.
+        EsVersion::Es2022,
+    )
+}
+
+/// Turn an SWC byte position (which starts at 1, see `create_config`) into an
+/// offset in the original input string; clamps at 0 instead of underflowing.
+fn fix_swc_position(index: usize, prefix_len: usize) -> usize {
+    index.saturating_sub(1 + prefix_len)
+}
-- cgit