diff options
Diffstat (limited to '')
-rw-r--r-- | Cargo.toml | 5 | ||||
-rw-r--r-- | src/compiler.rs | 3 | ||||
-rw-r--r-- | src/construct/flow.rs | 8 | ||||
-rw-r--r-- | src/construct/mdx_esm.rs | 224 | ||||
-rw-r--r-- | src/construct/mod.rs | 2 | ||||
-rw-r--r-- | src/construct/partial_mdx_expression.rs | 98 | ||||
-rw-r--r-- | src/construct/partial_mdx_jsx.rs | 3 | ||||
-rw-r--r-- | src/event.rs | 42 | ||||
-rw-r--r-- | src/lib.rs | 180 | ||||
-rw-r--r-- | src/state.rs | 20 | ||||
-rw-r--r-- | src/tokenizer.rs | 4 | ||||
-rw-r--r-- | src/util/mdx_collect.rs | 70 | ||||
-rw-r--r-- | src/util/mod.rs | 1 | ||||
-rw-r--r-- | tests/code_indented.rs | 6 | ||||
-rw-r--r-- | tests/gfm_table.rs | 6 | ||||
-rw-r--r-- | tests/math_text.rs | 14 | ||||
-rw-r--r-- | tests/mdx_esm.rs | 241 | ||||
-rw-r--r-- | tests/mdx_expression_flow.rs | 281 | ||||
-rw-r--r-- | tests/mdx_expression_text.rs | 422 | ||||
-rw-r--r-- | tests/mdx_jsx_text.rs | 96 | ||||
-rw-r--r-- | tests/mdx_swc.rs | 47 | ||||
-rw-r--r-- | tests/test_utils/mod.rs | 245 |
22 files changed, 1573 insertions, 445 deletions
@@ -24,8 +24,11 @@ unicode-id = { version = "0.3", features = ["no_std"] } [dev-dependencies] env_logger = "0.9" -criterion = "0.3" +criterion = "0.4" pretty_assertions = "1" +swc_common = "0.28" +swc_ecma_parser = "0.119" +swc_ecma_ast = "0.91" [build-dependencies] regex = "1" diff --git a/src/compiler.rs b/src/compiler.rs index d1ac774..eaa15ee 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -364,6 +364,7 @@ fn enter(context: &mut CompileContext) { | Name::HeadingAtxText | Name::HeadingSetextText | Name::Label + | Name::MdxEsm | Name::MdxFlowExpression | Name::MdxTextExpression | Name::MdxJsxFlowTag @@ -412,7 +413,7 @@ fn exit(context: &mut CompileContext) { | Name::Resource => { on_exit_drop(context); } - Name::MdxFlowExpression | Name::MdxJsxFlowTag => on_exit_drop_slurp(context), + Name::MdxEsm | Name::MdxFlowExpression | Name::MdxJsxFlowTag => on_exit_drop_slurp(context), Name::CharacterEscapeValue | Name::CodeTextData | Name::Data | Name::MathTextData => { on_exit_data(context); } diff --git a/src/construct/flow.rs b/src/construct/flow.rs index 08e0466..d6a79d8 100644 --- a/src/construct/flow.rs +++ b/src/construct/flow.rs @@ -15,6 +15,7 @@ //! * [Heading (atx)][crate::construct::heading_atx] //! * [Heading (setext)][crate::construct::heading_setext] //! * [HTML (flow)][crate::construct::html_flow] +//! * [MDX esm][crate::construct::mdx_esm] //! * [MDX expression (flow)][crate::construct::mdx_expression_flow] //! * [MDX JSX (flow)][crate::construct::mdx_jsx_flow] //! 
* [Raw (flow)][crate::construct::raw_flow] (code (fenced), math (flow)) @@ -66,6 +67,13 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { ); State::Retry(StateName::HtmlFlowStart) } + Some(b'e' | b'i') => { + tokenizer.attempt( + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeContent), + ); + State::Retry(StateName::MdxEsmStart) + } Some(b'{') => { tokenizer.attempt( State::Next(StateName::FlowAfter), diff --git a/src/construct/mdx_esm.rs b/src/construct/mdx_esm.rs new file mode 100644 index 0000000..53f8beb --- /dev/null +++ b/src/construct/mdx_esm.rs @@ -0,0 +1,224 @@ +//! MDX ESM occurs in the [flow][] content type. +//! +//! ## Grammar +//! +//! MDX ESM forms with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>): +//! +//! ```bnf +//! mdx_esm ::= word *line *(eol *line) +//! +//! word ::= 'e' 'x' 'p' 'o' 'r' 't' | 'i' 'm' 'p' 'o' 'r' 't' +//! ``` +//! +//! This construct must be followed by a blank line or eof (end of file). +//! It can include blank lines if [`MdxEsmParse`][crate::MdxEsmParse] passed in +//! `options.mdx_esm_parse` allows it. +//! +//! ## Tokens +//! +//! * [`LineEnding`][Name::LineEnding] +//! * [`MdxEsm`][Name::MdxEsm] +//! * [`MdxEsmData`][Name::MdxEsmData] +//! +//! ## References +//! +//! * [`syntax.js` in `micromark-extension-mdxjs-esm`](https://github.com/micromark/micromark-extension-mdxjs-esm/blob/main/dev/lib/syntax.js) +//! * [`mdxjs.com`](https://mdxjs.com) +//! +//! [flow]: crate::construct::flow + +use crate::event::Name; +use crate::state::{Name as StateName, State}; +use crate::tokenizer::Tokenizer; +use crate::util::{ + mdx_collect::{collect, place_to_point}, + slice::Slice, +}; +use crate::MdxSignal; +use alloc::format; + +/// Start of MDX ESM. +/// +/// ```markdown +/// > | import a from 'b' +/// ^ +/// ``` +pub fn start(tokenizer: &mut Tokenizer) -> State { + // If it’s turned on. 
+ if tokenizer.parse_state.options.constructs.mdx_esm + // If there is a gnostic parser. + && tokenizer.parse_state.options.mdx_esm_parse.is_some() + // When not interrupting. + && !tokenizer.interrupt + // Only at the start of a line, not at whitespace or in a container. + && tokenizer.point.column == 1 + && matches!(tokenizer.current, Some(b'e' | b'i')) + { + // Place where keyword starts. + tokenizer.tokenize_state.start = tokenizer.point.index; + tokenizer.enter(Name::MdxEsm); + tokenizer.enter(Name::MdxEsmData); + tokenizer.consume(); + State::Next(StateName::MdxEsmWord) + } else { + State::Nok + } +} + +/// In keyword. +/// +/// ```markdown +/// > | import a from 'b' +/// ^^^^^^ +/// ``` +pub fn word(tokenizer: &mut Tokenizer) -> State { + if matches!(tokenizer.current, Some(b'a'..=b'z')) { + tokenizer.consume(); + State::Next(StateName::MdxEsmWord) + } else { + let slice = Slice::from_indices( + tokenizer.parse_state.bytes, + tokenizer.tokenize_state.start, + tokenizer.point.index, + ); + + if matches!(slice.as_str(), "export" | "import") && tokenizer.current == Some(b' ') { + tokenizer.concrete = true; + tokenizer.tokenize_state.start = tokenizer.events.len() - 1; + tokenizer.consume(); + State::Next(StateName::MdxEsmInside) + } else { + tokenizer.tokenize_state.start = 0; + State::Nok + } + } +} + +/// In data. +/// +/// ```markdown +/// > | import a from 'b' +/// ^ +/// ``` +pub fn inside(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + None | Some(b'\n') => { + tokenizer.exit(Name::MdxEsmData); + State::Retry(StateName::MdxEsmLineStart) + } + _ => { + tokenizer.consume(); + State::Next(StateName::MdxEsmInside) + } + } +} + +/// At start of line. 
+/// +/// ```markdown +/// | import a from 'b' +/// > | export {a} +/// ^ +/// ``` +pub fn line_start(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + None => State::Retry(StateName::MdxEsmAtEnd), + Some(b'\n') => { + tokenizer.check( + State::Next(StateName::MdxEsmAtEnd), + State::Next(StateName::MdxEsmContinuationStart), + ); + State::Retry(StateName::MdxEsmBlankLineBefore) + } + _ => { + tokenizer.enter(Name::MdxEsmData); + tokenizer.consume(); + State::Next(StateName::MdxEsmInside) + } + } +} + +/// At start of line that continues. +/// +/// ```markdown +/// | import a from 'b' +/// > | export {a} +/// ^ +/// ``` +pub fn continuation_start(tokenizer: &mut Tokenizer) -> State { + tokenizer.enter(Name::LineEnding); + tokenizer.consume(); + tokenizer.exit(Name::LineEnding); + State::Next(StateName::MdxEsmLineStart) +} + +/// At start of a potentially blank line. +/// +/// ```markdown +/// | import a from 'b' +/// > | export {a} +/// ^ +/// ``` +pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State { + tokenizer.enter(Name::LineEnding); + tokenizer.consume(); + tokenizer.exit(Name::LineEnding); + State::Next(StateName::BlankLineStart) +} + +/// At end of line (blank or eof). +/// +/// ```markdown +/// > | import a from 'b' +/// ^ +/// ``` +pub fn at_end(tokenizer: &mut Tokenizer) -> State { + let result = parse_esm(tokenizer); + + // Done!. + if matches!(result, State::Ok) { + tokenizer.concrete = false; + tokenizer.exit(Name::MdxEsm); + } + + result +} + +/// Parse ESM with a given function. +fn parse_esm(tokenizer: &mut Tokenizer) -> State { + // We can `unwrap` because we don’t parse if this is `None`. + let parse = tokenizer + .parse_state + .options + .mdx_esm_parse + .as_ref() + .unwrap(); + + // Collect the body of the ESM and positional info for each run of it. + let result = collect( + tokenizer, + tokenizer.tokenize_state.start, + &[Name::MdxEsmData, Name::LineEnding], + ); + + // Parse and handle what was signaled back. 
+ match parse(&result.value) { + MdxSignal::Ok => State::Ok, + MdxSignal::Error(message, place) => { + let point = place_to_point(&result, place); + State::Error(format!("{}:{}: {}", point.line, point.column, message)) + } + MdxSignal::Eof(message) => { + if tokenizer.current == None { + State::Error(format!( + "{}:{}: {}", + tokenizer.point.line, tokenizer.point.column, message + )) + } else { + tokenizer.tokenize_state.mdx_last_parse_error = Some(message); + State::Retry(StateName::MdxEsmContinuationStart) + } + } + } +} diff --git a/src/construct/mod.rs b/src/construct/mod.rs index ae6facf..88f3050 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -66,6 +66,7 @@ //! * [gfm task list item check][gfm_task_list_item_check] //! * [mdx expression (flow)][mdx_expression_flow] //! * [mdx expression (text)][mdx_expression_text] +//! * [mdx esm][mdx_esm] //! * [mdx jsx (flow)][mdx_jsx_flow] //! * [mdx jsx (text)][mdx_jsx_text] //! @@ -169,6 +170,7 @@ pub mod label_end; pub mod label_start_image; pub mod label_start_link; pub mod list_item; +pub mod mdx_esm; pub mod mdx_expression_flow; pub mod mdx_expression_text; pub mod mdx_jsx_flow; diff --git a/src/construct/partial_mdx_expression.rs b/src/construct/partial_mdx_expression.rs index 31a9af8..3ebd0f0 100644 --- a/src/construct/partial_mdx_expression.rs +++ b/src/construct/partial_mdx_expression.rs @@ -14,7 +14,6 @@ //! ## Tokens //! //! * [`LineEnding`][Name::LineEnding] -//! * [`SpaceOrTab`][Name::SpaceOrTab] //! * [`MdxExpressionMarker`][Name::MdxExpressionMarker] //! * [`MdxExpressionData`][Name::MdxExpressionData] //! 
@@ -61,7 +60,12 @@ use crate::construct::partial_space_or_tab::space_or_tab_min_max; use crate::event::Name; use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; -use alloc::format; +use crate::util::{ + constant::TAB_SIZE, + mdx_collect::{collect, place_to_point}, +}; +use crate::{MdxExpressionKind, MdxExpressionParse, MdxSignal}; +use alloc::{format, string::ToString}; /// Start of an MDX expression. /// @@ -75,6 +79,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(Name::MdxExpressionMarker); tokenizer.consume(); tokenizer.exit(Name::MdxExpressionMarker); + tokenizer.tokenize_state.start = tokenizer.events.len() - 1; State::Next(StateName::MdxExpressionBefore) } @@ -88,8 +93,10 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => { State::Error(format!( - "{}:{}: Unexpected end of file in expression, expected a corresponding closing brace for `{{`", - tokenizer.point.line, tokenizer.point.column + "{}:{}: {}", + tokenizer.point.line, tokenizer.point.column, + tokenizer.tokenize_state.mdx_last_parse_error.take() + .unwrap_or_else(|| "Unexpected end of file in expression, expected a corresponding closing brace for `{`".to_string()) )) } Some(b'\n') => { @@ -97,24 +104,26 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { tokenizer.consume(); tokenizer.exit(Name::LineEnding); State::Next(StateName::MdxExpressionEolAfter) - }, + } Some(b'}') if tokenizer.tokenize_state.size == 0 => { - if tokenizer.tokenize_state.token_1 == Name::MdxJsxTagAttributeValueExpression && !tokenizer.tokenize_state.seen { - State::Error(format!( - "{}:{}: Unexpected empty expression, expected a value between braces", - tokenizer.point.line, tokenizer.point.column - )) + let state = if let Some(ref parse) = tokenizer.parse_state.options.mdx_expression_parse + { + parse_expression(tokenizer, parse) } else { - tokenizer.tokenize_state.seen = false; + State::Ok + }; + + if state == State::Ok { + 
tokenizer.tokenize_state.start = 0; tokenizer.enter(Name::MdxExpressionMarker); tokenizer.consume(); tokenizer.exit(Name::MdxExpressionMarker); tokenizer.exit(tokenizer.tokenize_state.token_1.clone()); - State::Ok } - }, + + state + } Some(_) => { - tokenizer.tokenize_state.seen = true; tokenizer.enter(Name::MdxExpressionData); State::Retry(StateName::MdxExpressionInside) } @@ -134,8 +143,10 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State { tokenizer.exit(Name::MdxExpressionData); State::Retry(StateName::MdxExpressionBefore) } else { - // To do: don’t count if gnostic. - if tokenizer.current == Some(b'{') { + // Don’t count if gnostic. + if tokenizer.current == Some(b'{') + && tokenizer.parse_state.options.mdx_expression_parse.is_none() + { tokenizer.tokenize_state.size += 1; } else if tokenizer.current == Some(b'}') { tokenizer.tokenize_state.size -= 1; @@ -165,9 +176,60 @@ pub fn eol_after(tokenizer: &mut Tokenizer) -> State { )) } else if matches!(tokenizer.current, Some(b'\t' | b' ')) { tokenizer.attempt(State::Next(StateName::MdxExpressionBefore), State::Nok); - // To do: use `start_column` + constants.tabSize for max space to eat. - State::Next(space_or_tab_min_max(tokenizer, 0, usize::MAX)) + // Idea: investigate if we’d need to use more complex stripping. + // Take this example: + // + // ```markdown + // > aaa <b c={` + // > d + // > `} /> eee + // ``` + // + // Currently, the “paragraph” starts at `> | aaa`, so for the next line + // here we split it into `>␠|␠␠␠␠|␠d` (prefix, this indent here, + // expression data). + // The intention above is likely for the split to be as `>␠␠|␠␠␠␠|d`, + // which is impossible, but we can mimic it with `>␠|␠␠␠␠␠|d`. + // + // To improve the situation, we could take `tokenizer.line_start` at + // the start of the expression and move past whitespace. + // For future lines, we’d move at most to + // `line_start_shifted.column + 4`. 
+ State::Retry(space_or_tab_min_max(tokenizer, 0, TAB_SIZE)) } else { State::Retry(StateName::MdxExpressionBefore) } } + +/// Parse an expression with a given function. +fn parse_expression(tokenizer: &mut Tokenizer, parse: &MdxExpressionParse) -> State { + // Collect the body of the expression and positional info for each run of it. + let result = collect( + tokenizer, + tokenizer.tokenize_state.start, + &[Name::MdxExpressionData, Name::LineEnding], + ); + + // Turn the name of the expression into a kind. + let kind = match tokenizer.tokenize_state.token_1 { + Name::MdxFlowExpression | Name::MdxTextExpression => MdxExpressionKind::Expression, + Name::MdxJsxTagAttributeExpression => MdxExpressionKind::AttributeExpression, + Name::MdxJsxTagAttributeValueExpression => MdxExpressionKind::AttributeValueExpression, + _ => unreachable!("cannot handle unknown expression name"), + }; + + // Parse and handle what was signaled back. + match parse(&result.value, kind) { + MdxSignal::Ok => State::Ok, + MdxSignal::Error(message, place) => { + let point = place_to_point(&result, place); + State::Error(format!("{}:{}: {}", point.line, point.column, message)) + } + MdxSignal::Eof(message) => { + tokenizer.tokenize_state.mdx_last_parse_error = Some(message); + tokenizer.enter(Name::MdxExpressionData); + tokenizer.consume(); + State::Next(StateName::MdxExpressionInside) + } + } +} diff --git a/src/construct/partial_mdx_jsx.rs b/src/construct/partial_mdx_jsx.rs index 9177b5b..e49a8e0 100644 --- a/src/construct/partial_mdx_jsx.rs +++ b/src/construct/partial_mdx_jsx.rs @@ -611,8 +611,6 @@ pub fn attribute_before(tokenizer: &mut Tokenizer) -> State { Some(b'>') => State::Retry(StateName::MdxJsxTagEnd), // Attribute expression. Some(b'{') => { - // To do: force `spread: true` if gnostic. - // To do: pass `start_point` if gnostic. 
tokenizer.tokenize_state.token_2 = tokenizer.tokenize_state.token_1.clone(); tokenizer.tokenize_state.token_1 = Name::MdxJsxTagAttributeExpression; tokenizer.attempt( @@ -886,7 +884,6 @@ pub fn attribute_value_before(tokenizer: &mut Tokenizer) -> State { } // Attribute value expression. Some(b'{') => { - // To do: pass `start_point` if gnostic. tokenizer.tokenize_state.token_2 = tokenizer.tokenize_state.token_1.clone(); tokenizer.tokenize_state.token_1 = Name::MdxJsxTagAttributeValueExpression; tokenizer.attempt( diff --git a/src/event.rs b/src/event.rs index a2626ee..b3fa9ae 100644 --- a/src/event.rs +++ b/src/event.rs @@ -2391,6 +2391,45 @@ pub enum Name { /// ^ ^ /// ``` MathTextSequence, + /// MDX extension: ESM. + /// + /// ## Info + /// + /// * **Context**: + /// [flow content][crate::construct::flow] + /// * **Content model**: + /// void + /// [`MdxEsmData`][Name::MdxEsmData], + /// [`SpaceOrTab`][Name::SpaceOrTab], + /// [`LineEnding`][Name::LineEnding] + /// * **Construct**: + /// [`mdx_esm`][crate::construct::mdx_esm] + /// + /// ## Example + /// + /// ```markdown + /// > | import a from 'b' + /// ^^^^^^^^^^^^^^^^^ + /// ``` + MdxEsm, + /// MDX extension: ESM data. + /// + /// ## Info + /// + /// * **Context**: + /// [`MdxEsm`][Name::MdxEsm] + /// * **Content model**: + /// void + /// * **Construct**: + /// [`mdx_esm`][crate::construct::mdx_esm] + /// + /// ## Example + /// + /// ```markdown + /// > | import a from 'b' + /// ^^^^^^^^^^^^^^^^^ + /// ``` + MdxEsmData, /// MDX extension: expression marker. /// /// ## Info @@ -3336,7 +3375,7 @@ pub enum Name { } /// List of void events, used to make sure everything is working well. 
-pub const VOID_EVENTS: [Name; 75] = [ +pub const VOID_EVENTS: [Name; 76] = [ Name::AttentionSequence, Name::AutolinkEmail, Name::AutolinkMarker, @@ -3391,6 +3430,7 @@ pub const VOID_EVENTS: [Name; 75] = [ Name::MathFlowChunk, Name::MathTextData, Name::MathTextSequence, + Name::MdxEsmData, Name::MdxExpressionMarker, Name::MdxExpressionData, Name::MdxJsxTagMarker, @@ -20,7 +20,7 @@ mod util; use crate::compiler::compile; use crate::parser::parse; -use alloc::string::String; +use alloc::{boxed::Box, fmt, string::String}; /// Type of line endings in markdown. #[derive(Clone, Debug, Default, Eq, PartialEq)] @@ -79,6 +79,71 @@ impl LineEnding { } } +/// Signal used as feedback when parsing MDX esm/expressions. +#[derive(Clone, Debug)] +pub enum MdxSignal { + /// A syntax error. + /// + /// `micromark-rs` will crash with error message `String`, and convert the + /// `usize` (byte offset into `&str` passed to `MdxExpressionParse` or + /// `MdxEsmParse`) to where it happened in the whole document. + /// + /// ## Examples + /// + /// ```rust ignore + /// MdxSignal::Error("Unexpected `\"`, expected identifier".to_string(), 1) + /// ``` + Error(String, usize), + /// An error at the end of the (partial?) expression. + /// + /// `micromark-rs` will either crash with error message `String` if it + /// doesn’t have any more text, or it will try again later when more text + /// is available. + /// + /// ## Examples + /// + /// ```rust ignore + /// MdxSignal::Eof("Unexpected end of file in string literal".to_string()) + /// ``` + Eof(String), + /// Done, successfully. + /// + /// `micromark-rs` knows that this is the end of a valid expression/esm and + /// continues with markdown. + /// + /// ## Examples + /// + /// ```rust ignore + /// MdxSignal::Ok + /// ``` + Ok, +} + +/// Expression kind. +#[derive(Clone, Debug)] +pub enum MdxExpressionKind { + /// Kind of expressions in prose: `# {Math.PI}` and `{Math.PI}`. 
+ Expression, + /// Kind of expressions as attributes: `<a {...b}>` + AttributeExpression, + /// Kind of expressions as attribute values: `<a b={c}>`. + AttributeValueExpression, +} + +/// Signature of a function that parses expressions. +/// +/// Can be passed as `mdx_expression_parse` in [`Options`][] to support +/// expressions according to a certain grammar (typically, a programming +/// language). +pub type MdxExpressionParse = dyn Fn(&str, MdxExpressionKind) -> MdxSignal; + +/// Signature of a function that parses ESM. +/// +/// Can be passed as `mdx_esm_parse` in [`Options`][] to support +/// ESM according to a certain grammar (typically, a programming +/// language). +pub type MdxEsmParse = dyn Fn(&str) -> MdxSignal; + /// Control which constructs are enabled. /// /// Not all constructs can be configured. @@ -301,12 +366,28 @@ pub struct Constructs { /// ^^^ /// ``` pub math_text: bool, + /// MDX: ESM. + /// + /// ```markdown + /// > | import a from 'b' + /// ^^^^^^^^^^^^^^^^^ + /// ``` + /// + /// > 👉 **Note**: you *must* pass [`options.mdx_esm_parse`][MdxEsmParse] + /// > too. + /// > Otherwise, this option has no effect. + pub mdx_esm: bool, /// MDX: expression (flow). /// /// ```markdown /// > | {Math.PI} /// ^^^^^^^^^ /// ``` + /// + /// > 👉 **Note**: you *can* pass + /// > [`options.mdx_expression_parse`][MdxExpressionParse] + /// > to parse expressions according to a certain grammar (typically, a + /// > programming language). pub mdx_expression_flow: bool, /// MDX: expression (text). /// @@ -314,6 +395,11 @@ pub struct Constructs { /// > | a {Math.PI} c /// ^^^^^^^^^ /// ``` + /// + /// > 👉 **Note**: you *can* pass + /// > [`options.mdx_expression_parse`][MdxExpressionParse] + /// > to parse expressions according to a certain grammar (typically, a + /// > programming language). pub mdx_expression_text: bool, /// MDX: JSX (flow). 
/// @@ -321,6 +407,11 @@ pub struct Constructs { /// > | <Component /> /// ^^^^^^^^^^^^^ /// ``` + /// + /// > 👉 **Note**: you *can* pass + /// > [`options.mdx_expression_parse`][MdxExpressionParse] + /// > to parse expressions in JSX according to a certain grammar + /// > (typically, a programming language). pub mdx_jsx_flow: bool, /// MDX: JSX (text). /// @@ -328,6 +419,11 @@ pub struct Constructs { /// > | a <Component /> c /// ^^^^^^^^^^^^^ /// ``` + /// + /// > 👉 **Note**: you *can* pass + /// > [`options.mdx_expression_parse`][MdxExpressionParse] + /// > to parse expressions in JSX according to a certain grammar + /// > (typically, a programming language). pub mdx_jsx_text: bool, /// Thematic break. /// @@ -370,6 +466,7 @@ impl Default for Constructs { list_item: true, math_flow: false, math_text: false, + mdx_esm: false, mdx_expression_flow: false, mdx_expression_text: false, mdx_jsx_flow: false, @@ -405,6 +502,13 @@ impl Constructs { /// This turns on `CommonMark`, turns off some conflicting constructs /// (autolinks, code (indented), html), and turns on MDX (JSX, /// expressions, ESM). + /// + /// > 👉 **Note**: you *must* pass [`options.mdx_esm_parse`][MdxEsmParse] + /// > to support ESM. + /// > You *can* pass + /// > [`options.mdx_expression_parse`][MdxExpressionParse] + /// > to parse expressions according to a certain grammar (typically, a + /// > programming language). #[must_use] pub fn mdx() -> Self { Self { @@ -412,6 +516,7 @@ impl Constructs { code_indented: false, html_flow: false, html_text: false, + mdx_esm: true, mdx_expression_flow: true, mdx_expression_text: true, mdx_jsx_flow: true, @@ -423,8 +528,8 @@ impl Constructs { /// Configuration (optional). #[allow(clippy::struct_excessive_bools)] -#[derive(Clone, Debug)] pub struct Options { + // Note: when adding fields, don’t forget to add them to `fmt::Debug` below. /// Whether to allow (dangerous) HTML. /// The default is `false`, you can turn it on to `true` for trusted /// content. 
@@ -913,6 +1018,75 @@ pub struct Options { /// # } /// ``` pub math_text_single_dollar: bool, + + /// Function to parse expressions with. + /// + /// This can be used to parse expressions with a parser. + /// It can be used to support arbitrary programming languages within + /// expressions. + /// + /// For an example that adds support for JavaScript with SWC, see + /// `tests/test_utils/mod.rs`. + pub mdx_expression_parse: Option<Box<MdxExpressionParse>>, + + /// Function to parse ESM with. + /// + /// This can be used to parse ESM with a parser. + /// It can be used to support arbitrary programming languages within + /// ESM, however, the keywords (`export`, `import`) are currently hardcoded + /// JavaScript-specific. + /// + /// For an example that adds support for JavaScript with SWC, see + /// `tests/test_utils/mod.rs`. + pub mdx_esm_parse: Option<Box<MdxEsmParse>>, + // Note: when adding fields, don’t forget to add them to `fmt::Debug` below. +} + +impl fmt::Debug for Options { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Options") + .field("allow_dangerous_html", &self.allow_dangerous_html) + .field("allow_dangerous_protocol", &self.allow_dangerous_protocol) + .field("constructs", &self.constructs) + .field("default_line_ending", &self.default_line_ending) + .field("gfm_footnote_label", &self.gfm_footnote_label) + .field( + "gfm_footnote_label_tag_name", + &self.gfm_footnote_label_tag_name, + ) + .field( + "gfm_footnote_label_attributes", + &self.gfm_footnote_label_attributes, + ) + .field("gfm_footnote_back_label", &self.gfm_footnote_back_label) + .field( + "gfm_footnote_clobber_prefix", + &self.gfm_footnote_clobber_prefix, + ) + .field( + "gfm_strikethrough_single_tilde", + &self.gfm_strikethrough_single_tilde, + ) + .field("gfm_tagfilter", &self.gfm_tagfilter) + .field("math_text_single_dollar", &self.math_text_single_dollar) + .field( + "mdx_expression_parse", + if self.mdx_expression_parse.is_none() { + &"None" 
+ } else { + &"Some([Function])" + }, + ) + .field( + "mdx_esm_parse", + if self.mdx_esm_parse.is_none() { + &"None" + } else { + &"Some([Function])" + }, + ) + .finish() + } } impl Default for Options { @@ -931,6 +1105,8 @@ impl Default for Options { gfm_strikethrough_single_tilde: true, gfm_tagfilter: false, math_text_single_dollar: true, + mdx_expression_parse: None, + mdx_esm_parse: None, } } } diff --git a/src/state.rs b/src/state.rs index 896761e..1cc2720 100644 --- a/src/state.rs +++ b/src/state.rs @@ -344,6 +344,14 @@ pub enum Name { ListItemContBlank, ListItemContFilled, + MdxEsmStart, + MdxEsmWord, + MdxEsmInside, + MdxEsmLineStart, + MdxEsmBlankLineBefore, + MdxEsmContinuationStart, + MdxEsmAtEnd, + MdxExpressionTextStart, MdxExpressionTextAfter, @@ -356,8 +364,6 @@ pub enum Name { MdxExpressionBefore, MdxExpressionInside, MdxExpressionEolAfter, - MdxJsxAttributeValueExpressionAfter, - MdxJsxAttributeExpressionAfter, MdxJsxFlowStart, MdxJsxFlowBefore, @@ -385,6 +391,7 @@ pub enum Name { MdxJsxLocalNameAfter, MdxJsxAttributeBefore, MdxJsxSelfClosing, + MdxJsxAttributeExpressionAfter, MdxJsxAttributePrimaryName, MdxJsxAttributePrimaryNameAfter, MdxJsxAttributeLocalNameBefore, @@ -393,6 +400,7 @@ pub enum Name { MdxJsxAttributeValueBefore, MdxJsxAttributeValueQuotedStart, MdxJsxAttributeValueQuoted, + MdxJsxAttributeValueExpressionAfter, NonLazyContinuationStart, NonLazyContinuationAfter, @@ -822,6 +830,14 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { Name::ListItemContBlank => construct::list_item::cont_blank, Name::ListItemContFilled => construct::list_item::cont_filled, + Name::MdxEsmStart => construct::mdx_esm::start, + Name::MdxEsmWord => construct::mdx_esm::word, + Name::MdxEsmInside => construct::mdx_esm::inside, + Name::MdxEsmLineStart => construct::mdx_esm::line_start, + Name::MdxEsmBlankLineBefore => construct::mdx_esm::blank_line_before, + Name::MdxEsmContinuationStart => construct::mdx_esm::continuation_start, + 
Name::MdxEsmAtEnd => construct::mdx_esm::at_end, + Name::MdxExpressionStart => construct::partial_mdx_expression::start, Name::MdxExpressionBefore => construct::partial_mdx_expression::before, Name::MdxExpressionInside => construct::partial_mdx_expression::inside, diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 8441f7e..84d3d6d 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -227,6 +227,9 @@ pub struct TokenizeState<'a> { /// List of defined GFM footnote definition identifiers. pub gfm_footnote_definitions: Vec<String>, + // Last error message provided at an EOF of an expression. + pub mdx_last_parse_error: Option<String>, + /// Whether to connect events. pub connect: bool, /// Marker. @@ -343,6 +346,7 @@ impl<'a> Tokenizer<'a> { document_at_first_paragraph_of_list_item: false, definitions: vec![], gfm_footnote_definitions: vec![], + mdx_last_parse_error: None, end: 0, label_starts: vec![], label_starts_loose: vec![], diff --git a/src/util/mdx_collect.rs b/src/util/mdx_collect.rs new file mode 100644 index 0000000..73ead51 --- /dev/null +++ b/src/util/mdx_collect.rs @@ -0,0 +1,70 @@ +//! Collect info for MDX. + +use crate::event::{Kind, Name, Point}; +use crate::tokenizer::Tokenizer; +use crate::util::slice::{Position, Slice}; +use alloc::{string::String, vec, vec::Vec}; + +pub type Location<'a> = (usize, &'a Point); + +pub struct Result<'a> { + pub start: &'a Point, + pub value: String, + pub locations: Vec<Location<'a>>, +} + +pub fn collect<'a>(tokenizer: &'a Tokenizer, from: usize, names: &[Name]) -> Result<'a> { + let mut result = Result { + start: &tokenizer.events[from].point, + value: String::new(), + locations: vec![], + }; + let mut index = from; + let mut acc = 0; + + while index < tokenizer.events.len() { + if tokenizer.events[index].kind == Kind::Enter + && names.contains(&tokenizer.events[index].name) + { + // Include virtual spaces. 
+ let value = Slice::from_position( + tokenizer.parse_state.bytes, + &Position { + start: &tokenizer.events[index].point, + end: &tokenizer.events[index + 1].point, + }, + ) + .serialize(); + acc += value.len(); + result.locations.push((acc, &tokenizer.events[index].point)); + result.value.push_str(&value); + } + + index += 1; + } + + result +} + +// Turn an index of `result.value` into a point in the whole document. +pub fn place_to_point(result: &Result, place: usize) -> Point { + let mut index = 0; + let mut point = result.start; + let mut rest = place; + + while index < result.locations.len() { + point = result.locations[index].1; + + if result.locations[index].0 > place { + break; + } + + rest = place - result.locations[index].0; + index += 1; + } + + let mut point = point.clone(); + point.column += rest; + point.index += rest; + point +} diff --git a/src/util/mod.rs b/src/util/mod.rs index 2ea372c..6281356 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -6,6 +6,7 @@ pub mod decode_character_reference; pub mod edit_map; pub mod encode; pub mod gfm_tagfilter; +pub mod mdx_collect; pub mod normalize_identifier; pub mod sanitize_uri; pub mod skip; diff --git a/tests/code_indented.rs b/tests/code_indented.rs index 29d8909..bf39fa3 100644 --- a/tests/code_indented.rs +++ b/tests/code_indented.rs @@ -167,7 +167,11 @@ fn code_indented() -> Result<(), String> { "a <?\n ?>", &Options { allow_dangerous_html: true, - ..off.clone() + constructs: Constructs { + code_indented: false, + ..Constructs::default() + }, + ..Options::default() } )?, "<p>a <?\n?></p>", diff --git a/tests/gfm_table.rs b/tests/gfm_table.rs index b7f884a..8c46a30 100644 --- a/tests/gfm_table.rs +++ b/tests/gfm_table.rs @@ -1037,7 +1037,8 @@ bar "###, &Options { allow_dangerous_html: true, - ..gfm.clone() + constructs: Constructs::gfm(), + ..Options::default() } )?, r###"<h1>Grave accents</h1> @@ -1345,7 +1346,8 @@ b "###, &Options { allow_dangerous_html: true, - ..gfm.clone() + constructs: 
Constructs::gfm(), + ..Options::default() } )?, r###"<h2>Blank line</h2> diff --git a/tests/math_text.rs b/tests/math_text.rs index dced393..7b53268 100644 --- a/tests/math_text.rs +++ b/tests/math_text.rs @@ -30,7 +30,12 @@ fn math_text() -> Result<(), String> { "$foo$ $$bar$$", &Options { math_text_single_dollar: false, - ..math.clone() + constructs: Constructs { + math_text: true, + math_flow: true, + ..Constructs::default() + }, + ..Options::default() } )?, "<p>$foo$ <code class=\"language-math math-inline\">bar</code></p>", @@ -133,7 +138,12 @@ fn math_text() -> Result<(), String> { &Options { allow_dangerous_html: true, allow_dangerous_protocol: true, - ..math.clone() + constructs: Constructs { + math_text: true, + math_flow: true, + ..Constructs::default() + }, + ..Options::default() } )?, "<p><a href=\"$\">$</p>", diff --git a/tests/mdx_esm.rs b/tests/mdx_esm.rs new file mode 100644 index 0000000..f1ea122 --- /dev/null +++ b/tests/mdx_esm.rs @@ -0,0 +1,241 @@ +extern crate micromark; +mod test_utils; +use micromark::{micromark_with_options, Constructs, Options}; +use pretty_assertions::assert_eq; +use test_utils::{parse_esm, parse_expression}; + +#[test] +fn mdx_esm() -> Result<(), String> { + let swc = Options { + constructs: Constructs::mdx(), + mdx_esm_parse: Some(Box::new(parse_esm)), + mdx_expression_parse: Some(Box::new(parse_expression)), + ..Options::default() + }; + + assert_eq!( + micromark_with_options("import a from 'b'\n\nc", &swc)?, + "<p>c</p>", + "should support an import" + ); + + assert_eq!( + micromark_with_options("export default a\n\nb", &swc)?, + "<p>b</p>", + "should support an export" + ); + + assert_eq!( + micromark_with_options("impossible", &swc)?, + "<p>impossible</p>", + "should not support other keywords (`impossible`)" + ); + + assert_eq!( + micromark_with_options("exporting", &swc)?, + "<p>exporting</p>", + "should not support other keywords (`exporting`)" + ); + + assert_eq!( + micromark_with_options("import.", &swc)?, + 
"<p>import.</p>", + "should not support a non-whitespace after the keyword" + ); + + assert_eq!( + micromark_with_options("import('a')", &swc)?, + "<p>import('a')</p>", + "should not support a non-whitespace after the keyword (import-as-a-function)" + ); + + assert_eq!( + micromark_with_options(" import a from 'b'\n export default c", &swc)?, + "<p>import a from 'b'\nexport default c</p>", + "should not support an indent" + ); + + assert_eq!( + micromark_with_options("- import a from 'b'\n> export default c", &swc)?, + "<ul>\n<li>import a from 'b'</li>\n</ul>\n<blockquote>\n<p>export default c</p>\n</blockquote>", + "should not support keywords in containers" + ); + + assert_eq!( + micromark_with_options("import a from 'b'\nexport default c", &swc)?, + "", + "should support imports and exports in the same “block”" + ); + + assert_eq!( + micromark_with_options("import a from 'b'\n\nexport default c", &swc)?, + "", + "should support imports and exports in separate “blocks”" + ); + + assert_eq!( + micromark_with_options("a\n\nimport a from 'b'\n\nb\n\nexport default c", &swc)?, + "<p>a</p>\n<p>b</p>\n", + "should support imports and exports in between other constructs" + ); + + assert_eq!( + micromark_with_options("a\nimport a from 'b'\n\nb\nexport default c", &swc)?, + "<p>a\nimport a from 'b'</p>\n<p>b\nexport default c</p>", + "should not support import/exports when interrupting paragraphs" + ); + + assert_eq!( + micromark_with_options("import a", &swc).err().unwrap(), + "1:9: Could not parse esm with swc: Expected ',', got '<eof>'", + "should crash on invalid import/exports (1)" + ); + + assert_eq!( + micromark_with_options("import 1/1", &swc).err().unwrap(), + "1:9: Could not parse esm with swc: Expected 'from', got 'numeric literal (1, 1)'", + "should crash on invalid import/exports (2)" + ); + + assert_eq!( + micromark_with_options("export {\n a\n} from 'b'\n\nc", &swc)?, + "<p>c</p>", + "should support line endings in import/exports" + ); + + assert_eq!( + 
micromark_with_options("export {\n\n a\n\n} from 'b'\n\nc", &swc)?, + "<p>c</p>", + "should support blank lines in import/exports" + ); + + assert_eq!( + micromark_with_options("import a from 'b'\n*md*?", &swc) + .err() + .unwrap(), + "2:6: Could not parse esm with swc: Unexpected token `?`. Expected this, import, async, function, [ for array literal, { for object literal, @ for decorator, function, class, null, true, false, number, bigint, string, regexp, ` for template literal, (, or an identifier", + "should crash on markdown after import/export w/o blank line" + ); + + assert_eq!( + micromark_with_options("export var a = 1\n// b\n/* c */\n\nd", &swc)?, + "<p>d</p>", + "should support comments in “blocks”" + ); + + assert_eq!( + micromark_with_options("export var a = 1\nvar b\n\nc", &swc) + .err() + .unwrap(), + "2:1: Unexpected statement in code: only import/exports are supported", + "should crash on other statements in “blocks”" + ); + + assert_eq!( + micromark_with_options("import ('a')\n\nb", &swc) + .err() + .unwrap(), + "1:1: Unexpected statement in code: only import/exports are supported", + "should crash on import-as-a-function with a space `import (x)`" + ); + + assert_eq!( + micromark_with_options("import a from 'b'\nexport {a}\n\nc", &swc)?, + "<p>c</p>", + "should support a reexport from another import" + ); + + assert_eq!( + micromark_with_options("import a from 'b';\nexport {a};\n\nc", &swc)?, + "<p>c</p>", + "should support a reexport from another import w/ semicolons" + ); + + assert_eq!( + micromark_with_options("import a from 'b'\nexport {a as default}\n\nc", &swc)?, + "<p>c</p>", + "should support a reexport default from another import" + ); + + assert_eq!( + micromark_with_options("export var a = () => <b />", &swc)?, + "", + "should support JSX by default" + ); + + assert_eq!( + micromark_with_options("export {a}\n", &swc)?, + "", + "should support EOF after EOL" + ); + + assert_eq!( + micromark_with_options("import a from 'b'\n\nexport 
{a}\n\nc", &swc)?, + "<p>c</p>", + "should support a reexport from another esm block (1)" + ); + + assert_eq!( + micromark_with_options("import a from 'b'\n\nexport {a}\n\n# c", &swc)?, + "<h1>c</h1>", + "should support a reexport from another esm block (2)" + ); + + let cases = vec![ + ("default", "import a from \"b\""), + ("whole", "import * as a from \"b\""), + ("destructuring", "import {a} from \"b\""), + ("destructuring and rename", "import {a as b} from \"c\""), + ("default and destructuring", "import a, {b as c} from \"d\""), + ("default and whole", "import a, * as b from \"c\""), + ("side-effects", "import \"a\""), + ]; + + for case in cases { + assert_eq!( + micromark_with_options(case.1, &swc)?, + "", + "should support imports: {}", + case.0 + ); + } + + let cases = vec![ + ("var", "export var a = \"\""), + ("const", "export const a = \"\""), + ("let", "export let a = \"\""), + ("multiple", "export var a, b"), + ("multiple w/ assignment", "export var a = \"a\", b = \"b\""), + ("function", "export function a() {}"), + ("class", "export class a {}"), + ("destructuring", "export var {a} = {}"), + ("rename destructuring", "export var {a: b} = {}"), + ("array destructuring", "export var [a] = []"), + ("default", "export default a = 1"), + ("default function", "export default function a() {}"), + ("default class", "export default class a {}"), + ("aggregate", "export * from \"a\""), + ("whole reexport", "export * as a from \"b\""), + ("reexport destructuring", "export {a} from \"b\""), + ( + "reexport destructuring w rename", + "export {a as b} from \"c\"", + ), + ("reexport as a default whole", "export {default} from \"b\""), + ( + "reexport default and non-default", + "export {default as a, b} from \"c\"", + ), + ]; + + for case in cases { + assert_eq!( + micromark_with_options(case.1, &swc)?, + "", + "should support exports: {}", + case.0 + ); + } + + Ok(()) +} diff --git a/tests/mdx_expression_flow.rs b/tests/mdx_expression_flow.rs index 2a66a9d..81a31a7 
100644 --- a/tests/mdx_expression_flow.rs +++ b/tests/mdx_expression_flow.rs @@ -1,6 +1,8 @@ extern crate micromark; +mod test_utils; use micromark::{micromark_with_options, Constructs, Options}; use pretty_assertions::assert_eq; +use test_utils::{parse_esm, parse_expression}; #[test] fn mdx_expression_flow_agnostic() -> Result<(), String> { @@ -82,153 +84,132 @@ fn mdx_expression_flow_agnostic() -> Result<(), String> { Ok(()) } -// To do: swc. -// #[test] -// fn mdx_expression_flow_gnostic() -> Result<(), String> { -// assert_eq!( -// micromark_with_options("{a}", &swc), -// "", -// "should support an expression" -// ); - -// assert_eq!( -// micromark_with_options("{}", &swc)?, -// "", -// "should support an empty expression" -// ); - -// // To do: errors. -// // t.throws( -// // () => { -// // micromark_with_options("{a", &swc); -// // }, -// // /Unexpected end of file in expression, expected a corresponding closing brace for `{`/, -// // "should crash if no closing brace is found (1)" -// // ); - -// // To do: errors. 
-// // t.throws( -// // () => { -// // micromark_with_options("{b { c }", &swc); -// // }, -// // /Could not parse expression with swc: Unexpected content after expression/, -// // "should crash if no closing brace is found (2)" -// // ); - -// assert_eq!( -// micromark_with_options("{\n}\na", &swc)?, -// "<p>a</p>", -// "should support a line ending in an expression" -// ); - -// assert_eq!( -// micromark_with_options("{ a } \t\nb", &swc)?, -// "<p>b</p>", -// "should support expressions followed by spaces" -// ); - -// assert_eq!( -// micromark_with_options(" { a }\nb", &swc)?, -// "<p>b</p>", -// "should support expressions preceded by spaces" -// ); - -// assert_eq!( -// micromark_with_options(" {`\n a\n `}", &swc)?, -// "", -// "should support indented expressions" -// ); - -// assert_eq!( -// micromark_with_options("a{(b)}c", &swc)?, -// "<p>ac</p>", -// "should support expressions padded w/ parens" -// ); - -// assert_eq!( -// micromark_with_options("a{/* b */ ( (c) /* d */ + (e) )}f", &swc)?, -// "<p>af</p>", -// "should support expressions padded w/ parens and comments" -// ); - -// Ok(()) -// } - -// To do: move to JSX, actually test spread in expressions? -// To do: swc. -// #[test] -// fn mdx_expression_spread() -> Result<(), String> { -// // To do: errors. -// // t.throws( -// // () => { -// // micromark_with_options("a {b} c", &swc); -// // }, -// // /Unexpected `Property` in code: only spread elements are supported/, -// // "should crash if not a spread" -// // ); - -// // To do: errors. -// // t.throws( -// // () => { -// // micromark_with_options("a {...?} c", &swc); -// // }, -// // /Could not parse expression with swc: Unexpected token/, -// // "should crash on an incorrect spread" -// // ); - -// // To do: errors. 
-// // t.throws( -// // () => { -// // micromark_with_options("a {...b,c} d", &swc); -// // }, -// // /Unexpected extra content in spread: only a single spread is supported/, -// // "should crash if a spread and other things" -// // ); - -// assert_eq!( -// micromark_with_options("a {} b", &swc)?, -// "<p>a b</p>", -// "should support an empty spread" -// ); - -// // To do: errors. -// // t.throws( -// // () => { -// // micromark_with_options("a {} b", &swc); -// // }, -// // /Unexpected empty expression/, -// // "should crash on an empty spread w/ `allowEmpty: false`" -// // ); - -// // To do: errors. -// // t.throws( -// // () => { -// // micromark_with_options("{a=b}", &swc); -// // }, -// // /Could not parse expression with swc: Shorthand property assignments are valid only in destructuring patterns/, -// // "should crash if not a spread w/ `allowEmpty`" -// // ); - -// assert_eq!( -// micromark_with_options("a {/* b */} c", &swc)?, -// "<p>a c</p>", -// "should support a comment spread" -// ); - -// // To do: errors. 
-// // t.throws( -// // () => { -// // micromark_with_options("a {/* b */} c", &swc); -// // }, -// // /Unexpected empty expression/, -// // "should crash on a comment spread w/ `allowEmpty: false`" -// // ); - -// assert_eq!( -// micromark_with_options("a {...b} c", &swc)?, -// "<p>a c</p>", -// "should support a spread" -// ); - -// Ok(()) -// } +#[test] +fn mdx_expression_flow_gnostic() -> Result<(), String> { + let swc = Options { + constructs: Constructs::mdx(), + mdx_esm_parse: Some(Box::new(parse_esm)), + mdx_expression_parse: Some(Box::new(parse_expression)), + ..Options::default() + }; + + assert_eq!( + micromark_with_options("{a}", &swc)?, + "", + "should support an expression" + ); + + assert_eq!( + micromark_with_options("{}", &swc)?, + "", + "should support an empty expression" + ); + + assert_eq!( + micromark_with_options("{a", &swc).err().unwrap(), + "1:3: Unexpected end of file in expression, expected a corresponding closing brace for `{`", + "should crash if no closing brace is found (1)" + ); + + assert_eq!( + micromark_with_options("{b { c }", &swc).err().unwrap(), + "1:4: Could not parse expression with swc: Unexpected content after expression", + "should crash if no closing brace is found (2)" + ); + + assert_eq!( + micromark_with_options("{\n}\na", &swc)?, + "<p>a</p>", + "should support a line ending in an expression" + ); + + assert_eq!( + micromark_with_options("{ a } \t\nb", &swc)?, + "<p>b</p>", + "should support expressions followed by spaces" + ); + + assert_eq!( + micromark_with_options(" { a }\nb", &swc)?, + "<p>b</p>", + "should support expressions preceded by spaces" + ); + + assert_eq!( + micromark_with_options(" {`\n a\n `}", &swc)?, + "", + "should support indented expressions" + ); + + assert_eq!( + micromark_with_options("a{(b)}c", &swc)?, + "<p>ac</p>", + "should support expressions padded w/ parens" + ); + + assert_eq!( + micromark_with_options("a{/* b */ ( (c) /* d */ + (e) )}f", &swc)?, + "<p>af</p>", + "should support 
expressions padded w/ parens and comments" + ); + + Ok(()) +} + +#[test] +fn mdx_expression_spread() -> Result<(), String> { + let swc = Options { + constructs: Constructs::mdx(), + mdx_esm_parse: Some(Box::new(parse_esm)), + mdx_expression_parse: Some(Box::new(parse_expression)), + ..Options::default() + }; + + assert_eq!( + micromark_with_options("<a {...b} />", &swc)?, + "", + "should support spreads for attribute expression" + ); + + assert_eq!( + micromark_with_options("<a {b} />", &swc).err().unwrap(), + "1:5: Expected a single spread value, such as `...x`", + "should crash if not a spread" + ); + + assert_eq!( + micromark_with_options("<a {...?} />", &swc).err().unwrap(), + "1:13: Could not parse expression with swc: Unexpected token `?`. Expected this, import, async, function, [ for array literal, { for object literal, @ for decorator, function, class, null, true, false, number, bigint, string, regexp, ` for template literal, (, or an identifier", + "should crash on an incorrect spread" + ); + + assert_eq!( + micromark_with_options("<a {...b,c} d>", &swc) + .err() + .unwrap(), + "1:5: Expected a single spread value, such as `...x`", + "should crash if a spread and other things" + ); + + assert_eq!( + micromark_with_options("<a {} />", &swc).err().unwrap(), + "1:5: Expected a single spread value, such as `...x`", + "should crash on an empty spread" + ); + + assert_eq!( + micromark_with_options("<a {a=b} />", &swc).err().unwrap(), + "1:12: Could not parse expression with swc: assignment property is invalid syntax", + "should crash if not an identifier" + ); + + assert_eq!( + micromark_with_options("<a {/* b */} />", &swc) + .err() + .unwrap(), + "1:5: Expected a single spread value, such as `...x`", + "should crash on a comment spread" + ); + + Ok(()) +} diff --git a/tests/mdx_expression_text.rs b/tests/mdx_expression_text.rs index b42faf2..3a48965 100644 --- a/tests/mdx_expression_text.rs +++ b/tests/mdx_expression_text.rs @@ -1,147 +1,144 @@ extern crate 
micromark; +mod test_utils; use micromark::{micromark_with_options, Constructs, Options}; use pretty_assertions::assert_eq; +use test_utils::{parse_esm, parse_expression}; -// To do: swc. -// #[test] -// fn mdx_expression_text_gnostic_core() -> Result<(), String> { -// assert_eq!( -// micromark_with_options("a {} b", &swc)?, -// "<p>a b</p>", -// "should support an empty expression (1)" -// ); - -// assert_eq!( -// micromark_with_options("a { \t\r\n} b", &swc)?, -// "<p>a b</p>", -// "should support an empty expression (2)" -// ); - -// assert_eq!( -// micromark_with_options("a {/**/} b", &swc)?, -// "<p>a b</p>", -// "should support a multiline comment (1)" -// ); - -// assert_eq!( -// micromark_with_options("a { /*\n*/\t} b", &swc)?, -// "<p>a b</p>", -// "should support a multiline comment (2)" -// ); - -// assert_eq!( -// micromark_with_options("a {/*b*//*c*/} d", &swc)?, -// "<p>a d</p>", -// "should support a multiline comment (3)" -// ); - -// assert_eq!( -// micromark_with_options("a {b/*c*/} d", &swc)?, -// "<p>a d</p>", -// "should support a multiline comment (4)" -// ); - -// assert_eq!( -// micromark_with_options("a {/*b*/c} d", &swc)?, -// "<p>a d</p>", -// "should support a multiline comment (4)" -// ); - -// // To do: errors. -// // t.throws( -// // () => { -// // micromark_with_options("a {//} b", &swc); -// // }, -// // /Could not parse expression with swc: Unexpected token/, -// // "should crash on an incorrect line comment (1)" -// // ); - -// // To do: errors. 
-// // t.throws( -// // () => { -// // micromark_with_options("a { // b } c", &swc); -// // }, -// // /Could not parse expression with swc: Unexpected token/, -// // "should crash on an incorrect line comment (2)" -// // ); - -// assert_eq!( -// micromark_with_options("a {//\n} b", &swc)?, -// "<p>a b</p>", -// "should support a line comment followed by a line ending" -// ); - -// assert_eq!( -// micromark_with_options("a {// b\nd} d", &swc)?, -// "<p>a d</p>", -// "should support a line comment followed by a line ending and an expression" -// ); - -// assert_eq!( -// micromark_with_options("a {b// c\n} d", &swc)?, -// "<p>a d</p>", -// "should support an expression followed by a line comment and a line ending" -// ); - -// assert_eq!( -// micromark_with_options("a {/*b*/ // c\n} d", &swc)?, -// "<p>a d</p>", -// "should support comments (1)" -// ); - -// assert_eq!( -// micromark_with_options("a {b.c} d", &swc)?, -// "<p>a d</p>", -// "should support expression statements (1)" -// ); - -// assert_eq!( -// micromark_with_options("a {1 + 1} b", &swc)?, -// "<p>a b</p>", -// "should support expression statements (2)" -// ); - -// assert_eq!( -// micromark_with_options("a {function () {}} b", &swc)?, -// "<p>a b</p>", -// "should support expression statements (3)" -// ); - -// // To do: errors. -// // t.throws( -// // () => { -// // micromark_with_options("a {var b = \"c\"} d", &swc); -// // }, -// // /Could not parse expression with swc: Unexpected token/, -// // "should crash on non-expressions" -// // ); - -// assert_eq!( -// micromark_with_options("> a {\n> b} c", &swc)?, -// "<blockquote>\n<p>a c</p>\n</blockquote>", -// "should support expressions in containers" -// ); - -// // To do: errors. -// // t.throws( -// // () => { -// // micromark_with_options("> a {\n> b<} c", &swc); -// // }, -// // /Could not parse expression with swc: Unexpected token/, -// // "should crash on incorrect expressions in containers (1)" -// // ); - -// // To do: errors. 
-// // t.throws( -// // () => { -// // micromark_with_options("> a {\n> b\n> c} d", &swc); -// // }, -// // /Could not parse expression with swc: Unexpected content after expression/, -// // "should crash on incorrect expressions in containers (2)" -// // ); - -// Ok(()) -// } +#[test] +fn mdx_expression_text_gnostic_core() -> Result<(), String> { + let swc = Options { + constructs: Constructs::mdx(), + mdx_esm_parse: Some(Box::new(parse_esm)), + mdx_expression_parse: Some(Box::new(parse_expression)), + ..Options::default() + }; + + assert_eq!( + micromark_with_options("a {} b", &swc)?, + "<p>a b</p>", + "should support an empty expression (1)" + ); + + assert_eq!( + micromark_with_options("a { \t\r\n} b", &swc)?, + "<p>a b</p>", + "should support an empty expression (2)" + ); + + assert_eq!( + micromark_with_options("a {/**/} b", &swc)?, + "<p>a b</p>", + "should support a multiline comment (1)" + ); + + assert_eq!( + micromark_with_options("a { /*\n*/\t} b", &swc)?, + "<p>a b</p>", + "should support a multiline comment (2)" + ); + + assert_eq!( + micromark_with_options("a {/*b*//*c*/} d", &swc)?, + "<p>a d</p>", + "should support a multiline comment (3)" + ); + + assert_eq!( + micromark_with_options("a {b/*c*/} d", &swc)?, + "<p>a d</p>", + "should support a multiline comment (4)" + ); + + assert_eq!( + micromark_with_options("a {/*b*/c} d", &swc)?, + "<p>a d</p>", + "should support a multiline comment (4)" + ); + + assert_eq!( + micromark_with_options("a {//} b", &swc).err().unwrap(), + "1:4: Could not parse expression with swc: Unexpected eof", + "should crash on an incorrect line comment (1)" + ); + + assert_eq!( + micromark_with_options("a { // b } c", &swc).err().unwrap(), + "1:4: Could not parse expression with swc: Unexpected eof", + "should crash on an incorrect line comment (2)" + ); + + assert_eq!( + micromark_with_options("a {//\n} b", &swc)?, + "<p>a b</p>", + "should support a line comment followed by a line ending" + ); + + assert_eq!( + 
micromark_with_options("a {// b\nd} d", &swc)?, + "<p>a d</p>", + "should support a line comment followed by a line ending and an expression" + ); + + assert_eq!( + micromark_with_options("a {b// c\n} d", &swc)?, + "<p>a d</p>", + "should support an expression followed by a line comment and a line ending" + ); + + assert_eq!( + micromark_with_options("a {/*b*/ // c\n} d", &swc)?, + "<p>a d</p>", + "should support comments (1)" + ); + + assert_eq!( + micromark_with_options("a {b.c} d", &swc)?, + "<p>a d</p>", + "should support expression statements (1)" + ); + + assert_eq!( + micromark_with_options("a {1 + 1} b", &swc)?, + "<p>a b</p>", + "should support expression statements (2)" + ); + + assert_eq!( + micromark_with_options("a {function () {}} b", &swc)?, + "<p>a b</p>", + "should support expression statements (3)" + ); + + assert_eq!( + micromark_with_options("a {var b = \"c\"} d", &swc).err().unwrap(), + "1:7: Could not parse expression with swc: Unexpected token `var`. Expected this, import, async, function, [ for array literal, { for object literal, @ for decorator, function, class, null, true, false, number, bigint, string, regexp, ` for template literal, (, or an identifier", + "should crash on non-expressions" + ); + + assert_eq!( + micromark_with_options("> a {\n> b} c", &swc)?, + "<blockquote>\n<p>a c</p>\n</blockquote>", + "should support expressions in containers" + ); + + assert_eq!( + micromark_with_options("> a {\n> b<} c", &swc) + .err() + .unwrap(), + "2:8: Could not parse expression with swc: Unexpected eof", + "should crash on incorrect expressions in containers (1)" + ); + + assert_eq!( + micromark_with_options("> a {\n> b\n> c} d", &swc) + .err() + .unwrap(), + "3:3: Could not parse expression with swc: Unexpected content after expression", + "should crash on incorrect expressions in containers (2)" + ); + + Ok(()) +} #[test] fn mdx_expression_text_agnostic() -> Result<(), String> { @@ -197,77 +194,74 @@ fn mdx_expression_text_agnostic() -> 
Result<(), String> { Ok(()) } -// // To do: swc. -// #[test] -// fn mdx_expression_text_gnostic() -> Result<(), String> { -// assert_eq!( -// micromark_with_options("a {b} c", &swc)?, -// "<p>a c</p>", -// "should support an expression" -// ); - -// // To do: errors. -// // t.throws( -// // () => { -// // micromark_with_options("a {??} b", &swc); -// // }, -// // /Could not parse expression with swc: Unexpected token/, -// // "should crash on an incorrect expression" -// // ); - -// assert_eq!( -// micromark_with_options("a {} b", &swc)?, -// "<p>a b</p>", -// "should support an empty expression" -// ); - -// // To do: errors. -// // t.throws( -// // () => { -// // micromark_with_options("a {b c", &swc); -// // }, -// // /Unexpected end of file in expression, expected a corresponding closing brace for `{`/, -// // "should crash if no closing brace is found (1)" -// // ); - -// // To do: errors. -// // t.throws( -// // () => { -// // micromark_with_options("a {b { c } d", &swc); -// // }, -// // /Could not parse expression with swc: Unexpected content after expression/, -// // "should crash if no closing brace is found (2)" -// // ); - -// assert_eq!( -// micromark_with_options("a {\n} b", &swc)?, -// "<p>a b</p>", -// "should support a line ending in an expression" -// ); - -// assert_eq!( -// micromark_with_options("a } b", &swc)?, -// "<p>a } b</p>", -// "should support just a closing brace" -// ); - -// assert_eq!( -// micromark_with_options("{ a } b", &swc)?, -// "<p> b</p>", -// "should support expressions as the first thing when following by other things" -// ); - -// assert_eq!( -// micromark_with_options("a { /* { */ } b", &swc)?, -// "<p>a b</p>", -// "should support an unbalanced opening brace (if JS permits)" -// ); - -// assert_eq!( -// micromark_with_options("a { /* } */ } b", &swc)?, -// "<p>a b</p>", -// "should support an unbalanced closing brace (if JS permits)" -// ); - -// Ok(()) -// } +#[test] +fn mdx_expression_text_gnostic() -> Result<(), 
String> { + let swc = Options { + constructs: Constructs::mdx(), + mdx_esm_parse: Some(Box::new(parse_esm)), + mdx_expression_parse: Some(Box::new(parse_expression)), + ..Options::default() + }; + + assert_eq!( + micromark_with_options("a {b} c", &swc)?, + "<p>a c</p>", + "should support an expression" + ); + + assert_eq!( + micromark_with_options("a {??} b", &swc).err().unwrap(), + "1:9: Could not parse expression with swc: Unexpected eof", + "should crash on an incorrect expression" + ); + + assert_eq!( + micromark_with_options("a {} b", &swc)?, + "<p>a b</p>", + "should support an empty expression" + ); + + assert_eq!( + micromark_with_options("a {b c", &swc).err().unwrap(), + "1:7: Unexpected end of file in expression, expected a corresponding closing brace for `{`", + "should crash if no closing brace is found (1)" + ); + + assert_eq!( + micromark_with_options("a {b { c } d", &swc).err().unwrap(), + "1:6: Could not parse expression with swc: Unexpected content after expression", + "should crash if no closing brace is found (2)" + ); + + assert_eq!( + micromark_with_options("a {\n} b", &swc)?, + "<p>a b</p>", + "should support a line ending in an expression" + ); + + assert_eq!( + micromark_with_options("a } b", &swc)?, + "<p>a } b</p>", + "should support just a closing brace" + ); + + assert_eq!( + micromark_with_options("{ a } b", &swc)?, + "<p> b</p>", + "should support expressions as the first thing when following by other things" + ); + + assert_eq!( + micromark_with_options("a { /* { */ } b", &swc)?, + "<p>a b</p>", + "should support an unbalanced opening brace (if JS permits)" + ); + + assert_eq!( + micromark_with_options("a { /* } */ } b", &swc)?, + "<p>a b</p>", + "should support an unbalanced closing brace (if JS permits)" + ); + + Ok(()) +} diff --git a/tests/mdx_jsx_text.rs b/tests/mdx_jsx_text.rs index cf507ee..be76d6f 100644 --- a/tests/mdx_jsx_text.rs +++ b/tests/mdx_jsx_text.rs @@ -1,6 +1,8 @@ extern crate micromark; +mod test_utils; use 
micromark::{micromark_with_options, Constructs, Options}; use pretty_assertions::assert_eq; +use test_utils::{parse_esm, parse_expression}; #[test] fn mdx_jsx_text_core() -> Result<(), String> { @@ -84,99 +86,97 @@ fn mdx_jsx_text_agnosic() -> Result<(), String> { #[test] fn mdx_jsx_text_gnostic() -> Result<(), String> { - let mdx = Options { + let swc = Options { constructs: Constructs::mdx(), + mdx_esm_parse: Some(Box::new(parse_esm)), + mdx_expression_parse: Some(Box::new(parse_expression)), ..Options::default() }; assert_eq!( - micromark_with_options("a <b /> c", &mdx)?, + micromark_with_options("a <b /> c", &swc)?, "<p>a c</p>", "should support a self-closing element" ); assert_eq!( - micromark_with_options("a <b> c </b> d", &mdx)?, + micromark_with_options("a <b> c </b> d", &swc)?, "<p>a c d</p>", "should support a closed element" ); assert_eq!( - micromark_with_options("a <b> c", &mdx)?, + micromark_with_options("a <b> c", &swc)?, "<p>a c</p>", "should support an unclosed element" ); assert_eq!( - micromark_with_options("a <b {...c} /> d", &mdx)?, + micromark_with_options("a <b {...c} /> d", &swc)?, "<p>a d</p>", "should support an attribute expression" ); assert_eq!( - micromark_with_options("a <b {...{c: 1, d: Infinity, e: false}} /> f", &mdx)?, + micromark_with_options("a <b {...{c: 1, d: Infinity, e: false}} /> f", &swc)?, "<p>a f</p>", "should support more complex attribute expression (1)" ); assert_eq!( - micromark_with_options("a <b {...[1, Infinity, false]} /> d", &mdx)?, + micromark_with_options("a <b {...[1, Infinity, false]} /> d", &swc)?, "<p>a d</p>", "should support more complex attribute expression (2)" ); assert_eq!( - micromark_with_options("a <b c={1 + 1} /> d", &mdx)?, + micromark_with_options("a <b c={1 + 1} /> d", &swc)?, "<p>a d</p>", "should support an attribute value expression" ); assert_eq!( - micromark_with_options("a <b c={} /> d", &mdx) + micromark_with_options("a <b c={} /> d", &swc) .err() .unwrap(), - "1:9: Unexpected empty 
expression, expected a value between braces", + "1:15: Could not parse expression with swc: Unexpected eof", "should crash on an empty attribute value expression" ); - // To do: swc. - // assert_eq!( - // micromark_with_options("a <b {1 + 1} /> c", &swc) - // .err() - // .unwrap(), - // "Could not parse expression with acorn: Unexpected token", - // "should crash on a non-spread attribute expression" - // ); - - // To do: swc. - // assert_eq!( - // micromark_with_options("a <b c={?} /> d", &swc) - // .err() - // .unwrap(), - // "Could not parse expression with acorn: Unexpected token", - // "should crash on invalid JS in an attribute value expression" - // ); - - // To do: swc. - // assert_eq!( - // micromark_with_options("a <b {?} /> c", &swc) - // .err() - // .unwrap(), - // "Could not parse expression with acorn: Unexpected token", - // "should crash on invalid JS in an attribute expression" - // ); - - // To do: swc. - // assert_eq!( - // micromark_with_options("a <b{c=d}={}/> f", &swc) - // .err() - // .unwrap(), - // "Unexpected `ExpressionStatement` in code: expected an object spread", - // "should crash on invalid JS in an attribute expression (2)" - // ); - - assert_eq!( - micromark_with_options("a <b c={(2)} d={<e />} /> f", &mdx)?, + assert_eq!( + micromark_with_options("a <b {1 + 1} /> c", &swc) + .err() + .unwrap(), + "1:18: Could not parse expression with swc: Expected ',', got '}'", + "should crash on a non-spread attribute expression" + ); + + assert_eq!( + micromark_with_options("a <b c={?} /> d", &swc) + .err() + .unwrap(), + "1:16: Could not parse expression with swc: Unexpected token `?`. 
Expected this, import, async, function, [ for array literal, { for object literal, @ for decorator, function, class, null, true, false, number, bigint, string, regexp, ` for template literal, (, or an identifier", + "should crash on invalid JS in an attribute value expression" + ); + + assert_eq!( + micromark_with_options("a <b {?} /> c", &swc) + .err() + .unwrap(), + "1:14: Could not parse expression with swc: Unexpected token `?`. Expected identifier, string literal, numeric literal or [ for the computed key", + "should crash on invalid JS in an attribute expression" + ); + + assert_eq!( + micromark_with_options("a <b{c=d}={}/> f", &swc) + .err() + .unwrap(), + "1:6: Expected a single spread value, such as `...x`", + "should crash on invalid JS in an attribute expression (2)" + ); + + assert_eq!( + micromark_with_options("a <b c={(2)} d={<e />} /> f", &swc)?, "<p>a f</p>", "should support parenthesized expressions" ); diff --git a/tests/mdx_swc.rs b/tests/mdx_swc.rs new file mode 100644 index 0000000..c9a2a61 --- /dev/null +++ b/tests/mdx_swc.rs @@ -0,0 +1,47 @@ +extern crate micromark; +mod test_utils; +use micromark::{micromark_with_options, Constructs, Options}; +use pretty_assertions::assert_eq; +use test_utils::{parse_esm, parse_expression}; + +#[test] +fn mdx_swc() -> Result<(), String> { + let mdx = Options { + constructs: Constructs::mdx(), + mdx_esm_parse: Some(Box::new(parse_esm)), + mdx_expression_parse: Some(Box::new(parse_expression)), + ..Options::default() + }; + + assert_eq!( + micromark_with_options("{'}'}", &mdx)?, + "", + "should support JavaScript-aware flow expressions w/ `mdx_expression_parse`" + ); + + assert_eq!( + micromark_with_options("a {'}'} b", &mdx)?, + "<p>a b</p>", + "should support JavaScript-aware text expressions w/ `mdx_expression_parse`" + ); + + assert_eq!( + micromark_with_options("<a {...a/*}*/} />", &mdx)?, + "", + "should support JavaScript-aware attribute expressions w/ `mdx_expression_parse`" + ); + + assert_eq!( + 
micromark_with_options("<a b={'}'} />", &mdx)?, + "", + "should support JavaScript-aware attribute value expressions w/ `mdx_expression_parse`" + ); + + assert_eq!( + micromark_with_options("import a from 'b'\n\nexport {a}\n\n# c", &mdx)?, + "<h1>c</h1>", + "should support JavaScript-aware ESM w/ `mdx_esm_parse`" + ); + + Ok(()) +} diff --git a/tests/test_utils/mod.rs b/tests/test_utils/mod.rs new file mode 100644 index 0000000..10b9643 --- /dev/null +++ b/tests/test_utils/mod.rs @@ -0,0 +1,245 @@ +extern crate micromark; +extern crate swc_common; +extern crate swc_ecma_ast; +extern crate swc_ecma_parser; +use micromark::{MdxExpressionKind, MdxSignal}; +use swc_common::{source_map::Pos, BytePos, FileName, SourceFile, Spanned}; +use swc_ecma_ast::{EsVersion, Expr, Module}; +use swc_ecma_parser::{ + error::Error as SwcError, parse_file_as_expr, parse_file_as_module, EsConfig, Syntax, +}; + +/// Parse ESM in MDX with SWC. +pub fn parse_esm(value: &str) -> MdxSignal { + let (file, syntax, version) = create_config(value.to_string()); + let mut errors = vec![]; + let result = parse_file_as_module(&file, syntax, version, None, &mut errors); + + match result { + Err(error) => swc_error_to_signal(&error, value.len(), 0, "esm"), + Ok(tree) => { + if errors.is_empty() { + check_esm_ast(tree) + } else { + if errors.len() > 1 { + println!("parse_esm: todo: multiple errors? {:?}", errors); + } + swc_error_to_signal(&errors[0], value.len(), 0, "esm") + } + } + } +} + +/// Parse expressions in MDX with SWC. +pub fn parse_expression(value: &str, kind: MdxExpressionKind) -> MdxSignal { + // Empty expressions are OK. + if matches!(kind, MdxExpressionKind::Expression) + && matches!(whitespace_and_comments(0, value), MdxSignal::Ok) + { + return MdxSignal::Ok; + } + + // For attribute expression, a spread is needed, for which we have to prefix + // and suffix the input. + // See `check_expression_ast` for how the AST is verified. 
+ let (prefix, suffix) = if matches!(kind, MdxExpressionKind::AttributeExpression) { + ("({", "})") + } else { + ("", "") + }; + + let (file, syntax, version) = create_config(format!("{}{}{}", prefix, value, suffix)); + let mut errors = vec![]; + let result = parse_file_as_expr(&file, syntax, version, None, &mut errors); + + match result { + Err(error) => swc_error_to_signal(&error, value.len(), prefix.len(), "expression"), + Ok(tree) => { + if errors.is_empty() { + let place = fix_swc_position(tree.span().hi.to_usize(), prefix.len()); + let result = check_expression_ast(tree, kind); + if matches!(result, MdxSignal::Ok) { + whitespace_and_comments(place, value) + } else { + result + } + } else { + if errors.len() > 1 { + unreachable!("parse_expression: todo: multiple errors? {:?}", errors); + } + swc_error_to_signal(&errors[0], value.len(), prefix.len(), "expression") + } + } + } +} + +/// Check that the resulting AST of ESM is OK. +/// +/// This checks that only module declarations (import/exports) are used, not +/// statements. +fn check_esm_ast(tree: Module) -> MdxSignal { + let mut index = 0; + while index < tree.body.len() { + let node = &tree.body[index]; + + if !node.is_module_decl() { + let place = fix_swc_position(node.span().hi.to_usize(), 0); + return MdxSignal::Error( + "Unexpected statement in code: only import/exports are supported".to_string(), + place, + ); + } + + index += 1; + } + + MdxSignal::Ok +} + +/// Check that the resulting AST of an expressions is OK. +/// +/// This checks that attribute expressions are the expected spread. 
+fn check_expression_ast(tree: Box<Expr>, kind: MdxExpressionKind) -> MdxSignal { + if matches!(kind, MdxExpressionKind::AttributeExpression) + && tree + .unwrap_parens() + .as_object() + .and_then(|object| { + if object.props.len() == 1 { + object.props[0].as_spread() + } else { + None + } + }) + .is_none() + { + MdxSignal::Error( + "Expected a single spread value, such as `...x`".to_string(), + 0, + ) + } else { + MdxSignal::Ok + } +} + +/// Turn an SWC error into an `MdxSignal`. +/// +/// * If the error happens at `value_len`, yields `MdxSignal::Eof` +/// * Else, yields `MdxSignal::Error`. +fn swc_error_to_signal( + error: &SwcError, + value_len: usize, + prefix_len: usize, + name: &str, +) -> MdxSignal { + let message = error.kind().msg().to_string(); + let place = fix_swc_position(error.span().hi.to_usize(), prefix_len); + let message = format!("Could not parse {} with swc: {}", name, message); + + if place >= value_len { + MdxSignal::Eof(message) + } else { + MdxSignal::Error(message, place) + } +} + +/// Move past JavaScript whitespace (well, actually ASCII whitespace) and +/// comments. +/// +/// This is needed because for expressions, we use an API that parses up to +/// a valid expression, but there may be more expressions after it, which we +/// don’t alow. +fn whitespace_and_comments(mut index: usize, value: &str) -> MdxSignal { + let bytes = value.as_bytes(); + let len = bytes.len(); + let mut in_multiline = false; + let mut in_line = false; + + while index < len { + // In a multiline comment: `/* a */`. + if in_multiline { + if index + 1 < len && bytes[index] == b'*' && bytes[index + 1] == b'/' { + index += 1; + in_multiline = false; + } + } + // In a line comment: `// a`. + else if in_line { + if index + 1 < len && bytes[index] == b'\r' && bytes[index + 1] == b'\n' { + index += 1; + in_line = false; + } else if bytes[index] == b'\r' || bytes[index] == b'\n' { + in_line = false; + } + } + // Not in a comment, opening a multiline comment: `/* a */`. 
+ else if index + 1 < len && bytes[index] == b'/' && bytes[index + 1] == b'*' { + index += 1; + in_multiline = true; + } + // Not in a comment, opening a line comment: `// a`. + else if index + 1 < len && bytes[index] == b'/' && bytes[index + 1] == b'/' { + index += 1; + in_line = true; + } + // Outside comment, whitespace. + else if bytes[index].is_ascii_whitespace() { + // Fine! + } + // Outside comment, not whitespace. + else { + return MdxSignal::Error( + "Could not parse expression with swc: Unexpected content after expression" + .to_string(), + index, + ); + } + + index += 1; + } + + if in_multiline { + MdxSignal::Error( + "Could not parse expression with swc: Unexpected unclosed multiline comment, expected closing: `*/`".to_string(), + index, + ) + } else if in_line { + // EOF instead of EOL is specifically not allowed, because that would + // mean the closing brace is on the commented-out line + MdxSignal::Error( + "Could not parse expression with swc: Unexpected unclosed line comment, expected line ending: `\\n`".to_string(), + index, + ) + } else { + MdxSignal::Ok + } +} + +/// Create configuration for SWC, shared between ESM and expressions. +/// +/// This enables modern JavaScript (ES2022) + JSX. +fn create_config(source: String) -> (SourceFile, Syntax, EsVersion) { + ( + // File. + SourceFile::new( + FileName::Anon, + false, + FileName::Anon, + source, + BytePos::from_usize(1), + ), + // Syntax. + Syntax::Es(EsConfig { + jsx: true, + ..EsConfig::default() + }), + // Version. + EsVersion::Es2022, + ) +} + +/// Turn an SWC byte position from a resulting AST to an offset in the original +/// input string. +fn fix_swc_position(index: usize, prefix_len: usize) -> usize { + index - 1 - prefix_len +} |