diff options
Diffstat (limited to 'src/construct')
| -rw-r--r-- | src/construct/flow.rs | 8 | ||||
| -rw-r--r-- | src/construct/mdx_esm.rs | 224 | ||||
| -rw-r--r-- | src/construct/mod.rs | 2 | ||||
| -rw-r--r-- | src/construct/partial_mdx_expression.rs | 98 | ||||
| -rw-r--r-- | src/construct/partial_mdx_jsx.rs | 3 | 
5 files changed, 314 insertions, 21 deletions
diff --git a/src/construct/flow.rs b/src/construct/flow.rs index 08e0466..d6a79d8 100644 --- a/src/construct/flow.rs +++ b/src/construct/flow.rs @@ -15,6 +15,7 @@  //! *   [Heading (atx)][crate::construct::heading_atx]  //! *   [Heading (setext)][crate::construct::heading_setext]  //! *   [HTML (flow)][crate::construct::html_flow] +//! *   [MDX esm][crate::construct::mdx_esm]  //! *   [MDX expression (flow)][crate::construct::mdx_expression_flow]  //! *   [MDX JSX (flow)][crate::construct::mdx_jsx_flow]  //! *   [Raw (flow)][crate::construct::raw_flow] (code (fenced), math (flow)) @@ -66,6 +67,13 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {              );              State::Retry(StateName::HtmlFlowStart)          } +        Some(b'e' | b'i') => { +            tokenizer.attempt( +                State::Next(StateName::FlowAfter), +                State::Next(StateName::FlowBeforeContent), +            ); +            State::Retry(StateName::MdxEsmStart) +        }          Some(b'{') => {              tokenizer.attempt(                  State::Next(StateName::FlowAfter), diff --git a/src/construct/mdx_esm.rs b/src/construct/mdx_esm.rs new file mode 100644 index 0000000..53f8beb --- /dev/null +++ b/src/construct/mdx_esm.rs @@ -0,0 +1,224 @@ +//! MDX ESM occurs in the [flow][] content type. +//! +//! ## Grammar +//! +//! MDX ESM forms with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>): +//! +//! ```bnf +//! mdx_esm ::= word *line *(eol *line) +//! +//! word ::= 'e' 'x' 'p' 'o' 'r' 't' | 'i' 'm' 'p' 'o' 'r' 't' +//! ``` +//! +//! This construct must be followed by a blank line or eof (end of file). +//! It can include blank lines if [`MdxEsmParse`][crate::MdxEsmParse] passed in +//! `options.mdx_esm_parse` allows it. +//! +//! ## Tokens +//! +//! *   [`LineEnding`][Name::LineEnding] +//! *   [`MdxEsm`][Name::MdxEsm] +//! *   [`MdxEsmData`][Name::MdxEsmData] +//! +//! ## References +//! +//! 
*   [`syntax.js` in `micromark-extension-mdxjs-esm`](https://github.com/micromark/micromark-extension-mdxjs-esm/blob/main/dev/lib/syntax.js) +//! *   [`mdxjs.com`](https://mdxjs.com) +//! +//! [flow]: crate::construct::flow + +use crate::event::Name; +use crate::state::{Name as StateName, State}; +use crate::tokenizer::Tokenizer; +use crate::util::{ +    mdx_collect::{collect, place_to_point}, +    slice::Slice, +}; +use crate::MdxSignal; +use alloc::format; + +/// Start of MDX ESM. +/// +/// ```markdown +/// > | import a from 'b' +///     ^ +/// ``` +pub fn start(tokenizer: &mut Tokenizer) -> State { +    // If it’s turned on. +    if tokenizer.parse_state.options.constructs.mdx_esm +        // If there is a gnostic parser. +        && tokenizer.parse_state.options.mdx_esm_parse.is_some() +        // When not interrupting. +        && !tokenizer.interrupt +        // Only at the start of a line, not at whitespace or in a container. +        && tokenizer.point.column == 1 +        && matches!(tokenizer.current, Some(b'e' | b'i')) +    { +        // Place where keyword starts. +        tokenizer.tokenize_state.start = tokenizer.point.index; +        tokenizer.enter(Name::MdxEsm); +        tokenizer.enter(Name::MdxEsmData); +        tokenizer.consume(); +        State::Next(StateName::MdxEsmWord) +    } else { +        State::Nok +    } +} + +/// In keyword. 
+/// +/// ```markdown +/// > | import a from 'b' +///     ^^^^^^ +/// ``` +pub fn word(tokenizer: &mut Tokenizer) -> State { +    if matches!(tokenizer.current, Some(b'a'..=b'z')) { +        tokenizer.consume(); +        State::Next(StateName::MdxEsmWord) +    } else { +        let slice = Slice::from_indices( +            tokenizer.parse_state.bytes, +            tokenizer.tokenize_state.start, +            tokenizer.point.index, +        ); + +        if matches!(slice.as_str(), "export" | "import") && tokenizer.current == Some(b' ') { +            tokenizer.concrete = true; +            tokenizer.tokenize_state.start = tokenizer.events.len() - 1; +            tokenizer.consume(); +            State::Next(StateName::MdxEsmInside) +        } else { +            tokenizer.tokenize_state.start = 0; +            State::Nok +        } +    } +} + +/// In data. +/// +/// ```markdown +/// > | import a from 'b' +///           ^ +/// ``` +pub fn inside(tokenizer: &mut Tokenizer) -> State { +    match tokenizer.current { +        None | Some(b'\n') => { +            tokenizer.exit(Name::MdxEsmData); +            State::Retry(StateName::MdxEsmLineStart) +        } +        _ => { +            tokenizer.consume(); +            State::Next(StateName::MdxEsmInside) +        } +    } +} + +/// At start of line. 
+/// +/// ```markdown +///   | import a from 'b' +/// > | export {a} +///     ^ +/// ``` +pub fn line_start(tokenizer: &mut Tokenizer) -> State { +    match tokenizer.current { +        None => State::Retry(StateName::MdxEsmAtEnd), +        Some(b'\n') => { +            tokenizer.check( +                State::Next(StateName::MdxEsmAtEnd), +                State::Next(StateName::MdxEsmContinuationStart), +            ); +            State::Retry(StateName::MdxEsmBlankLineBefore) +        } +        _ => { +            tokenizer.enter(Name::MdxEsmData); +            tokenizer.consume(); +            State::Next(StateName::MdxEsmInside) +        } +    } +} + +/// At start of line that continues. +/// +/// ```markdown +///   | import a from 'b' +/// > | export {a} +///     ^ +/// ``` +pub fn continuation_start(tokenizer: &mut Tokenizer) -> State { +    tokenizer.enter(Name::LineEnding); +    tokenizer.consume(); +    tokenizer.exit(Name::LineEnding); +    State::Next(StateName::MdxEsmLineStart) +} + +/// At start of a potentially blank line. +/// +/// ```markdown +///   | import a from 'b' +/// > | export {a} +///     ^ +/// ``` +pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State { +    tokenizer.enter(Name::LineEnding); +    tokenizer.consume(); +    tokenizer.exit(Name::LineEnding); +    State::Next(StateName::BlankLineStart) +} + +/// At end of line (blank or eof). +/// +/// ```markdown +/// > | import a from 'b' +///                      ^ +/// ``` +pub fn at_end(tokenizer: &mut Tokenizer) -> State { +    let result = parse_esm(tokenizer); + +    // Done!. +    if matches!(result, State::Ok) { +        tokenizer.concrete = false; +        tokenizer.exit(Name::MdxEsm); +    } + +    result +} + +/// Parse ESM with a given function. +fn parse_esm(tokenizer: &mut Tokenizer) -> State { +    // We can `unwrap` because we don’t parse if this is `None`. 
+    let parse = tokenizer +        .parse_state +        .options +        .mdx_esm_parse +        .as_ref() +        .unwrap(); + +    // Collect the body of the ESM and positional info for each run of it. +    let result = collect( +        tokenizer, +        tokenizer.tokenize_state.start, +        &[Name::MdxEsmData, Name::LineEnding], +    ); + +    // Parse and handle what was signaled back. +    match parse(&result.value) { +        MdxSignal::Ok => State::Ok, +        MdxSignal::Error(message, place) => { +            let point = place_to_point(&result, place); +            State::Error(format!("{}:{}: {}", point.line, point.column, message)) +        } +        MdxSignal::Eof(message) => { +            if tokenizer.current == None { +                State::Error(format!( +                    "{}:{}: {}", +                    tokenizer.point.line, tokenizer.point.column, message +                )) +            } else { +                tokenizer.tokenize_state.mdx_last_parse_error = Some(message); +                State::Retry(StateName::MdxEsmContinuationStart) +            } +        } +    } +} diff --git a/src/construct/mod.rs b/src/construct/mod.rs index ae6facf..88f3050 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -66,6 +66,7 @@  //! *   [gfm task list item check][gfm_task_list_item_check]  //! *   [mdx expression (flow)][mdx_expression_flow]  //! *   [mdx expression (text)][mdx_expression_text] +//! *   [mdx esm][mdx_esm]  //! *   [mdx jsx (flow)][mdx_jsx_flow]  //! *   [mdx jsx (text)][mdx_jsx_text]  //! 
@@ -169,6 +170,7 @@ pub mod label_end;  pub mod label_start_image;  pub mod label_start_link;  pub mod list_item; +pub mod mdx_esm;  pub mod mdx_expression_flow;  pub mod mdx_expression_text;  pub mod mdx_jsx_flow; diff --git a/src/construct/partial_mdx_expression.rs b/src/construct/partial_mdx_expression.rs index 31a9af8..3ebd0f0 100644 --- a/src/construct/partial_mdx_expression.rs +++ b/src/construct/partial_mdx_expression.rs @@ -14,7 +14,6 @@  //! ## Tokens  //!  //! *   [`LineEnding`][Name::LineEnding] -//! *   [`SpaceOrTab`][Name::SpaceOrTab]  //! *   [`MdxExpressionMarker`][Name::MdxExpressionMarker]  //! *   [`MdxExpressionData`][Name::MdxExpressionData]  //! @@ -61,7 +60,12 @@ use crate::construct::partial_space_or_tab::space_or_tab_min_max;  use crate::event::Name;  use crate::state::{Name as StateName, State};  use crate::tokenizer::Tokenizer; -use alloc::format; +use crate::util::{ +    constant::TAB_SIZE, +    mdx_collect::{collect, place_to_point}, +}; +use crate::{MdxExpressionKind, MdxExpressionParse, MdxSignal}; +use alloc::{format, string::ToString};  /// Start of an MDX expression.  
/// @@ -75,6 +79,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {      tokenizer.enter(Name::MdxExpressionMarker);      tokenizer.consume();      tokenizer.exit(Name::MdxExpressionMarker); +    tokenizer.tokenize_state.start = tokenizer.events.len() - 1;      State::Next(StateName::MdxExpressionBefore)  } @@ -88,8 +93,10 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          None => {              State::Error(format!( -                "{}:{}: Unexpected end of file in expression, expected a corresponding closing brace for `{{`", -                tokenizer.point.line, tokenizer.point.column +                "{}:{}: {}", +                tokenizer.point.line, tokenizer.point.column, +                tokenizer.tokenize_state.mdx_last_parse_error.take() +                    .unwrap_or_else(|| "Unexpected end of file in expression, expected a corresponding closing brace for `{`".to_string())              ))          }          Some(b'\n') => { @@ -97,24 +104,26 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {              tokenizer.consume();              tokenizer.exit(Name::LineEnding);              State::Next(StateName::MdxExpressionEolAfter) -        }, +        }          Some(b'}') if tokenizer.tokenize_state.size == 0 => { -            if tokenizer.tokenize_state.token_1 == Name::MdxJsxTagAttributeValueExpression && !tokenizer.tokenize_state.seen { -                State::Error(format!( -                    "{}:{}: Unexpected empty expression, expected a value between braces", -                    tokenizer.point.line, tokenizer.point.column -                )) +            let state = if let Some(ref parse) = tokenizer.parse_state.options.mdx_expression_parse +            { +                parse_expression(tokenizer, parse)              } else { -                tokenizer.tokenize_state.seen = false; +                State::Ok +            }; + +            if state == State::Ok { +                
tokenizer.tokenize_state.start = 0;                  tokenizer.enter(Name::MdxExpressionMarker);                  tokenizer.consume();                  tokenizer.exit(Name::MdxExpressionMarker);                  tokenizer.exit(tokenizer.tokenize_state.token_1.clone()); -                State::Ok              } -        }, + +            state +        }          Some(_) => { -            tokenizer.tokenize_state.seen = true;              tokenizer.enter(Name::MdxExpressionData);              State::Retry(StateName::MdxExpressionInside)          } @@ -134,8 +143,10 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {          tokenizer.exit(Name::MdxExpressionData);          State::Retry(StateName::MdxExpressionBefore)      } else { -        // To do: don’t count if gnostic. -        if tokenizer.current == Some(b'{') { +        // Don’t count if gnostic. +        if tokenizer.current == Some(b'{') +            && tokenizer.parse_state.options.mdx_expression_parse.is_none() +        {              tokenizer.tokenize_state.size += 1;          } else if tokenizer.current == Some(b'}') {              tokenizer.tokenize_state.size -= 1; @@ -165,9 +176,60 @@ pub fn eol_after(tokenizer: &mut Tokenizer) -> State {          ))      } else if matches!(tokenizer.current, Some(b'\t' | b' ')) {          tokenizer.attempt(State::Next(StateName::MdxExpressionBefore), State::Nok); -        // To do: use `start_column` + constants.tabSize for max space to eat. -        State::Next(space_or_tab_min_max(tokenizer, 0, usize::MAX)) +        // Idea: investigate if we’d need to use more complex stripping. +        // Take this example: +        // +        // ```markdown +        // >  aaa <b c={` +        // >      d +        // >  `} /> eee +        // ``` +        // +        // Currently, the “paragraph” starts at `> | aaa`, so for the next line +        // here we split it into `>␠|␠␠␠␠|␠d` (prefix, this indent here, +        // expression data). 
+    // The intention above is likely for the split to be as `>␠␠|␠␠␠␠|d`, +    // which is impossible, but we can mimic it with `>␠|␠␠␠␠␠|d`. +    // +    // To improve the situation, we could take `tokenizer.line_start` at +    // the start of the expression and move past whitespace. +    // For future lines, we’d move at most to +    // `line_start_shifted.column + 4`. +    State::Retry(space_or_tab_min_max(tokenizer, 0, TAB_SIZE))  } else {          State::Retry(StateName::MdxExpressionBefore)      }  } + +/// Parse an expression with a given function. +fn parse_expression(tokenizer: &mut Tokenizer, parse: &MdxExpressionParse) -> State { +    // Collect the body of the expression and positional info for each run of it. +    let result = collect( +        tokenizer, +        tokenizer.tokenize_state.start, +        &[Name::MdxExpressionData, Name::LineEnding], +    ); + +    // Turn the name of the expression into a kind. +    let kind = match tokenizer.tokenize_state.token_1 { +        Name::MdxFlowExpression | Name::MdxTextExpression => MdxExpressionKind::Expression, +        Name::MdxJsxTagAttributeExpression => MdxExpressionKind::AttributeExpression, +        Name::MdxJsxTagAttributeValueExpression => MdxExpressionKind::AttributeValueExpression, +        _ => unreachable!("cannot handle unknown expression name"), +    }; + +    // Parse and handle what was signaled back. 
+    match parse(&result.value, kind) { +        MdxSignal::Ok => State::Ok, +        MdxSignal::Error(message, place) => { +            let point = place_to_point(&result, place); +            State::Error(format!("{}:{}: {}", point.line, point.column, message)) +        } +        MdxSignal::Eof(message) => { +            tokenizer.tokenize_state.mdx_last_parse_error = Some(message); +            tokenizer.enter(Name::MdxExpressionData); +            tokenizer.consume(); +            State::Next(StateName::MdxExpressionInside) +        } +    } +} diff --git a/src/construct/partial_mdx_jsx.rs b/src/construct/partial_mdx_jsx.rs index 9177b5b..e49a8e0 100644 --- a/src/construct/partial_mdx_jsx.rs +++ b/src/construct/partial_mdx_jsx.rs @@ -611,8 +611,6 @@ pub fn attribute_before(tokenizer: &mut Tokenizer) -> State {          Some(b'>') => State::Retry(StateName::MdxJsxTagEnd),          // Attribute expression.          Some(b'{') => { -            // To do: force `spread: true` if gnostic. -            // To do: pass `start_point` if gnostic.              tokenizer.tokenize_state.token_2 = tokenizer.tokenize_state.token_1.clone();              tokenizer.tokenize_state.token_1 = Name::MdxJsxTagAttributeExpression;              tokenizer.attempt( @@ -886,7 +884,6 @@ pub fn attribute_value_before(tokenizer: &mut Tokenizer) -> State {          }          // Attribute value expression.          Some(b'{') => { -            // To do: pass `start_point` if gnostic.              tokenizer.tokenize_state.token_2 = tokenizer.tokenize_state.token_1.clone();              tokenizer.tokenize_state.token_1 = Name::MdxJsxTagAttributeValueExpression;              tokenizer.attempt(  | 
