diff options
-rw-r--r-- | src/construct/mdx_jsx_flow.rs | 150 | ||||
-rw-r--r-- | src/construct/mdx_jsx_text.rs | 46 | ||||
-rw-r--r-- | src/construct/partial_mdx_jsx.rs | 163 |
3 files changed, 355 insertions, 4 deletions
diff --git a/src/construct/mdx_jsx_flow.rs b/src/construct/mdx_jsx_flow.rs new file mode 100644 index 0000000..4c3dd23 --- /dev/null +++ b/src/construct/mdx_jsx_flow.rs @@ -0,0 +1,150 @@ +//! MDX JSX (flow) occurs in the [flow][] content type. +//! +//! ## Grammar +//! +//! MDX JSX (flow) forms with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>): +//! +//! ```bnf +//! mdx_jsx_flow ::= mdx_jsx *space_or_tab *(mdx_jsx *space_or_tab) +//! +//! ; See the `partial_mdx_jsx` construct for the BNF of that part. +//! ``` +//! +//! As this construct occurs in flow, like all flow constructs, it must be +//! followed by an eol (line ending) or eof (end of file). +//! It is allowed to use multiple tags after each other, optionally with only +//! whitespace between them. +//! +//! See [`mdx_jsx`][mdx_jsx] for more info. +//! +//! ## Tokens +//! +//! * [`MdxJsxFlowTag`][Name::MdxJsxFlowTag] +//! * [`SpaceOrTab`][Name::SpaceOrTab] +//! * see [`mdx_jsx`][mdx_jsx] for more +//! +//! ## Recommendation +//! +//! See [`mdx_jsx`][mdx_jsx] for recommendations. +//! +//! ## References +//! +//! * [`jsx-flow.js` in `micromark-extension-mdx-jsx`](https://github.com/micromark/micromark-extension-mdx-jsx/blob/main/dev/lib/jsx-flow.js) +//! * [`mdxjs.com`](https://mdxjs.com) +//! +//! [flow]: crate::construct::flow +//! [mdx_jsx]: crate::construct::partial_mdx_jsx + +use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}; +use crate::event::Name; +use crate::state::{Name as StateName, State}; +use crate::tokenizer::Tokenizer; +use crate::util::constant::TAB_SIZE; + +/// Start of MDX: JSX (flow). 
+/// +/// ```markdown +/// > | <A /> +/// ^ +/// ``` +pub fn start(tokenizer: &mut Tokenizer) -> State { + if tokenizer.parse_state.options.constructs.mdx_jsx_flow { + tokenizer.tokenize_state.token_1 = Name::MdxJsxFlowTag; + tokenizer.concrete = true; + if matches!(tokenizer.current, Some(b'\t' | b' ')) { + tokenizer.attempt(State::Next(StateName::MdxJsxFlowBefore), State::Nok); + State::Retry(space_or_tab_min_max( + tokenizer, + 0, + if tokenizer.parse_state.options.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }, + )) + } else { + State::Retry(StateName::MdxJsxFlowBefore) + } + } else { + State::Nok + } +} + +/// After optional whitespace, before MDX JSX (flow). +/// +/// ```markdown +/// > | <A /> +/// ^ +/// ``` +pub fn before(tokenizer: &mut Tokenizer) -> State { + if Some(b'<') == tokenizer.current { + tokenizer.attempt( + State::Next(StateName::MdxJsxFlowAfter), + State::Next(StateName::MdxJsxFlowNok), + ); + State::Retry(StateName::MdxJsxStart) + } else { + State::Retry(StateName::MdxJsxFlowNok) + } +} + +/// After an MDX JSX (flow) tag. +/// +/// ```markdown +/// > | <A> +/// ^ +/// ``` +pub fn after(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b'\t' | b' ') => { + tokenizer.attempt(State::Next(StateName::MdxJsxFlowEnd), State::Nok); + State::Retry(space_or_tab(tokenizer)) + } + _ => State::Retry(StateName::MdxJsxFlowEnd), + } +} + +/// After an MDX JSX (flow) tag, after optional whitespace. +/// +/// ```markdown +/// > | <A> <B> +/// ^ +/// ``` +pub fn end(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + None | Some(b'\n') => { + reset(tokenizer); + State::Ok + } + // Another? + Some(b'<') => { + tokenizer.attempt( + State::Next(StateName::MdxJsxFlowAfter), + State::Next(StateName::MdxJsxFlowNok), + ); + State::Retry(StateName::MdxJsxStart) + } + _ => { + reset(tokenizer); + State::Nok + } + } +} + +/// At something that wasn’t an MDX JSX (flow) tag. 
+/// +/// ```markdown +/// > | <A> x +/// ^ +/// ``` +pub fn nok(tokenizer: &mut Tokenizer) -> State { + reset(tokenizer); + State::Nok +} + +/// Reset state. +fn reset(tokenizer: &mut Tokenizer) { + tokenizer.concrete = false; + tokenizer.tokenize_state.token_1 = Name::Data; +} diff --git a/src/construct/mdx_jsx_text.rs b/src/construct/mdx_jsx_text.rs index f4cab4e..782e215 100644 --- a/src/construct/mdx_jsx_text.rs +++ b/src/construct/mdx_jsx_text.rs @@ -1,4 +1,34 @@ -//! To do. +//! MDX JSX (text) occurs in the [text][] content type. +//! +//! ## Grammar +//! +//! MDX JSX (text) forms with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>): +//! +//! ```bnf +//! mdx_jsx_text ::= mdx_jsx +//! +//! ; See the `partial_mdx_jsx` construct for the BNF of that part. +//! ``` +//! +//! See [`mdx_jsx`][mdx_jsx] for more info. +//! +//! ## Tokens +//! +//! * [`MdxJsxTextTag`][Name::MdxJsxTextTag] +//! * see [`mdx_jsx`][mdx_jsx] for more +//! +//! ## Recommendation +//! +//! See [`mdx_jsx`][mdx_jsx] for recommendations. +//! +//! ## References +//! +//! * [`jsx-text.js` in `micromark-extension-mdx-jsx`](https://github.com/micromark/micromark-extension-mdx-jsx/blob/main/dev/lib/jsx-text.js) +//! * [`mdxjs.com`](https://mdxjs.com) +//! +//! [text]: crate::construct::text +//! [mdx_jsx]: crate::construct::partial_mdx_jsx use crate::event::Name; use crate::state::{Name as StateName, State}; @@ -23,13 +53,23 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } } -/// To do +/// After an MDX JSX (text) tag. +/// +/// ```markdown +/// > | a <b> c +/// ^ +/// ``` pub fn after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_1 = Name::Data; State::Ok } -/// To do +/// At something that wasn’t an MDX JSX (text) tag. 
+/// +/// ```markdown +/// > | a < b +/// ^ +/// ``` pub fn nok(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_1 = Name::Data; State::Nok diff --git a/src/construct/partial_mdx_jsx.rs b/src/construct/partial_mdx_jsx.rs index ac4b7d8..1a51608 100644 --- a/src/construct/partial_mdx_jsx.rs +++ b/src/construct/partial_mdx_jsx.rs @@ -1,4 +1,165 @@ -//! To do. +//! MDX JSX occurs in [MDX JSX (flow)][mdx_jsx_flow] and +//! [MDX JSX (text)][mdx_jsx_text]. +//! +//! ## Grammar +//! +//! MDX JSX forms with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>): +//! +//! ```bnf +//! ; constraint: markdown whitespace (`space_or_tab | eol`) is NOT +//! ; allowed directly after `<` in order to allow `1 < 3` in markdown. +//! mdx_jsx ::= +//! '<' [closing] +//! [*whitespace name [attributes_after_identifier] [closing]] +//! *whitespace '>' +//! +//! attributes_after_identifier ::= +//! 1*whitespace (attributes_boolean | attributes_value) | +//! *whitespace attributes_expression | +//! attributes_after_value ::= +//! *whitespace (attributes_boolean | attributes_expression | attributes_value) +//! attributes_boolean ::= key [attributes_after_identifier] +//! ; Note: in gnostic mode the value of the expression must instead be a single valid ES spread +//! ; expression +//! attributes_expression ::= expression [attributes_after_value] +//! attributes_value ::= key initializer [attributes_after_value] +//! +//! closing ::= *whitespace '/' +//! +//! name ::= identifier [local | members] +//! key ::= identifier [local] +//! local ::= *whitespace ':' *whitespace identifier +//! members ::= member *member +//! member ::= *whitespace '.' *whitespace identifier +//! +//! identifier ::= identifier_start *identifier_part +//! initializer ::= *whitespace '=' *whitespace value +//! value ::= double_quoted | single_quoted | expression +//! ; Note: in gnostic mode the value must instead be a single valid ES expression +//! 
expression ::= '{' *(expression_text | expression) '}' +//! +//! double_quoted ::= '"' *double_quoted_text '"' +//! single_quoted ::= "'" *single_quoted_text "'" +//! +//! text ::= char - '<' - '{' +//! whitespace ::= es_whitespace +//! double_quoted_text ::= char - '"' +//! single_quoted_text ::= char - "'" +//! expression_text ::= char - '{' - '}' +//! identifier_start ::= es_identifier_start +//! identifier_part ::= es_identifier_part | '-' +//! +//! ; ECMAScript +//! ; See “Identifier_start”: <https://tc39.es/ecma262/#prod-IdentifierStart> +//! es_identifier_start ::= ? +//! ; See “Identifier_part”: <https://tc39.es/ecma262/#prod-IdentifierPart> +//! es_identifier_part ::= ? +//! ; See “Whitespace”: <https://tc39.es/ecma262/#prod-WhiteSpace> +//! es_whitespace ::= ? +//! ``` +//! +//! The grammar for JSX in markdown is much stricter than that of HTML in +//! markdown. +//! The primary benefit of this is that tags are parsed into tokens, and thus +//! can be processed. +//! Another, arguable, benefit of this is that it comes with syntax errors: if +//! an author types something that is nonsensical, an error is thrown with +//! information about where it happened, what occurred, and what was expected +//! instead. +//! +//! ## Tokens +//! +//! * [`LineEnding`][Name::LineEnding] +//! * [`MdxJsxEsWhitespace`][Name::MdxJsxEsWhitespace] +//! * [`MdxJsxTagMarker`][Name::MdxJsxTagMarker] +//! * [`MdxJsxTagClosingMarker`][Name::MdxJsxTagClosingMarker] +//! * [`MdxJsxTagName`][Name::MdxJsxTagName] +//! * [`MdxJsxTagNamePrimary`][Name::MdxJsxTagNamePrimary] +//! * [`MdxJsxTagNameMemberMarker`][Name::MdxJsxTagNameMemberMarker] +//! * [`MdxJsxTagNamePrefixMarker`][Name::MdxJsxTagNamePrefixMarker] +//! * [`MdxJsxTagNameMember`][Name::MdxJsxTagNameMember] +//! * [`MdxJsxTagNameLocal`][Name::MdxJsxTagNameLocal] +//! * [`MdxJsxTagAttribute`][Name::MdxJsxTagAttribute] +//! * [`MdxJsxTagAttributeName`][Name::MdxJsxTagAttributeName] +//! 
* [`MdxJsxTagAttributePrimaryName`][Name::MdxJsxTagAttributePrimaryName] +//! * [`MdxJsxTagAttributeNamePrefixMarker`][Name::MdxJsxTagAttributeNamePrefixMarker] +//! * [`MdxJsxTagAttributeNameLocal`][Name::MdxJsxTagAttributeNameLocal] +//! * [`MdxJsxTagAttributeInitializerMarker`][Name::MdxJsxTagAttributeInitializerMarker] +//! * [`MdxJsxTagAttributeValueLiteral`][Name::MdxJsxTagAttributeValueLiteral] +//! * [`MdxJsxTagAttributeValueLiteralMarker`][Name::MdxJsxTagAttributeValueLiteralMarker] +//! * [`MdxJsxTagAttributeValueLiteralValue`][Name::MdxJsxTagAttributeValueLiteralValue] +//! * [`MdxJsxTagSelfClosingMarker`][Name::MdxJsxTagSelfClosingMarker] +//! +//! ## Recommendation +//! +//! When authoring markdown with JSX, keep in mind that MDX is a +//! whitespace-sensitive and line-based language, while JavaScript is insensitive to +//! whitespace. +//! This affects how markdown and JSX interleave with each other in MDX. +//! For more info on how it works, see [§ Interleaving][interleaving] on the +//! MDX site. +//! +//! ###### Comments inside tags +//! +//! JavaScript comments in JSX are not supported. +//! +//! Incorrect: +//! +//! ```jsx +//! <hi/*comment!*//> +//! <hello// comment! +//! /> +//! ``` +//! +//! Correct: +//! +//! ```jsx +//! <hi/> +//! <hello +//! /> +//! ``` +//! +//! A PR that adds support for them would be accepted. +//! +//! ###### Element or fragment attribute values +//! +//! JSX elements or JSX fragments as attribute values are not supported. +//! The reason for this change is that it would be confusing whether markdown +//! would work. +//! +//! Incorrect: +//! +//! ```jsx +//! <welcome name=<>Venus</> /> +//! <welcome name=<span>Pluto</span> /> +//! ``` +//! +//! Correct: +//! +//! ```jsx +//! <welcome name='Mars' /> +//! <welcome name={<span>Jupiter</span>} /> +//! ``` +//! +//! ###### Greater than (`>`) and right curly brace (`}`) +//! +//! JSX does not allow U+003E GREATER THAN (`>`) or U+007D RIGHT CURLY BRACE +//! 
(`}`) literally in text, they need to be encoded as character references +//! (or expressions). +//! There is no good reason for this (some JSX parsers agree with us and don’t +//! crash either). +//! Therefore, in MDX, U+003E GREATER THAN (`>`) and U+007D RIGHT CURLY BRACE +//! (`}`) are fine literally and don’t need to be encoded. +//! +//! ## References +//! +//! * [`jsx-flow.js` in `micromark-extension-mdx-jsx`](https://github.com/micromark/micromark-extension-mdx-jsx/blob/main/dev/lib/jsx-flow.js) +//! * [`mdxjs.com`](https://mdxjs.com) +//! +//! [mdx_jsx_flow]: crate::construct::mdx_jsx_flow +//! [mdx_jsx_text]: crate::construct::mdx_jsx_text +//! [interleaving]: https://mdxjs.com/docs/what-is-mdx/#interleaving use crate::event::Name; use crate::state::{Name as StateName, State}; |