//! MDX JSX occurs in [MDX JSX (flow)][mdx_jsx_flow] and
//! [MDX JSX (text)][mdx_jsx_text].
//!
//! ## Grammar
//!
//! MDX JSX forms with the following BNF
//! (see [construct][crate::construct] for character groups):
//!
//! ```bnf
//! ; constraint: markdown whitespace (`space_or_tab | eol`) is NOT
//! ; allowed directly after `<` in order to allow `1 < 3` in markdown.
//! mdx_jsx ::=
//!   '<' [closing]
//!   [*whitespace name [attributes_after_identifier] [closing]]
//!   *whitespace '>'
//!
//! attributes_after_identifier ::=
//!   1*whitespace (attributes_boolean | attributes_value) |
//!   *whitespace attributes_expression |
//! attributes_after_value ::=
//!   *whitespace (attributes_boolean | attributes_expression | attributes_value)
//! attributes_boolean ::= key [attributes_after_identifier]
//! ; Note: in gnostic mode the value of the expression must instead be a single valid ES spread
//! ; expression
//! attributes_expression ::= expression [attributes_after_value]
//! attributes_value ::= key initializer [attributes_after_value]
//!
//! closing ::= *whitespace '/'
//!
//! name ::= identifier [local | members]
//! key ::= identifier [local]
//! local ::= *whitespace ':' *whitespace identifier
//! members ::= member *member
//! member ::= *whitespace '.' *whitespace identifier
//!
//! identifier ::= identifier_start *identifier_part
//! initializer ::= *whitespace '=' *whitespace value
//! value ::= double_quoted | single_quoted | expression
//! ; Note: in gnostic mode the value must instead be a single valid ES expression
//! expression ::= '{' *(expression_text | expression) '}'
//!
//! double_quoted ::= '"' *double_quoted_text '"'
//! single_quoted ::= "'" *single_quoted_text "'"
//!
//! text ::= char - '<' - '{'
//! whitespace ::= es_whitespace
//! double_quoted_text ::= char - '"'
//! single_quoted_text ::= char - "'"
//! expression_text ::= char - '{' - '}'
//! identifier_start ::= es_identifier_start
//! identifier_part ::= es_identifier_part | '-'
//!
//! ; ECMAScript
//! ; See “IdentifierStart”: <https://tc39.es/ecma262/#prod-IdentifierStart>
//! es_identifier_start ::= ?
//! ; See “IdentifierPart”: <https://tc39.es/ecma262/#prod-IdentifierPart>
//! es_identifier_part ::= ?
//! ; See “WhiteSpace”: <https://tc39.es/ecma262/#prod-WhiteSpace>
//! es_whitespace ::= ?
//! ```
//!
//! The grammar for JSX in markdown is much stricter than that of HTML in
//! markdown.
//! The primary benefit of this is that tags are parsed into tokens and can
//! thus be processed.
//! Another, arguable, benefit is that it comes with syntax errors: if an
//! author types something nonsensical, an error is thrown with information
//! about where it happened, what occurred, and what was expected instead.
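//!
//! As an illustration (the tag and attribute names here are made up), the
//! following tags all fit the grammar above; their markers, names, and
//! attributes map to the tokens listed in the next section:
//!
//! ```jsx
//! <a />
//! <a.b>
//! </a.b>
//! <a:b color="red" />
//! <a {...props} />
//! ```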
//!
//! ## Tokens
//!
//! * [`LineEnding`][Name::LineEnding]
//! * [`MdxJsxEsWhitespace`][Name::MdxJsxEsWhitespace]
//! * [`MdxJsxTagMarker`][Name::MdxJsxTagMarker]
//! * [`MdxJsxTagClosingMarker`][Name::MdxJsxTagClosingMarker]
//! * [`MdxJsxTagName`][Name::MdxJsxTagName]
//! * [`MdxJsxTagNamePrimary`][Name::MdxJsxTagNamePrimary]
//! * [`MdxJsxTagNameMemberMarker`][Name::MdxJsxTagNameMemberMarker]
//! * [`MdxJsxTagNamePrefixMarker`][Name::MdxJsxTagNamePrefixMarker]
//! * [`MdxJsxTagNameMember`][Name::MdxJsxTagNameMember]
//! * [`MdxJsxTagNameLocal`][Name::MdxJsxTagNameLocal]
//! * [`MdxJsxTagAttribute`][Name::MdxJsxTagAttribute]
//! * [`MdxJsxTagAttributeName`][Name::MdxJsxTagAttributeName]
//! * [`MdxJsxTagAttributePrimaryName`][Name::MdxJsxTagAttributePrimaryName]
//! * [`MdxJsxTagAttributeNamePrefixMarker`][Name::MdxJsxTagAttributeNamePrefixMarker]
//! * [`MdxJsxTagAttributeNameLocal`][Name::MdxJsxTagAttributeNameLocal]
//! * [`MdxJsxTagAttributeInitializerMarker`][Name::MdxJsxTagAttributeInitializerMarker]
//! * [`MdxJsxTagAttributeValueLiteral`][Name::MdxJsxTagAttributeValueLiteral]
//! * [`MdxJsxTagAttributeValueLiteralMarker`][Name::MdxJsxTagAttributeValueLiteralMarker]
//! * [`MdxJsxTagAttributeValueLiteralValue`][Name::MdxJsxTagAttributeValueLiteralValue]
//! * [`MdxJsxTagSelfClosingMarker`][Name::MdxJsxTagSelfClosingMarker]
//!
//! ## Recommendation
//!
//! When authoring markdown with JSX, keep in mind that MDX is a
//! whitespace-sensitive and line-based language, while JavaScript is
//! insensitive to whitespace.
//! This affects how markdown and JSX interleave with each other in MDX.
//! For more info on how it works, see [§ Interleaving][interleaving] on the
//! MDX site.
//!
//! ###### Comments inside tags
//!
//! JavaScript comments in JSX are not supported.
//!
//! Incorrect:
//!
//! ```jsx
//! <hi/*comment!*/>
//! <hello// comment!
//! >
//! ```
//!
//! Correct:
//!
//! ```jsx
//! <hi>
//! <hello
//! >
//! ```
//!
//! A PR that adds support for them would be accepted.
//!
//! ###### Element or fragment attribute values
//!
//! JSX elements or JSX fragments as attribute values are not supported.
//! The reason for this change is that it would be confusing whether markdown
//! would work.
//!
//! Incorrect:
//!
//! ```jsx
//! <welcome name=<>Venus</> />
//! <welcome name=<span>Pluto</span> />
//! ```
//!
//! Correct:
//!
//! ```jsx
//! <welcome name='Mars' />
//! <welcome name={<span>Jupiter</span>} />
//! ```
//!
//! ###### Greater than (`>`) and right curly brace (`}`)
//!
//! JSX does not allow U+003E GREATER THAN (`>`) or U+007D RIGHT CURLY BRACE
//! (`}`) literally in text; they need to be encoded as character references
//! (or expressions).
//! There is no good reason for this (some JSX parsers agree with us and don’t
//! crash either).
//! Therefore, in MDX, U+003E GREATER THAN (`>`) and U+007D RIGHT CURLY BRACE
//! (`}`) are fine literally and don’t need to be encoded.
//!
//! ## References
//!
//! * [`jsx-flow.js` in `micromark-extension-mdx-jsx`](https://github.com/micromark/micromark-extension-mdx-jsx/blob/main/dev/lib/jsx-flow.js)
//! * [`mdxjs.com`](https://mdxjs.com)
//!
//! [mdx_jsx_flow]: crate::construct::mdx_jsx_flow
//! [mdx_jsx_text]: crate::construct::mdx_jsx_text
//! [interleaving]: https://mdxjs.com/docs/what-is-mdx/#interleaving

use crate::event::Name;
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
use crate::util::char::{
    after_index as char_after_index, format_byte, format_opt as format_char_opt,
    kind_after_index, Kind as CharacterKind,
};
use alloc::format;
use core::str;
use unicode_id::UnicodeID;

/// Start of MDX: JSX.
///
/// ```markdown
/// > | a c
///       ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
    debug_assert_eq!(tokenizer.current, Some(b'<'));
    tokenizer.enter(tokenizer.tokenize_state.token_1.clone());
    tokenizer.enter(Name::MdxJsxTagMarker);
    tokenizer.consume();
    tokenizer.exit(Name::MdxJsxTagMarker);
    State::Next(StateName::MdxJsxStartAfter)
}

/// After `<`.
///
/// ```markdown
/// > | a c
///       ^
/// ```
pub fn start_after(tokenizer: &mut Tokenizer) -> State {
    // Deviate from JSX, which allows arbitrary whitespace.
    // See: .
    if let Some(b'\t' | b'\n' | b' ') = tokenizer.current {
        State::Nok
    } else {
        tokenizer.attempt(State::Next(StateName::MdxJsxNameBefore), State::Nok);
        State::Retry(StateName::MdxJsxEsWhitespaceStart)
    }
}

/// Before name, self slash, or end of tag for fragments.
///
/// ```markdown
/// > | a c
///       ^
/// > | a c
///       ^
/// > | a <> b
///       ^
/// ```
pub fn name_before(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        // Closing tag.
        Some(b'/') => {
            tokenizer.enter(Name::MdxJsxTagClosingMarker);
            tokenizer.consume();
            tokenizer.exit(Name::MdxJsxTagClosingMarker);
            tokenizer.attempt(
                State::Next(StateName::MdxJsxClosingTagNameBefore),
                State::Nok,
            );
            State::Next(StateName::MdxJsxEsWhitespaceStart)
        }
        // Fragment opening tag.
        Some(b'>') => State::Retry(StateName::MdxJsxTagEnd),
        _ => {
            if id_start(char_after_index(
                tokenizer.parse_state.bytes,
                tokenizer.point.index,
            )) {
                tokenizer.enter(Name::MdxJsxTagName);
                tokenizer.enter(Name::MdxJsxTagNamePrimary);
                tokenizer.consume();
                State::Next(StateName::MdxJsxPrimaryName)
            } else {
                crash(
                    tokenizer,
                    "before name",
                    &format!(
                        "a character that can start a name, such as a letter, `$`, or `_`{}",
                        if tokenizer.current == Some(b'!') {
                            " (note: to create a comment in MDX, use `{/* text */}`)"
                        } else {
                            ""
                        }
                    ),
                )
            }
        }
    }
}

/// Before name of closing tag or end of closing fragment tag.
///
/// ```markdown
/// > | a b
///       ^
/// > | a c
///       ^
/// ```
pub fn closing_tag_name_before(tokenizer: &mut Tokenizer) -> State {
    // Fragment closing tag.
    if let Some(b'>') = tokenizer.current {
        State::Retry(StateName::MdxJsxTagEnd)
    }
    // Start of a closing tag name.
    else if id_start(char_after_index(
        tokenizer.parse_state.bytes,
        tokenizer.point.index,
    )) {
        tokenizer.enter(Name::MdxJsxTagName);
        tokenizer.enter(Name::MdxJsxTagNamePrimary);
        tokenizer.consume();
        State::Next(StateName::MdxJsxPrimaryName)
    } else {
        crash(
            tokenizer,
            "before name",
            &format!(
                "a character that can start a name, such as a letter, `$`, or `_`{}",
                if matches!(tokenizer.current, Some(b'*' | b'/')) {
                    " (note: JS comments in JSX tags are not supported in MDX)"
                } else {
                    ""
                }
            ),
        )
    }
}

/// In primary name.
///
/// ```markdown
/// > | a d
///       ^
/// ```
pub fn primary_name(tokenizer: &mut Tokenizer) -> State {
    // End of name.
    if kind_after_index(tokenizer.parse_state.bytes, tokenizer.point.index)
        == CharacterKind::Whitespace
        || matches!(tokenizer.current, Some(b'.' | b'/' | b':' | b'>' | b'{'))
    {
        tokenizer.exit(Name::MdxJsxTagNamePrimary);
        tokenizer.attempt(State::Next(StateName::MdxJsxPrimaryNameAfter), State::Nok);
        State::Retry(StateName::MdxJsxEsWhitespaceStart)
    }
    // Continuation of name: remain.
    // Allow continuation bytes.
    else if matches!(tokenizer.current, Some(0x80..=0xBF))
        || id_cont(char_after_index(
            tokenizer.parse_state.bytes,
            tokenizer.point.index,
        ))
    {
        tokenizer.consume();
        State::Next(StateName::MdxJsxPrimaryName)
    } else {
        crash(
            tokenizer,
            "in name",
            &format!(
                "a name character such as letters, digits, `$`, or `_`; whitespace before attributes; or the end of the tag{}",
                if tokenizer.current == Some(b'@') {
                    " (note: to create a link in MDX, use `[text](url)`)"
                } else {
                    ""
                }
            ),
        )
    }
}

/// After primary name.
///
/// ```markdown
/// > | a d
///       ^
/// > | a d
///       ^
/// ```
pub fn primary_name_after(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        // Start of a member name.
        Some(b'.') => {
            tokenizer.enter(Name::MdxJsxTagNameMemberMarker);
            tokenizer.consume();
            tokenizer.exit(Name::MdxJsxTagNameMemberMarker);
            tokenizer.attempt(State::Next(StateName::MdxJsxMemberNameBefore), State::Nok);
            State::Next(StateName::MdxJsxEsWhitespaceStart)
        }
        // Start of a local name.
        Some(b':') => {
            tokenizer.enter(Name::MdxJsxTagNamePrefixMarker);
            tokenizer.consume();
            tokenizer.exit(Name::MdxJsxTagNamePrefixMarker);
            tokenizer.attempt(State::Next(StateName::MdxJsxLocalNameBefore), State::Nok);
            State::Next(StateName::MdxJsxEsWhitespaceStart)
        }
        // End of name.
        _ => {
            if matches!(tokenizer.current, Some(b'/' | b'>' | b'{'))
                || id_start(char_after_index(
                    tokenizer.parse_state.bytes,
                    tokenizer.point.index,
                ))
            {
                tokenizer.exit(Name::MdxJsxTagName);
                State::Retry(StateName::MdxJsxAttributeBefore)
            } else {
                crash(
                    tokenizer,
                    "after name",
                    "a character that can start an attribute name, such as a letter, `$`, or `_`; whitespace before attributes; or the end of the tag"
                )
            }
        }
    }
}

/// Before member name.
///
/// ```markdown
/// > | a d
///       ^
/// ```
pub fn member_name_before(tokenizer: &mut Tokenizer) -> State {
    // Start of a member name.
    if id_start(char_after_index(
        tokenizer.parse_state.bytes,
        tokenizer.point.index,
    )) {
        tokenizer.enter(Name::MdxJsxTagNameMember);
        tokenizer.consume();
        State::Next(StateName::MdxJsxMemberName)
    } else {
        crash(
            tokenizer,
            "before member name",
            "a character that can start an attribute name, such as a letter, `$`, or `_`; whitespace before attributes; or the end of the tag"
        )
    }
}

/// In member name.
///
/// ```markdown
/// > | a e
///       ^
/// ```
pub fn member_name(tokenizer: &mut Tokenizer) -> State {
    // End of name.
    // Note: no `:` allowed here.
    if kind_after_index(tokenizer.parse_state.bytes, tokenizer.point.index)
        == CharacterKind::Whitespace
        || matches!(tokenizer.current, Some(b'.' | b'/' | b'>' | b'{'))
    {
        tokenizer.exit(Name::MdxJsxTagNameMember);
        tokenizer.attempt(State::Next(StateName::MdxJsxMemberNameAfter), State::Nok);
        State::Retry(StateName::MdxJsxEsWhitespaceStart)
    }
    // Continuation of name: remain.
    // Allow continuation bytes.
    else if matches!(tokenizer.current, Some(0x80..=0xBF))
        || id_cont(char_after_index(
            tokenizer.parse_state.bytes,
            tokenizer.point.index,
        ))
    {
        tokenizer.consume();
        State::Next(StateName::MdxJsxMemberName)
    } else {
        crash(
            tokenizer,
            "in member name",
            &format!(
                "a name character such as letters, digits, `$`, or `_`; whitespace before attributes; or the end of the tag{}",
                if tokenizer.current == Some(b'@') {
                    " (note: to create a link in MDX, use `[text](url)`)"
                } else {
                    ""
                }
            ),
        )
    }
}

/// After member name.
///
/// ```markdown
/// > | a d
///       ^
/// > | a e
///       ^
/// ```
pub fn member_name_after(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        // Start of another member name.
        Some(b'.') => {
            tokenizer.enter(Name::MdxJsxTagNameMemberMarker);
            tokenizer.consume();
            tokenizer.exit(Name::MdxJsxTagNameMemberMarker);
            tokenizer.attempt(State::Next(StateName::MdxJsxMemberNameBefore), State::Nok);
            State::Next(StateName::MdxJsxEsWhitespaceStart)
        }
        // End of name.
        _ => {
            if matches!(tokenizer.current, Some(b'/' | b'>' | b'{'))
                || id_start(char_after_index(
                    tokenizer.parse_state.bytes,
                    tokenizer.point.index,
                ))
            {
                tokenizer.exit(Name::MdxJsxTagName);
                State::Retry(StateName::MdxJsxAttributeBefore)
            } else {
                crash(
                    tokenizer,
                    "after member name",
                    "a character that can start an attribute name, such as a letter, `$`, or `_`; whitespace before attributes; or the end of the tag"
                )
            }
        }
    }
}

/// Before local name.
///
/// ```markdown
/// > | a d
///       ^
/// ```
pub fn local_name_before(tokenizer: &mut Tokenizer) -> State {
    // Start of a local name.
    if id_start(char_after_index(
        tokenizer.parse_state.bytes,
        tokenizer.point.index,
    )) {
        tokenizer.enter(Name::MdxJsxTagNameLocal);
        tokenizer.consume();
        State::Next(StateName::MdxJsxLocalName)
    } else {
        crash(
            tokenizer,
            "before local name",
            &format!(
                "a character that can start a name, such as a letter, `$`, or `_`{}",
                if matches!(tokenizer.current, Some(b'+' | b'/'..=b'9')) {
                    " (note: to create a link in MDX, use `[text](url)`)"
                } else {
                    ""
                }
            ),
        )
    }
}

/// In local name.
///
/// ```markdown
/// > | a e
///       ^
/// ```
pub fn local_name(tokenizer: &mut Tokenizer) -> State {
    // End of local name (note that we don’t expect another colon, or a member).
    if kind_after_index(tokenizer.parse_state.bytes, tokenizer.point.index)
        == CharacterKind::Whitespace
        || matches!(tokenizer.current, Some(b'/' | b'>' | b'{'))
    {
        tokenizer.exit(Name::MdxJsxTagNameLocal);
        tokenizer.attempt(State::Next(StateName::MdxJsxLocalNameAfter), State::Nok);
        State::Retry(StateName::MdxJsxEsWhitespaceStart)
    }
    // Continuation of name: remain.
    // Allow continuation bytes.
    else if matches!(tokenizer.current, Some(0x80..=0xBF))
        || id_cont(char_after_index(
            tokenizer.parse_state.bytes,
            tokenizer.point.index,
        ))
    {
        tokenizer.consume();
        State::Next(StateName::MdxJsxLocalName)
    } else {
        crash(
            tokenizer,
            "in local name",
            "a name character such as letters, digits, `$`, or `_`; whitespace before attributes; or the end of the tag"
        )
    }
}

/// After local name.
///
/// This is like `primary_name_after`, but we don’t expect colons or
/// periods.
///
/// ```markdown
/// > | a d
///       ^
/// > | a e
///       ^
/// ```
pub fn local_name_after(tokenizer: &mut Tokenizer) -> State {
    // End of name.
    if matches!(tokenizer.current, Some(b'/' | b'>' | b'{'))
        || id_start(char_after_index(
            tokenizer.parse_state.bytes,
            tokenizer.point.index,
        ))
    {
        tokenizer.exit(Name::MdxJsxTagName);
        State::Retry(StateName::MdxJsxAttributeBefore)
    } else {
        crash(
            tokenizer,
            "after local name",
            "a character that can start an attribute name, such as a letter, `$`, or `_`; whitespace before attributes; or the end of the tag"
        )
    }
}

/// Before attribute.
///
/// ```markdown
/// > | a c
///       ^
/// > | a c
///       ^
/// > | a d
///       ^
/// > | a d
///       ^
/// ```
pub fn attribute_before(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        // Self-closing.
        Some(b'/') => {
            tokenizer.enter(Name::MdxJsxTagSelfClosingMarker);
            tokenizer.consume();
            tokenizer.exit(Name::MdxJsxTagSelfClosingMarker);
            tokenizer.attempt(State::Next(StateName::MdxJsxSelfClosing), State::Nok);
            State::Next(StateName::MdxJsxEsWhitespaceStart)
        }
        // End of tag.
        Some(b'>') => State::Retry(StateName::MdxJsxTagEnd),
        // Attribute expression.
        Some(b'{') => unreachable!("to do: attribute expression"),
        _ => {
            // Start of an attribute name.
            if id_start(char_after_index(
                tokenizer.parse_state.bytes,
                tokenizer.point.index,
            )) {
                tokenizer.enter(Name::MdxJsxTagAttribute);
                tokenizer.enter(Name::MdxJsxTagAttributeName);
                tokenizer.enter(Name::MdxJsxTagAttributePrimaryName);
                tokenizer.consume();
                State::Next(StateName::MdxJsxAttributePrimaryName)
            } else {
                crash(
                    tokenizer,
                    "before attribute name",
                    "a character that can start an attribute name, such as a letter, `$`, or `_`; whitespace before attributes; or the end of the tag"
                )
            }
        }
    }
}

/// In primary attribute name.
///
/// ```markdown
/// > | a e
///       ^
/// > | a e
///       ^
/// > | a e
///       ^
/// ```
pub fn attribute_primary_name(tokenizer: &mut Tokenizer) -> State {
    // End of attribute name or tag.
    if kind_after_index(tokenizer.parse_state.bytes, tokenizer.point.index)
        == CharacterKind::Whitespace
        || matches!(tokenizer.current, Some(b'/' | b':' | b'=' | b'>' | b'{'))
    {
        tokenizer.exit(Name::MdxJsxTagAttributePrimaryName);
        tokenizer.attempt(
            State::Next(StateName::MdxJsxAttributePrimaryNameAfter),
            State::Nok,
        );
        State::Retry(StateName::MdxJsxEsWhitespaceStart)
    }
    // Continuation of name: remain.
    // Allow continuation bytes.
    else if matches!(tokenizer.current, Some(0x80..=0xBF))
        || id_cont(char_after_index(
            tokenizer.parse_state.bytes,
            tokenizer.point.index,
        ))
    {
        tokenizer.consume();
        State::Next(StateName::MdxJsxAttributePrimaryName)
    } else {
        crash(
            tokenizer,
            "in attribute name",
            "an attribute name character such as letters, digits, `$`, or `_`; `=` to initialize a value; whitespace before attributes; or the end of the tag"
        )
    }
}

/// After primary attribute name.
///
/// ```markdown
/// > | a d
///       ^
/// > | a e
///       ^
/// > | a e
///       ^
/// ```
pub fn attribute_primary_name_after(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        // Start of a local name.
        Some(b':') => {
            tokenizer.enter(Name::MdxJsxTagAttributeNamePrefixMarker);
            tokenizer.consume();
            tokenizer.exit(Name::MdxJsxTagAttributeNamePrefixMarker);
            tokenizer.attempt(
                State::Next(StateName::MdxJsxAttributeLocalNameBefore),
                State::Nok,
            );
            State::Next(StateName::MdxJsxEsWhitespaceStart)
        }
        // Initializer: start of an attribute value.
        Some(b'=') => {
            tokenizer.exit(Name::MdxJsxTagAttributeName);
            tokenizer.enter(Name::MdxJsxTagAttributeInitializerMarker);
            tokenizer.consume();
            tokenizer.exit(Name::MdxJsxTagAttributeInitializerMarker);
            tokenizer.attempt(
                State::Next(StateName::MdxJsxAttributeValueBefore),
                State::Nok,
            );
            State::Next(StateName::MdxJsxEsWhitespaceStart)
        }
        _ => {
            // End of tag / new attribute.
            if kind_after_index(tokenizer.parse_state.bytes, tokenizer.point.index)
                == CharacterKind::Whitespace
                || matches!(tokenizer.current, Some(b'/' | b'>' | b'{'))
                || id_start(char_after_index(
                    tokenizer.parse_state.bytes,
                    tokenizer.point.index,
                ))
            {
                tokenizer.exit(Name::MdxJsxTagAttributeName);
                tokenizer.exit(Name::MdxJsxTagAttribute);
                tokenizer.attempt(State::Next(StateName::MdxJsxAttributeBefore), State::Nok);
                State::Retry(StateName::MdxJsxEsWhitespaceStart)
            } else {
                crash(
                    tokenizer,
                    "after attribute name",
                    "a character that can start an attribute name, such as a letter, `$`, or `_`; `=` to initialize a value; or the end of the tag"
                )
            }
        }
    }
}

/// Before local attribute name.
///
/// ```markdown
/// > | a e
///       ^
/// ```
pub fn attribute_local_name_before(tokenizer: &mut Tokenizer) -> State {
    // Start of a local name.
    if id_start(char_after_index(
        tokenizer.parse_state.bytes,
        tokenizer.point.index,
    )) {
        tokenizer.enter(Name::MdxJsxTagAttributeNameLocal);
        tokenizer.consume();
        State::Next(StateName::MdxJsxAttributeLocalName)
    } else {
        crash(
            tokenizer,
            "before local attribute name",
            "a character that can start an attribute name, such as a letter, `$`, or `_`; `=` to initialize a value; or the end of the tag"
        )
    }
}

/// In local attribute name.
///
/// ```markdown
/// > | a f
///       ^
/// > | a f
///       ^
/// ```
pub fn attribute_local_name(tokenizer: &mut Tokenizer) -> State {
    // End of local name (note that we don’t expect another colon).
    if kind_after_index(tokenizer.parse_state.bytes, tokenizer.point.index)
        == CharacterKind::Whitespace
        || matches!(tokenizer.current, Some(b'/' | b'=' | b'>' | b'{'))
    {
        tokenizer.exit(Name::MdxJsxTagAttributeNameLocal);
        tokenizer.exit(Name::MdxJsxTagAttributeName);
        tokenizer.attempt(
            State::Next(StateName::MdxJsxAttributeLocalNameAfter),
            State::Nok,
        );
        State::Retry(StateName::MdxJsxEsWhitespaceStart)
    }
    // Continuation of name: remain.
    // Allow continuation bytes.
    else if matches!(tokenizer.current, Some(0x80..=0xBF))
        || id_cont(char_after_index(
            tokenizer.parse_state.bytes,
            tokenizer.point.index,
        ))
    {
        tokenizer.consume();
        State::Next(StateName::MdxJsxAttributeLocalName)
    } else {
        crash(
            tokenizer,
            "in local attribute name",
            "an attribute name character such as letters, digits, `$`, or `_`; `=` to initialize a value; whitespace before attributes; or the end of the tag"
        )
    }
}

/// After local attribute name.
///
/// ```markdown
/// > | a f
///       ^
/// > | a f
///       ^
/// ```
pub fn attribute_local_name_after(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        // Start of an attribute value.
        Some(b'=') => {
            tokenizer.enter(Name::MdxJsxTagAttributeInitializerMarker);
            tokenizer.consume();
            tokenizer.exit(Name::MdxJsxTagAttributeInitializerMarker);
            tokenizer.attempt(
                State::Next(StateName::MdxJsxAttributeValueBefore),
                State::Nok,
            );
            State::Next(StateName::MdxJsxEsWhitespaceStart)
        }
        _ => {
            // End of name.
            if matches!(tokenizer.current, Some(b'/' | b'>' | b'{'))
                || id_start(char_after_index(
                    tokenizer.parse_state.bytes,
                    tokenizer.point.index,
                ))
            {
                tokenizer.exit(Name::MdxJsxTagAttribute);
                State::Retry(StateName::MdxJsxAttributeBefore)
            } else {
                crash(
                    tokenizer,
                    "after local attribute name",
                    "a character that can start an attribute name, such as a letter, `$`, or `_`; `=` to initialize a value; or the end of the tag"
                )
            }
        }
    }
}

/// After `=`, before value.
///
/// ```markdown
/// > | a e
///       ^
/// > | a e
///       ^
/// ```
pub fn attribute_value_before(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        // Start of double- or single quoted value.
        Some(b'"' | b'\'') => {
            tokenizer.tokenize_state.marker = tokenizer.current.unwrap();
            tokenizer.enter(Name::MdxJsxTagAttributeValueLiteral);
            tokenizer.enter(Name::MdxJsxTagAttributeValueLiteralMarker);
            tokenizer.consume();
            tokenizer.exit(Name::MdxJsxTagAttributeValueLiteralMarker);
            State::Next(StateName::MdxJsxAttributeValueQuotedStart)
        }
        // Attribute value expression.
        Some(b'{') => unreachable!("to do: attribute value expression"),
        _ => crash(
            tokenizer,
            "before attribute value",
            &format!(
                "a character that can start an attribute value, such as `\"`, `'`, or `{{`{}",
                if tokenizer.current == Some(b'<') {
                    " (note: to use an element or fragment as a prop value in MDX, use `{<element />}`)"
                } else {
                    ""
                }
            ),
        ),
    }
}

/// Before quoted literal attribute value.
///
/// ```markdown
/// > | a e
///       ^
/// ```
pub fn attribute_value_quoted_start(tokenizer: &mut Tokenizer) -> State {
    if let Some(byte) = tokenizer.current {
        if byte == tokenizer.tokenize_state.marker {
            tokenizer.tokenize_state.marker = 0;
            tokenizer.enter(Name::MdxJsxTagAttributeValueLiteralMarker);
            tokenizer.consume();
            tokenizer.exit(Name::MdxJsxTagAttributeValueLiteralMarker);
            tokenizer.exit(Name::MdxJsxTagAttributeValueLiteral);
            tokenizer.exit(Name::MdxJsxTagAttribute);
            tokenizer.attempt(State::Next(StateName::MdxJsxAttributeBefore), State::Nok);
            State::Next(StateName::MdxJsxEsWhitespaceStart)
        } else if byte == b'\n' {
            tokenizer.attempt(
                State::Next(StateName::MdxJsxAttributeValueQuotedStart),
                State::Nok,
            );
            State::Retry(StateName::MdxJsxEsWhitespaceStart)
        } else {
            tokenizer.enter(Name::MdxJsxTagAttributeValueLiteralValue);
            State::Retry(StateName::MdxJsxAttributeValueQuoted)
        }
    } else {
        crash(
            tokenizer,
            "in attribute value",
            &format!(
                "a corresponding closing quote {}",
                format_byte(tokenizer.tokenize_state.marker)
            ),
        )
    }
}

/// In quoted literal attribute value.
///
/// ```markdown
/// > | a e
///       ^
/// ```
pub fn attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {
    if tokenizer.current == Some(tokenizer.tokenize_state.marker)
        || matches!(tokenizer.current, None | Some(b'\n'))
    {
        tokenizer.exit(Name::MdxJsxTagAttributeValueLiteralValue);
        State::Retry(StateName::MdxJsxAttributeValueQuotedStart)
    } else {
        tokenizer.consume();
        State::Next(StateName::MdxJsxAttributeValueQuoted)
    }
}

/// After self-closing slash.
///
/// ```markdown
/// > | a c
///       ^
/// ```
pub fn self_closing(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        Some(b'>') => State::Retry(StateName::MdxJsxTagEnd),
        _ => crash(
            tokenizer,
            "after self-closing slash",
            &format!(
                "`>` to end the tag{}",
                if matches!(tokenizer.current, Some(b'*' | b'/')) {
                    " (note: JS comments in JSX tags are not supported in MDX)"
                } else {
                    ""
                }
            ),
        ),
    }
}

/// At final `>`.
///
/// ```markdown
/// > | a c
///       ^
/// ```
pub fn tag_end(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        Some(b'>') => {
            tokenizer.enter(Name::MdxJsxTagMarker);
            tokenizer.consume();
            tokenizer.exit(Name::MdxJsxTagMarker);
            tokenizer.exit(tokenizer.tokenize_state.token_1.clone());
            State::Ok
        }
        _ => unreachable!("expected `>`"),
    }
}

/// Before optional ECMAScript whitespace.
///
/// ```markdown
/// > | a c
///       ^
/// ```
pub fn es_whitespace_start(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        Some(b'\n') => {
            tokenizer.enter(Name::LineEnding);
            tokenizer.consume();
            tokenizer.exit(Name::LineEnding);
            State::Next(StateName::MdxJsxEsWhitespaceEolAfter)
        }
        _ => {
            if kind_after_index(tokenizer.parse_state.bytes, tokenizer.point.index)
                == CharacterKind::Whitespace
            {
                tokenizer.enter(Name::MdxJsxEsWhitespace);
                State::Retry(StateName::MdxJsxEsWhitespaceInside)
            } else {
                State::Ok
            }
        }
    }
}

/// In ECMAScript whitespace.
///
/// ```markdown
/// > | a c
///       ^
/// ```
pub fn es_whitespace_inside(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        Some(b'\n') => {
            tokenizer.exit(Name::MdxJsxEsWhitespace);
            tokenizer.enter(Name::LineEnding);
            tokenizer.consume();
            tokenizer.exit(Name::LineEnding);
            State::Next(StateName::MdxJsxEsWhitespaceEolAfter)
        }
        // Allow continuation bytes.
        Some(0x80..=0xBF) => {
            tokenizer.consume();
            State::Next(StateName::MdxJsxEsWhitespaceInside)
        }
        _ => {
            if kind_after_index(tokenizer.parse_state.bytes, tokenizer.point.index)
                == CharacterKind::Whitespace
            {
                tokenizer.consume();
                State::Next(StateName::MdxJsxEsWhitespaceInside)
            } else {
                tokenizer.exit(Name::MdxJsxEsWhitespace);
                State::Ok
            }
        }
    }
}

/// After eol in whitespace.
///
/// ```markdown
/// > | a c
///       ^
/// ```
pub fn es_whitespace_eol_after(tokenizer: &mut Tokenizer) -> State {
    // Lazy continuation in a flow tag is a syntax error.
    if tokenizer.tokenize_state.token_1 == Name::MdxJsxFlowTag && tokenizer.lazy {
        State::Error(format!(
            "{}:{}: Unexpected lazy line in jsx in container, expected line to be prefixed with `>` when in a block quote, whitespace when in a list, etc",
            tokenizer.point.line, tokenizer.point.column
        ))
    } else {
        State::Retry(StateName::MdxJsxEsWhitespaceStart)
    }
}

/// Check if a character can start a JSX identifier.
fn id_start(code: Option<char>) -> bool {
    if let Some(char) = code {
        UnicodeID::is_id_start(char) || matches!(char, '$' | '_')
    } else {
        false
    }
}

/// Check if a character can continue a JSX identifier.
fn id_cont(code: Option<char>) -> bool {
    if let Some(char) = code {
        UnicodeID::is_id_continue(char) || matches!(char, '-' | '\u{200c}' | '\u{200d}')
    } else {
        false
    }
}

/// Crash because something happened `at`, with info on what was `expect`ed
/// instead.
fn crash(tokenizer: &Tokenizer, at: &str, expect: &str) -> State {
    State::Error(format!(
        "{}:{}: Unexpected {} {}, expected {}",
        tokenizer.point.line,
        tokenizer.point.column,
        format_char_opt(if tokenizer.current.is_none() {
            None
        } else {
            char_after_index(tokenizer.parse_state.bytes, tokenizer.point.index)
        }),
        at,
        expect
    ))
}
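
// The identifier checks above encode the JSX identifier rules this parser
// relies on: `$` and `_` (but not digits or `-`) can start an identifier,
// while `-` and the zero-width (non-)joiners can only continue one.
// A minimal sketch of unit tests for those two helpers, assuming the crate's
// regular `cargo test` setup (the module name is made up):
#[cfg(test)]
mod id_start_cont_tests {
    use super::{id_cont, id_start};

    #[test]
    fn id_start_accepts_letters_dollar_and_underscore() {
        assert!(id_start(Some('a')));
        assert!(id_start(Some('$')));
        assert!(id_start(Some('_')));
        // Digits and dashes may only continue an identifier.
        assert!(!id_start(Some('1')));
        assert!(!id_start(Some('-')));
        assert!(!id_start(None));
    }

    #[test]
    fn id_cont_also_accepts_dash_and_joiners() {
        assert!(id_cont(Some('a')));
        assert!(id_cont(Some('1')));
        assert!(id_cont(Some('-')));
        assert!(id_cont(Some('\u{200c}')));
        assert!(id_cont(Some('\u{200d}')));
        // Whitespace never continues an identifier.
        assert!(!id_cont(Some(' ')));
        assert!(!id_cont(None));
    }
}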