From b4256dc2e1352a2b74f29b2650150cc8b57f54ed Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Thu, 8 Sep 2022 15:46:46 +0200 Subject: Add support for mdx jsx (flow) --- src/compiler.rs | 8 ++ src/construct/flow.rs | 19 +++- src/construct/mdx_jsx_text.rs | 5 +- src/construct/mod.rs | 4 +- src/construct/partial_mdx_jsx.rs | 189 ++++++++++++++++++++++++--------------- src/event.rs | 54 ++++++----- src/lib.rs | 9 ++ src/state.rs | 26 +++++- 8 files changed, 219 insertions(+), 95 deletions(-) (limited to 'src') diff --git a/src/compiler.rs b/src/compiler.rs index 572cc4e..e878c09 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -365,6 +365,7 @@ fn enter(context: &mut CompileContext) { | Name::HeadingSetextText | Name::Label | Name::MdxJsxTextTag + | Name::MdxJsxFlowTag | Name::ReferenceString | Name::ResourceTitleString => on_enter_buffer(context), @@ -468,6 +469,7 @@ fn exit(context: &mut CompileContext) { Name::ListOrdered | Name::ListUnordered => on_exit_list(context), Name::ListItem => on_exit_list_item(context), Name::ListItemValue => on_exit_list_item_value(context), + Name::MdxJsxFlowTag => on_exit_mdx_jsx_flow_tag(context), Name::Paragraph => on_exit_paragraph(context), Name::ReferenceString => on_exit_reference_string(context), Name::ResourceDestinationString => on_exit_resource_destination_string(context), @@ -1674,6 +1676,12 @@ fn on_exit_media(context: &mut CompileContext) { } } +/// Handle [`Exit`][Kind::Exit]:[`MdxJsxFlowTag`][Name::MdxJsxFlowTag]. +fn on_exit_mdx_jsx_flow_tag(context: &mut CompileContext) { + context.resume(); + context.slurp_one_line_ending = true; +} + /// Handle [`Exit`][Kind::Exit]:[`Paragraph`][Name::Paragraph]. fn on_exit_paragraph(context: &mut CompileContext) { let tight = context.tight_stack.last().unwrap_or(&false); diff --git a/src/construct/flow.rs b/src/construct/flow.rs index 3f7bc9c..5b2cbfe 100644 --- a/src/construct/flow.rs +++ b/src/construct/flow.rs @@ -16,6 +16,7 @@ //! * [Heading (atx)][crate::construct::heading_atx] //! * [Heading (setext)][crate::construct::heading_setext] //! * [HTML (flow)][crate::construct::html_flow] +//! * [MDX JSX (flow)][crate::construct::mdx_jsx_flow] //! * [Raw (flow)][crate::construct::raw_flow] (code (fenced), math (flow)) //! * [Thematic break][crate::construct::thematic_break] @@ -61,7 +62,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { Some(b'<') => { tokenizer.attempt( State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeParagraph), + State::Next(StateName::FlowBeforeMdxJsx), ); State::Retry(StateName::HtmlFlowStart) } @@ -123,11 +124,25 @@ pub fn before_raw(tokenizer: &mut Tokenizer) -> State { pub fn before_html(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( State::Next(StateName::FlowAfter), - State::Next(StateName::FlowBeforeHeadingAtx), + State::Next(StateName::FlowBeforeMdxJsx), ); State::Retry(StateName::HtmlFlowStart) } +/// At mdx jsx (flow). +/// +/// ```markdown +/// > | +/// ^ +/// ``` +pub fn before_mdx_jsx(tokenizer: &mut Tokenizer) -> State { + tokenizer.attempt( + State::Next(StateName::FlowAfter), + State::Next(StateName::FlowBeforeHeadingAtx), + ); + State::Retry(StateName::MdxJsxFlowStart) +} + /// At heading (atx). 
/// /// ```markdown diff --git a/src/construct/mdx_jsx_text.rs b/src/construct/mdx_jsx_text.rs index 287cd70..f4cab4e 100644 --- a/src/construct/mdx_jsx_text.rs +++ b/src/construct/mdx_jsx_text.rs @@ -13,7 +13,10 @@ use crate::tokenizer::Tokenizer; pub fn start(tokenizer: &mut Tokenizer) -> State { if Some(b'<') == tokenizer.current && tokenizer.parse_state.options.constructs.mdx_jsx_text { tokenizer.tokenize_state.token_1 = Name::MdxJsxTextTag; - tokenizer.attempt(State::Next(StateName::MdxJsxTextAfter), State::Next(StateName::MdxJsxTextNok)); + tokenizer.attempt( + State::Next(StateName::MdxJsxTextAfter), + State::Next(StateName::MdxJsxTextNok), + ); State::Retry(StateName::MdxJsxStart) } else { State::Nok diff --git a/src/construct/mod.rs b/src/construct/mod.rs index 7aaa3ee..09ec976 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -64,6 +64,7 @@ //! * [gfm table][gfm_table] //! * [gfm task list item check][gfm_task_list_item_check] //! * [mdx jsx (text)][mdx_jsx_text] +//! * [mdx jsx (flow)][mdx_jsx_flow] //! //! There are also several small subroutines typically used in different places: //! @@ -71,7 +72,7 @@ //! * [data][partial_data] //! * [destination][partial_destination] //! * [label][partial_label] -//! * [jsx text][partial_mdx_jsx] +//! * [mdx jsx][partial_mdx_jsx] //! * [non lazy continuation][partial_non_lazy_continuation] //! * [space or tab][partial_space_or_tab] //! * [space or tab, eol][partial_space_or_tab_eol] @@ -163,6 +164,7 @@ pub mod label_end; pub mod label_start_image; pub mod label_start_link; pub mod list_item; +pub mod mdx_jsx_flow; pub mod mdx_jsx_text; pub mod paragraph; pub mod partial_bom; diff --git a/src/construct/partial_mdx_jsx.rs b/src/construct/partial_mdx_jsx.rs index c61dfd0..ac4b7d8 100644 --- a/src/construct/partial_mdx_jsx.rs +++ b/src/construct/partial_mdx_jsx.rs @@ -1,6 +1,5 @@ //! To do. -use crate::construct::partial_space_or_tab_eol::space_or_tab_eol; use crate::event::Name; use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; @@ -23,10 +22,10 @@ use unicode_id::UnicodeID; /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { debug_assert_eq!(tokenizer.current, Some(b'<')); - tokenizer.enter(Name::MdxJsxTextTag); - tokenizer.enter(Name::MdxJsxTextTagMarker); + tokenizer.enter(tokenizer.tokenize_state.token_1.clone()); + tokenizer.enter(Name::MdxJsxTagMarker); tokenizer.consume(); - tokenizer.exit(Name::MdxJsxTextTagMarker); + tokenizer.exit(Name::MdxJsxTagMarker); State::Next(StateName::MdxJsxStartAfter) } @@ -61,9 +60,9 @@ pub fn name_before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // Closing tag. 
Some(b'/') => { - tokenizer.enter(Name::MdxJsxTextTagClosingMarker); + tokenizer.enter(Name::MdxJsxTagClosingMarker); tokenizer.consume(); - tokenizer.exit(Name::MdxJsxTextTagClosingMarker); + tokenizer.exit(Name::MdxJsxTagClosingMarker); tokenizer.attempt( State::Next(StateName::MdxJsxClosingTagNameBefore), State::Nok, @@ -77,8 +76,8 @@ pub fn name_before(tokenizer: &mut Tokenizer) -> State { tokenizer.parse_state.bytes, tokenizer.point.index, )) { - tokenizer.enter(Name::MdxJsxTextTagName); - tokenizer.enter(Name::MdxJsxTextTagNamePrimary); + tokenizer.enter(Name::MdxJsxTagName); + tokenizer.enter(Name::MdxJsxTagNamePrimary); tokenizer.consume(); State::Next(StateName::MdxJsxPrimaryName) } else { @@ -117,8 +116,8 @@ pub fn closing_tag_name_before(tokenizer: &mut Tokenizer) -> State { tokenizer.parse_state.bytes, tokenizer.point.index, )) { - tokenizer.enter(Name::MdxJsxTextTagName); - tokenizer.enter(Name::MdxJsxTextTagNamePrimary); + tokenizer.enter(Name::MdxJsxTagName); + tokenizer.enter(Name::MdxJsxTagNamePrimary); tokenizer.consume(); State::Next(StateName::MdxJsxPrimaryName) } else { @@ -148,7 +147,7 @@ pub fn primary_name(tokenizer: &mut Tokenizer) -> State { if byte_to_kind(tokenizer.parse_state.bytes, tokenizer.point.index) == CharacterKind::Whitespace || matches!(tokenizer.current, Some(b'.' | b'/' | b':' | b'>' | b'{')) { - tokenizer.exit(Name::MdxJsxTextTagNamePrimary); + tokenizer.exit(Name::MdxJsxTagNamePrimary); tokenizer.attempt(State::Next(StateName::MdxJsxPrimaryNameAfter), State::Nok); State::Retry(StateName::MdxJsxEsWhitespaceStart) } @@ -190,17 +189,17 @@ pub fn primary_name_after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // Start of a member name. Some(b'.') => { - tokenizer.enter(Name::MdxJsxTextTagNameMemberMarker); + tokenizer.enter(Name::MdxJsxTagNameMemberMarker); tokenizer.consume(); - tokenizer.exit(Name::MdxJsxTextTagNameMemberMarker); + tokenizer.exit(Name::MdxJsxTagNameMemberMarker); tokenizer.attempt(State::Next(StateName::MdxJsxMemberNameBefore), State::Nok); State::Next(StateName::MdxJsxEsWhitespaceStart) } // Start of a local name. Some(b':') => { - tokenizer.enter(Name::MdxJsxTextTagNamePrefixMarker); + tokenizer.enter(Name::MdxJsxTagNamePrefixMarker); tokenizer.consume(); - tokenizer.exit(Name::MdxJsxTextTagNamePrefixMarker); + tokenizer.exit(Name::MdxJsxTagNamePrefixMarker); tokenizer.attempt(State::Next(StateName::MdxJsxLocalNameBefore), State::Nok); State::Next(StateName::MdxJsxEsWhitespaceStart) } @@ -212,7 +211,7 @@ pub fn primary_name_after(tokenizer: &mut Tokenizer) -> State { tokenizer.point.index, )) { - tokenizer.exit(Name::MdxJsxTextTagName); + tokenizer.exit(Name::MdxJsxTagName); State::Retry(StateName::MdxJsxAttributeBefore) } else { crash( @@ -237,7 +236,7 @@ pub fn member_name_before(tokenizer: &mut Tokenizer) -> State { tokenizer.parse_state.bytes, tokenizer.point.index, )) { - tokenizer.enter(Name::MdxJsxTextTagNameMember); + tokenizer.enter(Name::MdxJsxTagNameMember); tokenizer.consume(); State::Next(StateName::MdxJsxMemberName) } else { @@ -261,7 +260,7 @@ pub fn member_name(tokenizer: &mut Tokenizer) -> State { if byte_to_kind(tokenizer.parse_state.bytes, tokenizer.point.index) == CharacterKind::Whitespace || matches!(tokenizer.current, Some(b'.' 
| b'/' | b'>' | b'{')) { - tokenizer.exit(Name::MdxJsxTextTagNameMember); + tokenizer.exit(Name::MdxJsxTagNameMember); tokenizer.attempt(State::Next(StateName::MdxJsxMemberNameAfter), State::Nok); State::Retry(StateName::MdxJsxEsWhitespaceStart) } @@ -303,9 +302,9 @@ pub fn member_name_after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // Start of another member name. Some(b'.') => { - tokenizer.enter(Name::MdxJsxTextTagNameMemberMarker); + tokenizer.enter(Name::MdxJsxTagNameMemberMarker); tokenizer.consume(); - tokenizer.exit(Name::MdxJsxTextTagNameMemberMarker); + tokenizer.exit(Name::MdxJsxTagNameMemberMarker); tokenizer.attempt(State::Next(StateName::MdxJsxMemberNameBefore), State::Nok); State::Next(StateName::MdxJsxEsWhitespaceStart) } @@ -317,7 +316,7 @@ pub fn member_name_after(tokenizer: &mut Tokenizer) -> State { tokenizer.point.index, )) { - tokenizer.exit(Name::MdxJsxTextTagName); + tokenizer.exit(Name::MdxJsxTagName); State::Retry(StateName::MdxJsxAttributeBefore) } else { crash( @@ -342,7 +341,7 @@ pub fn local_name_before(tokenizer: &mut Tokenizer) -> State { tokenizer.parse_state.bytes, tokenizer.point.index, )) { - tokenizer.enter(Name::MdxJsxTextTagNameLocal); + tokenizer.enter(Name::MdxJsxTagNameLocal); tokenizer.consume(); State::Next(StateName::MdxJsxLocalName) } else { @@ -372,7 +371,7 @@ pub fn local_name(tokenizer: &mut Tokenizer) -> State { if byte_to_kind(tokenizer.parse_state.bytes, tokenizer.point.index) == CharacterKind::Whitespace || matches!(tokenizer.current, Some(b'/' | b'>' | b'{')) { - tokenizer.exit(Name::MdxJsxTextTagNameLocal); + tokenizer.exit(Name::MdxJsxTagNameLocal); tokenizer.attempt(State::Next(StateName::MdxJsxLocalNameAfter), State::Nok); State::Retry(StateName::MdxJsxEsWhitespaceStart) } @@ -414,7 +413,7 @@ pub fn local_name_after(tokenizer: &mut Tokenizer) -> State { tokenizer.point.index, )) { - tokenizer.exit(Name::MdxJsxTextTagName); + tokenizer.exit(Name::MdxJsxTagName); State::Retry(StateName::MdxJsxAttributeBefore) } else { crash( @@ -441,9 +440,9 @@ pub fn attribute_before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // Self-closing. 
Some(b'/') => { - tokenizer.enter(Name::MdxJsxTextTagSelfClosingMarker); + tokenizer.enter(Name::MdxJsxTagSelfClosingMarker); tokenizer.consume(); - tokenizer.exit(Name::MdxJsxTextTagSelfClosingMarker); + tokenizer.exit(Name::MdxJsxTagSelfClosingMarker); tokenizer.attempt(State::Next(StateName::MdxJsxSelfClosing), State::Nok); State::Next(StateName::MdxJsxEsWhitespaceStart) } @@ -457,9 +456,9 @@ pub fn attribute_before(tokenizer: &mut Tokenizer) -> State { tokenizer.parse_state.bytes, tokenizer.point.index, )) { - tokenizer.enter(Name::MdxJsxTextTagAttribute); - tokenizer.enter(Name::MdxJsxTextTagAttributeName); - tokenizer.enter(Name::MdxJsxTextTagAttributePrimaryName); + tokenizer.enter(Name::MdxJsxTagAttribute); + tokenizer.enter(Name::MdxJsxTagAttributeName); + tokenizer.enter(Name::MdxJsxTagAttributePrimaryName); tokenizer.consume(); State::Next(StateName::MdxJsxAttributePrimaryName) } else { @@ -488,7 +487,7 @@ pub fn attribute_primary_name(tokenizer: &mut Tokenizer) -> State { if byte_to_kind(tokenizer.parse_state.bytes, tokenizer.point.index) == CharacterKind::Whitespace || matches!(tokenizer.current, Some(b'/' | b':' | b'=' | b'>' | b'{')) { - tokenizer.exit(Name::MdxJsxTextTagAttributePrimaryName); + tokenizer.exit(Name::MdxJsxTagAttributePrimaryName); tokenizer.attempt( State::Next(StateName::MdxJsxAttributePrimaryNameAfter), State::Nok, @@ -528,9 +527,9 @@ pub fn attribute_primary_name_after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // Start of a local name. Some(b':') => { - tokenizer.enter(Name::MdxJsxTextTagAttributeNamePrefixMarker); + tokenizer.enter(Name::MdxJsxTagAttributeNamePrefixMarker); tokenizer.consume(); - tokenizer.exit(Name::MdxJsxTextTagAttributeNamePrefixMarker); + tokenizer.exit(Name::MdxJsxTagAttributeNamePrefixMarker); tokenizer.attempt( State::Next(StateName::MdxJsxAttributeLocalNameBefore), State::Nok, @@ -539,10 +538,10 @@ pub fn attribute_primary_name_after(tokenizer: &mut Tokenizer) -> State { } // Initializer: start of an attribute value. 
Some(b'=') => { - tokenizer.exit(Name::MdxJsxTextTagAttributeName); - tokenizer.enter(Name::MdxJsxTextTagAttributeInitializerMarker); + tokenizer.exit(Name::MdxJsxTagAttributeName); + tokenizer.enter(Name::MdxJsxTagAttributeInitializerMarker); tokenizer.consume(); - tokenizer.exit(Name::MdxJsxTextTagAttributeInitializerMarker); + tokenizer.exit(Name::MdxJsxTagAttributeInitializerMarker); tokenizer.attempt( State::Next(StateName::MdxJsxAttributeValueBefore), State::Nok, @@ -559,8 +558,8 @@ pub fn attribute_primary_name_after(tokenizer: &mut Tokenizer) -> State { tokenizer.point.index, )) { - tokenizer.exit(Name::MdxJsxTextTagAttributeName); - tokenizer.exit(Name::MdxJsxTextTagAttribute); + tokenizer.exit(Name::MdxJsxTagAttributeName); + tokenizer.exit(Name::MdxJsxTagAttribute); tokenizer.attempt(State::Next(StateName::MdxJsxAttributeBefore), State::Nok); State::Retry(StateName::MdxJsxEsWhitespaceStart) } else { @@ -586,7 +585,7 @@ pub fn attribute_local_name_before(tokenizer: &mut Tokenizer) -> State { tokenizer.parse_state.bytes, tokenizer.point.index, )) { - tokenizer.enter(Name::MdxJsxTextTagAttributeNameLocal); + tokenizer.enter(Name::MdxJsxTagAttributeNameLocal); tokenizer.consume(); State::Next(StateName::MdxJsxAttributeLocalName) } else { @@ -611,8 +610,8 @@ pub fn attribute_local_name(tokenizer: &mut Tokenizer) -> State { if byte_to_kind(tokenizer.parse_state.bytes, tokenizer.point.index) == CharacterKind::Whitespace || matches!(tokenizer.current, Some(b'/' | b'=' | b'>' | b'{')) { - tokenizer.exit(Name::MdxJsxTextTagAttributeNameLocal); - tokenizer.exit(Name::MdxJsxTextTagAttributeName); + tokenizer.exit(Name::MdxJsxTagAttributeNameLocal); + tokenizer.exit(Name::MdxJsxTagAttributeName); tokenizer.attempt( State::Next(StateName::MdxJsxAttributeLocalNameAfter), State::Nok, @@ -650,9 +649,9 @@ pub fn attribute_local_name_after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { // Start of an attribute value. Some(b'=') => { - tokenizer.enter(Name::MdxJsxTextTagAttributeInitializerMarker); + tokenizer.enter(Name::MdxJsxTagAttributeInitializerMarker); tokenizer.consume(); - tokenizer.exit(Name::MdxJsxTextTagAttributeInitializerMarker); + tokenizer.exit(Name::MdxJsxTagAttributeInitializerMarker); tokenizer.attempt( State::Next(StateName::MdxJsxAttributeValueBefore), State::Nok, @@ -667,7 +666,7 @@ pub fn attribute_local_name_after(tokenizer: &mut Tokenizer) -> State { tokenizer.point.index, )) { - tokenizer.exit(Name::MdxJsxTextTagAttribute); + tokenizer.exit(Name::MdxJsxTagAttribute); State::Retry(StateName::MdxJsxAttributeBefore) } else { crash( @@ -693,10 +692,10 @@ pub fn attribute_value_before(tokenizer: &mut Tokenizer) -> State { // Start of double- or single quoted value. Some(b'"' | b'\'') => { tokenizer.tokenize_state.marker = tokenizer.current.unwrap(); - tokenizer.enter(Name::MdxJsxTextTagAttributeValueLiteral); - tokenizer.enter(Name::MdxJsxTextTagAttributeValueLiteralMarker); + tokenizer.enter(Name::MdxJsxTagAttributeValueLiteral); + tokenizer.enter(Name::MdxJsxTagAttributeValueLiteralMarker); tokenizer.consume(); - tokenizer.exit(Name::MdxJsxTextTagAttributeValueLiteralMarker); + tokenizer.exit(Name::MdxJsxTagAttributeValueLiteralMarker); State::Next(StateName::MdxJsxAttributeValueQuotedStart) } // Attribute value expression. 
@@ -726,11 +725,11 @@ pub fn attribute_value_quoted_start(tokenizer: &mut Tokenizer) -> State { if let Some(byte) = tokenizer.current { if byte == tokenizer.tokenize_state.marker { tokenizer.tokenize_state.marker = 0; - tokenizer.enter(Name::MdxJsxTextTagAttributeValueLiteralMarker); + tokenizer.enter(Name::MdxJsxTagAttributeValueLiteralMarker); tokenizer.consume(); - tokenizer.exit(Name::MdxJsxTextTagAttributeValueLiteralMarker); - tokenizer.exit(Name::MdxJsxTextTagAttributeValueLiteral); - tokenizer.exit(Name::MdxJsxTextTagAttribute); + tokenizer.exit(Name::MdxJsxTagAttributeValueLiteralMarker); + tokenizer.exit(Name::MdxJsxTagAttributeValueLiteral); + tokenizer.exit(Name::MdxJsxTagAttribute); tokenizer.attempt(State::Next(StateName::MdxJsxAttributeBefore), State::Nok); State::Next(StateName::MdxJsxEsWhitespaceStart) } else if byte == b'\n' { @@ -740,7 +739,7 @@ pub fn attribute_value_quoted_start(tokenizer: &mut Tokenizer) -> State { ); State::Retry(StateName::MdxJsxEsWhitespaceStart) } else { - tokenizer.enter(Name::MdxJsxTextTagAttributeValueLiteralValue); + tokenizer.enter(Name::MdxJsxTagAttributeValueLiteralValue); State::Retry(StateName::MdxJsxAttributeValueQuoted) } } else { @@ -762,14 +761,10 @@ pub fn attribute_value_quoted_start(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn attribute_value_quoted(tokenizer: &mut Tokenizer) -> State { - // To do: doesn’t this break for: - // ```markdown - // a if tokenizer.current == Some(tokenizer.tokenize_state.marker) || matches!(tokenizer.current, None | Some(b'\n')) { - tokenizer.exit(Name::MdxJsxTextTagAttributeValueLiteralValue); + tokenizer.exit(Name::MdxJsxTagAttributeValueLiteralValue); State::Retry(StateName::MdxJsxAttributeValueQuotedStart) } else { tokenizer.consume(); @@ -810,10 +805,10 @@ pub fn self_closing(tokenizer: &mut Tokenizer) -> State { pub fn tag_end(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'>') => { - tokenizer.enter(Name::MdxJsxTextTagMarker); + tokenizer.enter(Name::MdxJsxTagMarker); tokenizer.consume(); - tokenizer.exit(Name::MdxJsxTextTagMarker); - tokenizer.exit(Name::MdxJsxTextTag); + tokenizer.exit(Name::MdxJsxTagMarker); + tokenizer.exit(tokenizer.tokenize_state.token_1.clone()); State::Ok } _ => unreachable!("expected `>`"), @@ -828,17 +823,12 @@ pub fn tag_end(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn es_whitespace_start(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - Some(b'\n') => { - // To do: check if this works for blank lines? - // To do: `text` allows lazy lines here, flow doesn’t. - tokenizer.attempt(State::Next(StateName::MdxJsxEsWhitespaceStart), State::Nok); - State::Retry(space_or_tab_eol(tokenizer)) - } + Some(b'\n') => State::Retry(StateName::MdxJsxEsWhitespaceEol), _ => { if byte_to_kind(tokenizer.parse_state.bytes, tokenizer.point.index) == CharacterKind::Whitespace { - tokenizer.enter(Name::MdxJsxTextEsWhitespace); + tokenizer.enter(Name::MdxJsxEsWhitespace); State::Retry(StateName::MdxJsxEsWhitespaceInside) } else { State::Ok @@ -856,8 +846,8 @@ pub fn es_whitespace_start(tokenizer: &mut Tokenizer) -> State { pub fn es_whitespace_inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'\n') => { - tokenizer.exit(Name::MdxJsxTextEsWhitespace); - State::Retry(StateName::MdxJsxEsWhitespaceStart) + tokenizer.exit(Name::MdxJsxEsWhitespace); + State::Retry(StateName::MdxJsxEsWhitespaceEol) } // Allow continuation bytes. 
Some(0x80..=0xBF) => { @@ -871,7 +861,55 @@ pub fn es_whitespace_inside(tokenizer: &mut Tokenizer) -> State { tokenizer.consume(); State::Next(StateName::MdxJsxEsWhitespaceInside) } else { - tokenizer.exit(Name::MdxJsxTextEsWhitespace); + tokenizer.exit(Name::MdxJsxEsWhitespace); + State::Ok + } + } + } +} + +pub fn es_whitespace_eol(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b'\n') => { + tokenizer.enter(Name::LineEnding); + tokenizer.consume(); + tokenizer.exit(Name::LineEnding); + State::Next(StateName::MdxJsxEsWhitespaceEolAfter) + } + _ => State::Ok, + } +} + +pub fn es_whitespace_eol_after(tokenizer: &mut Tokenizer) -> State { + if tokenizer.tokenize_state.token_1 == Name::MdxJsxFlowTag && tokenizer.lazy { + crash_lazy(tokenizer) + } else if byte_to_kind(tokenizer.parse_state.bytes, tokenizer.point.index) + == CharacterKind::Whitespace + { + tokenizer.enter(Name::MdxJsxEsWhitespace); + State::Retry(StateName::MdxJsxEsWhitespaceEolAfterInside) + } else { + State::Ok + } +} + +pub fn es_whitespace_eol_after_inside(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + // Not allowed. + Some(b'\n') => State::Nok, + // Allow continuation bytes. + Some(0x80..=0xBF) => { + tokenizer.consume(); + State::Next(StateName::MdxJsxEsWhitespaceEolAfterInside) + } + _ => { + if byte_to_kind(tokenizer.parse_state.bytes, tokenizer.point.index) + == CharacterKind::Whitespace + { + tokenizer.consume(); + State::Next(StateName::MdxJsxEsWhitespaceEolAfterInside) + } else { + tokenizer.exit(Name::MdxJsxEsWhitespace); State::Ok } } @@ -894,8 +932,19 @@ fn id_cont(code: Option) -> bool { } } +fn crash_lazy(tokenizer: &Tokenizer) -> State { + State::Error(format!( + "{}:{}: Unexpected lazy line in container, expected line to be prefixed with `>` when in a block quote, whitespace when in a list, etc", + tokenizer.point.line, tokenizer.point.column + )) +} + fn crash(tokenizer: &Tokenizer, at: &str, expect: &str) -> State { - let char = char_after_index(tokenizer.parse_state.bytes, tokenizer.point.index); + let char = if tokenizer.current == None { + None + } else { + char_after_index(tokenizer.parse_state.bytes, tokenizer.point.index) + }; // To do: externalize this, and the print mechanism in the tokenizer, // to one proper formatter. diff --git a/src/event.rs b/src/event.rs index b476d45..31059b0 100644 --- a/src/event.rs +++ b/src/event.rs @@ -2732,30 +2732,31 @@ pub enum Name { ThematicBreakSequence, // To do: sort. + MdxJsxFlowTag, MdxJsxTextTag, - MdxJsxTextTagMarker, // void - MdxJsxTextEsWhitespace, // void - MdxJsxTextTagClosingMarker, // void - MdxJsxTextTagName, - MdxJsxTextTagNamePrimary, // void? 
- MdxJsxTextTagNameMemberMarker, // void - MdxJsxTextTagNamePrefixMarker, // void - MdxJsxTextTagNameMember, // void - MdxJsxTextTagNameLocal, // void - MdxJsxTextTagSelfClosingMarker, // void - MdxJsxTextTagAttribute, - MdxJsxTextTagAttributeName, - MdxJsxTextTagAttributePrimaryName, - MdxJsxTextTagAttributeNamePrefixMarker, // void - MdxJsxTextTagAttributeInitializerMarker, // void - MdxJsxTextTagAttributeNameLocal, // void - MdxJsxTextTagAttributeValueLiteral, - MdxJsxTextTagAttributeValueLiteralMarker, // void - MdxJsxTextTagAttributeValueLiteralValue, + MdxJsxTagMarker, + MdxJsxTagClosingMarker, + MdxJsxTagName, + MdxJsxTagNamePrimary, + MdxJsxTagNameMemberMarker, + MdxJsxTagNamePrefixMarker, + MdxJsxTagNameMember, + MdxJsxTagNameLocal, + MdxJsxTagSelfClosingMarker, + MdxJsxTagAttribute, + MdxJsxTagAttributeName, + MdxJsxTagAttributePrimaryName, + MdxJsxTagAttributeNamePrefixMarker, + MdxJsxTagAttributeInitializerMarker, + MdxJsxTagAttributeNameLocal, + MdxJsxTagAttributeValueLiteral, + MdxJsxTagAttributeValueLiteralMarker, + MdxJsxTagAttributeValueLiteralValue, + MdxJsxEsWhitespace, } /// List of void events, used to make sure everything is working well. -pub const VOID_EVENTS: [Name; 60] = [ +pub const VOID_EVENTS: [Name; 73] = [ Name::AttentionSequence, Name::AutolinkEmail, Name::AutolinkMarker, @@ -2810,6 +2811,19 @@ pub const VOID_EVENTS: [Name; 60] = [ Name::MathFlowChunk, Name::MathTextData, Name::MathTextSequence, + Name::MdxJsxTagMarker, + Name::MdxJsxTagClosingMarker, + Name::MdxJsxTagNamePrimary, + Name::MdxJsxTagNameMemberMarker, + Name::MdxJsxTagNamePrefixMarker, + Name::MdxJsxTagNameMember, + Name::MdxJsxTagNameLocal, + Name::MdxJsxTagSelfClosingMarker, + Name::MdxJsxTagAttributeNamePrefixMarker, + Name::MdxJsxTagAttributeInitializerMarker, + Name::MdxJsxTagAttributeNameLocal, + Name::MdxJsxTagAttributeValueLiteralMarker, + Name::MdxJsxEsWhitespace, Name::ReferenceMarker, Name::ResourceMarker, Name::ResourceTitleMarker, diff --git a/src/lib.rs b/src/lib.rs index e0b6da2..2d5b044 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -301,6 +301,13 @@ pub struct Constructs { /// ^^^ /// ``` pub math_text: bool, + /// MDX: JSX (flow). + /// + /// ```markdown + /// > | + /// ^^^^^^^^^^^^^ + /// ``` + pub mdx_jsx_flow: bool, /// MDX: JSX (text). /// /// ```markdown @@ -349,6 +356,7 @@ impl Default for Constructs { list_item: true, math_flow: false, math_text: false, + mdx_jsx_flow: false, mdx_jsx_text: false, thematic_break: true, } @@ -388,6 +396,7 @@ impl Constructs { code_indented: false, html_flow: false, html_text: false, + mdx_jsx_flow: true, mdx_jsx_text: true, ..Self::default() } diff --git a/src/state.rs b/src/state.rs index 9e1e002..472a801 100644 --- a/src/state.rs +++ b/src/state.rs @@ -150,6 +150,7 @@ pub enum Name { FlowBeforeCodeIndented, FlowBeforeRaw, FlowBeforeHtml, + FlowBeforeMdxJsx, FlowBeforeHeadingAtx, FlowBeforeHeadingSetext, FlowBeforeThematicBreak, @@ -400,11 +401,19 @@ pub enum Name { TitleInside, // To do: sort. + MdxJsxFlowStart, + MdxJsxFlowBefore, + MdxJsxFlowAfter, + MdxJsxFlowEnd, + MdxJsxFlowNok, MdxJsxTextStart, MdxJsxTextAfter, MdxJsxTextNok, MdxJsxEsWhitespaceStart, MdxJsxEsWhitespaceInside, + MdxJsxEsWhitespaceEol, + MdxJsxEsWhitespaceEolAfter, + MdxJsxEsWhitespaceEolAfterInside, MdxJsxStart, MdxJsxStartAfter, MdxJsxNameBefore, @@ -435,11 +444,23 @@ pub enum Name { pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { let func = match name { // To do: sort. 
+ Name::MdxJsxFlowStart => construct::mdx_jsx_flow::start, + Name::MdxJsxFlowBefore => construct::mdx_jsx_flow::before, + Name::MdxJsxFlowAfter => construct::mdx_jsx_flow::after, + Name::MdxJsxFlowEnd => construct::mdx_jsx_flow::end, + Name::MdxJsxFlowNok => construct::mdx_jsx_flow::nok, Name::MdxJsxTextStart => construct::mdx_jsx_text::start, Name::MdxJsxTextAfter => construct::mdx_jsx_text::after, Name::MdxJsxTextNok => construct::mdx_jsx_text::nok, + Name::MdxJsxEsWhitespaceStart => construct::partial_mdx_jsx::es_whitespace_start, Name::MdxJsxEsWhitespaceInside => construct::partial_mdx_jsx::es_whitespace_inside, + Name::MdxJsxEsWhitespaceEol => construct::partial_mdx_jsx::es_whitespace_eol, + Name::MdxJsxEsWhitespaceEolAfter => construct::partial_mdx_jsx::es_whitespace_eol_after, + Name::MdxJsxEsWhitespaceEolAfterInside => { + construct::partial_mdx_jsx::es_whitespace_eol_after_inside + } + Name::MdxJsxStart => construct::partial_mdx_jsx::start, Name::MdxJsxStartAfter => construct::partial_mdx_jsx::start_after, Name::MdxJsxNameBefore => construct::partial_mdx_jsx::name_before, @@ -463,7 +484,9 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { construct::partial_mdx_jsx::attribute_local_name_before } Name::MdxJsxAttributeLocalName => construct::partial_mdx_jsx::attribute_local_name, - Name::MdxJsxAttributeLocalNameAfter => construct::partial_mdx_jsx::attribute_local_name_after, + Name::MdxJsxAttributeLocalNameAfter => { + construct::partial_mdx_jsx::attribute_local_name_after + } Name::MdxJsxAttributeValueBefore => construct::partial_mdx_jsx::attribute_value_before, Name::MdxJsxAttributeValueQuotedStart => { construct::partial_mdx_jsx::attribute_value_quoted_start @@ -585,6 +608,7 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { Name::FlowBeforeCodeIndented => construct::flow::before_code_indented, Name::FlowBeforeRaw => construct::flow::before_raw, Name::FlowBeforeHtml => construct::flow::before_html, + Name::FlowBeforeMdxJsx => construct::flow::before_mdx_jsx, Name::FlowBeforeHeadingAtx => construct::flow::before_heading_atx, Name::FlowBeforeHeadingSetext => construct::flow::before_heading_setext, Name::FlowBeforeThematicBreak => construct::flow::before_thematic_break, -- cgit
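
For context, a minimal sketch of how the new construct is switched on from the crate's public API. Only the `mdx_jsx_flow` and `mdx_jsx_text` fields are taken from the `Constructs` changes in `src/lib.rs` above; the `micromark_with_options` entry point, the `Options` shape, and the exact output are assumptions about the crate at this point in its history, so treat this as an illustration rather than a test.

```rust
use micromark::{micromark_with_options, Constructs, Options};

fn main() {
    // Enable both MDX JSX constructs used by this patch; every other field
    // keeps its CommonMark default. The field names come from the diff, the
    // surrounding `Options` plumbing is assumed.
    let options = Options {
        constructs: Constructs {
            mdx_jsx_flow: true,
            mdx_jsx_text: true,
            ..Constructs::default()
        },
        ..Options::default()
    };

    // A JSX tag that starts a line is now taken by the flow construct: the
    // tag buffers and compiles to nothing, and `on_exit_mdx_jsx_flow_tag`
    // slurps the following line ending, so only the heading should remain.
    let flow = micromark_with_options("<Component />\n\n# hi", &options);

    // The same tag inside a paragraph still goes through the text construct.
    let text = micromark_with_options("a <Component /> b", &options);

    println!("{:?}\n{:?}", flow, text);
}
```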
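
The new `src/construct/mdx_jsx_flow.rs` module that `mod.rs` and `state.rs` refer to is not part of the diff above. Going by the `mdx_jsx_text::start` shown earlier, which drives the shared `partial_mdx_jsx` machinery by setting `tokenize_state.token_1`, its entry point presumably looks roughly like this; a hedged reconstruction, where everything beyond the state and event names registered in `state.rs` and `event.rs` is a guess.

```rust
// Sketch of src/construct/mdx_jsx_flow.rs (not shown in this patch): mirror
// mdx_jsx_text::start, but tag the result as `MdxJsxFlowTag` so the shared
// partial_mdx_jsx states attach their events to the flow construct.
use crate::event::Name;
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;

pub fn start(tokenizer: &mut Tokenizer) -> State {
    if Some(b'<') == tokenizer.current && tokenizer.parse_state.options.constructs.mdx_jsx_flow {
        tokenizer.tokenize_state.token_1 = Name::MdxJsxFlowTag;
        // Hand off to the shared partial. The remaining registered states
        // (`MdxJsxFlowBefore`, `MdxJsxFlowAfter`, `MdxJsxFlowEnd`,
        // `MdxJsxFlowNok`) would deal with further tags on the same line,
        // the end of the line, and cleanup when the attempt fails.
        tokenizer.attempt(
            State::Next(StateName::MdxJsxFlowAfter),
            State::Next(StateName::MdxJsxFlowNok),
        );
        State::Retry(StateName::MdxJsxStart)
    } else {
        State::Nok
    }
}
```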