diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-09-07 15:53:06 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-09-07 15:53:06 +0200 |
commit | 1d92666865b35341e076efbefddf6e73b5e1542e (patch) | |
tree | 11c05985ec7679f73473e7ea2c769465698e2f08 /src/construct/mdx_jsx_text.rs | |
parent | e6018e52ee6ad9a8f8a0672b75bf515faf74af1f (diff) | |
download | markdown-rs-1d92666865b35341e076efbefddf6e73b5e1542e.tar.gz markdown-rs-1d92666865b35341e076efbefddf6e73b5e1542e.tar.bz2 markdown-rs-1d92666865b35341e076efbefddf6e73b5e1542e.zip |
Add support for recoverable syntax errors
Diffstat (limited to '')
-rw-r--r-- | src/construct/mdx_jsx_text.rs | 88 |
1 files changed, 41 insertions, 47 deletions
diff --git a/src/construct/mdx_jsx_text.rs b/src/construct/mdx_jsx_text.rs index deeb3e9..4c71fec 100644 --- a/src/construct/mdx_jsx_text.rs +++ b/src/construct/mdx_jsx_text.rs @@ -76,10 +76,10 @@ pub fn name_before(tokenizer: &mut Tokenizer) -> State { // Fragment opening tag. Some(b'>') => State::Retry(StateName::MdxJsxTextTagEnd), _ => { - // To do: unicode. - let char_opt = char_after_index(tokenizer.parse_state.bytes, tokenizer.point.index); - - if id_start(char_opt) { + if id_start(char_after_index( + tokenizer.parse_state.bytes, + tokenizer.point.index, + )) { tokenizer.enter(Name::MdxJsxTextTagName); tokenizer.enter(Name::MdxJsxTextTagNamePrimary); tokenizer.consume(); @@ -111,34 +111,32 @@ pub fn name_before(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn closing_tag_name_before(tokenizer: &mut Tokenizer) -> State { - match tokenizer.current { - // Fragment closing tag. - Some(b'>') => State::Retry(StateName::MdxJsxTextTagEnd), - // Start of a closing tag name. - _ => { - // To do: unicode. - let char_opt = char_after_index(tokenizer.parse_state.bytes, tokenizer.point.index); - - if id_start(char_opt) { - tokenizer.enter(Name::MdxJsxTextTagName); - tokenizer.enter(Name::MdxJsxTextTagNamePrimary); - tokenizer.consume(); - State::Next(StateName::MdxJsxTextPrimaryName) - } else { - crash( - tokenizer, - "before name", - &format!( - "a character that can start a name, such as a letter, `$`, or `_`{}", - if tokenizer.current == Some(b'*' | b'/') { - " (note: JS comments in JSX tags are not supported in MDX)" - } else { - "" - } - ), - ) - } - } + // Fragment closing tag. + if let Some(b'>') = tokenizer.current { + State::Retry(StateName::MdxJsxTextTagEnd) + } + // Start of a closing tag name. + else if id_start(char_after_index( + tokenizer.parse_state.bytes, + tokenizer.point.index, + )) { + tokenizer.enter(Name::MdxJsxTextTagName); + tokenizer.enter(Name::MdxJsxTextTagNamePrimary); + tokenizer.consume(); + State::Next(StateName::MdxJsxTextPrimaryName) + } else { + crash( + tokenizer, + "before name", + &format!( + "a character that can start a name, such as a letter, `$`, or `_`{}", + if tokenizer.current == Some(b'*' | b'/') { + " (note: JS comments in JSX tags are not supported in MDX)" + } else { + "" + } + ), + ) } } @@ -162,7 +160,6 @@ pub fn primary_name(tokenizer: &mut Tokenizer) -> State { } // Continuation of name: remain. // Allow continuation bytes. - // To do: unicode. else if matches!(tokenizer.current, Some(0x80..=0xBF)) || id_cont(char_after_index( tokenizer.parse_state.bytes, @@ -284,7 +281,7 @@ pub fn member_name(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::MdxJsxTextEsWhitespaceStart) } // Continuation of name: remain. - // To do: unicode. + // Allow continuation bytes. else if matches!(tokenizer.current, Some(0x80..=0xBF)) || id_cont(char_after_index( tokenizer.parse_state.bytes, @@ -398,7 +395,7 @@ pub fn local_name(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::MdxJsxTextEsWhitespaceStart) } // Continuation of name: remain. - // To do: unicode. + // Allow continuation bytes. else if matches!(tokenizer.current, Some(0x80..=0xBF)) || id_cont(char_after_index( tokenizer.parse_state.bytes, @@ -516,8 +513,8 @@ pub fn attribute_primary_name(tokenizer: &mut Tokenizer) -> State { ); State::Retry(StateName::MdxJsxTextEsWhitespaceStart) } - // Continuation of the attribute name: remain. - // To do: unicode. + // Continuation of name: remain. + // Allow continuation bytes. else if matches!(tokenizer.current, Some(0x80..=0xBF)) || id_cont(char_after_index( tokenizer.parse_state.bytes, @@ -525,7 +522,7 @@ pub fn attribute_primary_name(tokenizer: &mut Tokenizer) -> State { )) { tokenizer.consume(); - State::Next(StateName::MdxJsxTextLocalName) + State::Next(StateName::MdxJsxTextAttributePrimaryName) } else { crash( tokenizer, @@ -643,8 +640,8 @@ pub fn attribute_local_name(tokenizer: &mut Tokenizer) -> State { ); State::Retry(StateName::MdxJsxTextEsWhitespaceStart) } - // Continuation of local name: remain. - // To do: unicode. + // Continuation of name: remain. + // Allow continuation bytes. else if matches!(tokenizer.current, Some(0x80..=0xBF)) || id_cont(char_after_index( tokenizer.parse_state.bytes, @@ -906,7 +903,6 @@ pub fn es_whitespace_inside(tokenizer: &mut Tokenizer) -> State { } } -// To do: unicode. fn id_start(code: Option<char>) -> bool { if let Some(char) = code { UnicodeID::is_id_start(char) || matches!(char, '$' | '_') @@ -915,7 +911,6 @@ fn id_start(code: Option<char>) -> bool { } } -// To do: unicode. fn id_cont(code: Option<char>) -> bool { if let Some(char) = code { UnicodeID::is_id_continue(char) || matches!(char, '-' | '\u{200c}' | '\u{200d}') @@ -924,25 +919,24 @@ fn id_cont(code: Option<char>) -> bool { } } -fn crash(tokenizer: &Tokenizer, at: &str, expect: &str) -> ! { +fn crash(tokenizer: &Tokenizer, at: &str, expect: &str) -> State { // To do: externalize this, and the print mechanism in the tokenizer, // to one proper formatter. - // To do: figure out how Rust does errors? let actual = match tokenizer.current { None => "end of file".to_string(), Some(byte) => format_byte(byte), }; - unreachable!( + State::Error(format!( "{}:{}: Unexpected {} {}, expected {}", tokenizer.point.line, tokenizer.point.column, actual, at, expect - ) + )) } fn format_byte(byte: u8) -> String { match byte { b'`' => "`` ` ``".to_string(), b' '..=b'~' => format!("`{}`", str::from_utf8(&[byte]).unwrap()), - _ => format!("U+{:>04X}", byte), + _ => format!("character U+{:>04X}", byte), } } |