about | summary | refs | log | tree | commit | diff | stats
path: root/src/construct/mdx_jsx_text.rs
diff options
context:
space:
mode:
author: Titus Wormer <tituswormer@gmail.com> 2022-09-07 15:53:06 +0200
committer: Titus Wormer <tituswormer@gmail.com> 2022-09-07 15:53:06 +0200
commit: 1d92666865b35341e076efbefddf6e73b5e1542e (patch)
tree: 11c05985ec7679f73473e7ea2c769465698e2f08 /src/construct/mdx_jsx_text.rs
parent: e6018e52ee6ad9a8f8a0672b75bf515faf74af1f (diff)
download: markdown-rs-1d92666865b35341e076efbefddf6e73b5e1542e.tar.gz
markdown-rs-1d92666865b35341e076efbefddf6e73b5e1542e.tar.bz2
markdown-rs-1d92666865b35341e076efbefddf6e73b5e1542e.zip
Add support for recoverable syntax errors
Diffstat (limited to 'src/construct/mdx_jsx_text.rs')
-rw-r--r-- src/construct/mdx_jsx_text.rs | 88
1 files changed, 41 insertions, 47 deletions
diff --git a/src/construct/mdx_jsx_text.rs b/src/construct/mdx_jsx_text.rs
index deeb3e9..4c71fec 100644
--- a/src/construct/mdx_jsx_text.rs
+++ b/src/construct/mdx_jsx_text.rs
@@ -76,10 +76,10 @@ pub fn name_before(tokenizer: &mut Tokenizer) -> State {
// Fragment opening tag.
Some(b'>') => State::Retry(StateName::MdxJsxTextTagEnd),
_ => {
- // To do: unicode.
- let char_opt = char_after_index(tokenizer.parse_state.bytes, tokenizer.point.index);
-
- if id_start(char_opt) {
+ if id_start(char_after_index(
+ tokenizer.parse_state.bytes,
+ tokenizer.point.index,
+ )) {
tokenizer.enter(Name::MdxJsxTextTagName);
tokenizer.enter(Name::MdxJsxTextTagNamePrimary);
tokenizer.consume();
@@ -111,34 +111,32 @@ pub fn name_before(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
pub fn closing_tag_name_before(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- // Fragment closing tag.
- Some(b'>') => State::Retry(StateName::MdxJsxTextTagEnd),
- // Start of a closing tag name.
- _ => {
- // To do: unicode.
- let char_opt = char_after_index(tokenizer.parse_state.bytes, tokenizer.point.index);
-
- if id_start(char_opt) {
- tokenizer.enter(Name::MdxJsxTextTagName);
- tokenizer.enter(Name::MdxJsxTextTagNamePrimary);
- tokenizer.consume();
- State::Next(StateName::MdxJsxTextPrimaryName)
- } else {
- crash(
- tokenizer,
- "before name",
- &format!(
- "a character that can start a name, such as a letter, `$`, or `_`{}",
- if tokenizer.current == Some(b'*' | b'/') {
- " (note: JS comments in JSX tags are not supported in MDX)"
- } else {
- ""
- }
- ),
- )
- }
- }
+ // Fragment closing tag.
+ if let Some(b'>') = tokenizer.current {
+ State::Retry(StateName::MdxJsxTextTagEnd)
+ }
+ // Start of a closing tag name.
+ else if id_start(char_after_index(
+ tokenizer.parse_state.bytes,
+ tokenizer.point.index,
+ )) {
+ tokenizer.enter(Name::MdxJsxTextTagName);
+ tokenizer.enter(Name::MdxJsxTextTagNamePrimary);
+ tokenizer.consume();
+ State::Next(StateName::MdxJsxTextPrimaryName)
+ } else {
+ crash(
+ tokenizer,
+ "before name",
+ &format!(
+ "a character that can start a name, such as a letter, `$`, or `_`{}",
+ if tokenizer.current == Some(b'*' | b'/') {
+ " (note: JS comments in JSX tags are not supported in MDX)"
+ } else {
+ ""
+ }
+ ),
+ )
}
}
@@ -162,7 +160,6 @@ pub fn primary_name(tokenizer: &mut Tokenizer) -> State {
}
// Continuation of name: remain.
// Allow continuation bytes.
- // To do: unicode.
else if matches!(tokenizer.current, Some(0x80..=0xBF))
|| id_cont(char_after_index(
tokenizer.parse_state.bytes,
@@ -284,7 +281,7 @@ pub fn member_name(tokenizer: &mut Tokenizer) -> State {
State::Retry(StateName::MdxJsxTextEsWhitespaceStart)
}
// Continuation of name: remain.
- // To do: unicode.
+ // Allow continuation bytes.
else if matches!(tokenizer.current, Some(0x80..=0xBF))
|| id_cont(char_after_index(
tokenizer.parse_state.bytes,
@@ -398,7 +395,7 @@ pub fn local_name(tokenizer: &mut Tokenizer) -> State {
State::Retry(StateName::MdxJsxTextEsWhitespaceStart)
}
// Continuation of name: remain.
- // To do: unicode.
+ // Allow continuation bytes.
else if matches!(tokenizer.current, Some(0x80..=0xBF))
|| id_cont(char_after_index(
tokenizer.parse_state.bytes,
@@ -516,8 +513,8 @@ pub fn attribute_primary_name(tokenizer: &mut Tokenizer) -> State {
);
State::Retry(StateName::MdxJsxTextEsWhitespaceStart)
}
- // Continuation of the attribute name: remain.
- // To do: unicode.
+ // Continuation of name: remain.
+ // Allow continuation bytes.
else if matches!(tokenizer.current, Some(0x80..=0xBF))
|| id_cont(char_after_index(
tokenizer.parse_state.bytes,
@@ -525,7 +522,7 @@ pub fn attribute_primary_name(tokenizer: &mut Tokenizer) -> State {
))
{
tokenizer.consume();
- State::Next(StateName::MdxJsxTextLocalName)
+ State::Next(StateName::MdxJsxTextAttributePrimaryName)
} else {
crash(
tokenizer,
@@ -643,8 +640,8 @@ pub fn attribute_local_name(tokenizer: &mut Tokenizer) -> State {
);
State::Retry(StateName::MdxJsxTextEsWhitespaceStart)
}
- // Continuation of local name: remain.
- // To do: unicode.
+ // Continuation of name: remain.
+ // Allow continuation bytes.
else if matches!(tokenizer.current, Some(0x80..=0xBF))
|| id_cont(char_after_index(
tokenizer.parse_state.bytes,
@@ -906,7 +903,6 @@ pub fn es_whitespace_inside(tokenizer: &mut Tokenizer) -> State {
}
}
-// To do: unicode.
fn id_start(code: Option<char>) -> bool {
if let Some(char) = code {
UnicodeID::is_id_start(char) || matches!(char, '$' | '_')
@@ -915,7 +911,6 @@ fn id_start(code: Option<char>) -> bool {
}
}
-// To do: unicode.
fn id_cont(code: Option<char>) -> bool {
if let Some(char) = code {
UnicodeID::is_id_continue(char) || matches!(char, '-' | '\u{200c}' | '\u{200d}')
@@ -924,25 +919,24 @@ fn id_cont(code: Option<char>) -> bool {
}
}
-fn crash(tokenizer: &Tokenizer, at: &str, expect: &str) -> ! {
+fn crash(tokenizer: &Tokenizer, at: &str, expect: &str) -> State {
// To do: externalize this, and the print mechanism in the tokenizer,
// to one proper formatter.
- // To do: figure out how Rust does errors?
let actual = match tokenizer.current {
None => "end of file".to_string(),
Some(byte) => format_byte(byte),
};
- unreachable!(
+ State::Error(format!(
"{}:{}: Unexpected {} {}, expected {}",
tokenizer.point.line, tokenizer.point.column, actual, at, expect
- )
+ ))
}
fn format_byte(byte: u8) -> String {
match byte {
b'`' => "`` ` ``".to_string(),
b' '..=b'~' => format!("`{}`", str::from_utf8(&[byte]).unwrap()),
- _ => format!("U+{:>04X}", byte),
+ _ => format!("character U+{:>04X}", byte),
}
}