diff options
Diffstat (limited to 'src/construct')
| -rw-r--r-- | src/construct/document.rs | 21 | ||||
| -rw-r--r-- | src/construct/mdx_jsx_text.rs | 88 | 
2 files changed, 56 insertions, 53 deletions
diff --git a/src/construct/document.rs b/src/construct/document.rs index e31e58d..57c5f3a 100644 --- a/src/construct/document.rs +++ b/src/construct/document.rs @@ -14,7 +14,7 @@ use crate::state::{Name as StateName, State};  use crate::subtokenize::divide_events;  use crate::tokenizer::{Container, ContainerState, Tokenizer};  use crate::util::skip; -use alloc::{boxed::Box, vec::Vec}; +use alloc::{boxed::Box, string::String, vec::Vec};  /// Phases where we can exit containers.  #[derive(Debug, PartialEq)] @@ -266,7 +266,9 @@ pub fn container_new_after(tokenizer: &mut Tokenizer) -> State {      if tokenizer.tokenize_state.document_continued          != tokenizer.tokenize_state.document_container_stack.len()      { -        exit_containers(tokenizer, &Phase::Prefix); +        if let Err(message) = exit_containers(tokenizer, &Phase::Prefix) { +            return State::Error(message); +        }      }      // We are “piercing” into the flow with a new container. @@ -361,6 +363,7 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State {      let state = tokenizer          .tokenize_state          .document_child_state +        .take()          .unwrap_or(State::Next(StateName::FlowStart));      tokenizer.tokenize_state.document_exits.push(None); @@ -439,13 +442,17 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State {      if tokenizer.tokenize_state.document_continued          != tokenizer.tokenize_state.document_container_stack.len()      { -        exit_containers(tokenizer, &Phase::After); +        if let Err(message) = exit_containers(tokenizer, &Phase::After) { +            return State::Error(message); +        }      }      match tokenizer.current {          None => {              tokenizer.tokenize_state.document_continued = 0; -            exit_containers(tokenizer, &Phase::Eof); +            if let Err(message) = exit_containers(tokenizer, &Phase::Eof) { +                return State::Error(message); +            }              resolve(tokenizer);              State::Ok          } @@ -461,7 +468,7 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State {  }  /// Close containers (and flow if needed). -fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) { +fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) -> Result<(), String> {      let mut stack_close = tokenizer          .tokenize_state          .document_container_stack @@ -477,7 +484,7 @@ fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) {              .take()              .unwrap_or(State::Next(StateName::FlowStart)); -        child.flush(state, false); +        child.flush(state, false)?;      }      if !stack_close.is_empty() { @@ -524,6 +531,8 @@ fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) {      }      child.interrupt = false; + +    Ok(())  }  // Inject everything together. diff --git a/src/construct/mdx_jsx_text.rs b/src/construct/mdx_jsx_text.rs index deeb3e9..4c71fec 100644 --- a/src/construct/mdx_jsx_text.rs +++ b/src/construct/mdx_jsx_text.rs @@ -76,10 +76,10 @@ pub fn name_before(tokenizer: &mut Tokenizer) -> State {          // Fragment opening tag.          Some(b'>') => State::Retry(StateName::MdxJsxTextTagEnd),          _ => { -            // To do: unicode. -            let char_opt = char_after_index(tokenizer.parse_state.bytes, tokenizer.point.index); - -            if id_start(char_opt) { +            if id_start(char_after_index( +                tokenizer.parse_state.bytes, +                tokenizer.point.index, +            )) {                  tokenizer.enter(Name::MdxJsxTextTagName);                  tokenizer.enter(Name::MdxJsxTextTagNamePrimary);                  tokenizer.consume(); @@ -111,34 +111,32 @@ pub fn name_before(tokenizer: &mut Tokenizer) -> State {  ///         ^  /// ```  pub fn closing_tag_name_before(tokenizer: &mut Tokenizer) -> State { -    match tokenizer.current { -        // Fragment closing tag. -        Some(b'>') => State::Retry(StateName::MdxJsxTextTagEnd), -        // Start of a closing tag name. -        _ => { -            // To do: unicode. -            let char_opt = char_after_index(tokenizer.parse_state.bytes, tokenizer.point.index); - -            if id_start(char_opt) { -                tokenizer.enter(Name::MdxJsxTextTagName); -                tokenizer.enter(Name::MdxJsxTextTagNamePrimary); -                tokenizer.consume(); -                State::Next(StateName::MdxJsxTextPrimaryName) -            } else { -                crash( -                    tokenizer, -                    "before name", -                    &format!( -                        "a character that can start a name, such as a letter, `$`, or `_`{}", -                        if tokenizer.current == Some(b'*' | b'/') { -                            " (note: JS comments in JSX tags are not supported in MDX)" -                        } else { -                            "" -                        } -                    ), -                ) -            } -        } +    // Fragment closing tag. +    if let Some(b'>') = tokenizer.current { +        State::Retry(StateName::MdxJsxTextTagEnd) +    } +    // Start of a closing tag name. +    else if id_start(char_after_index( +        tokenizer.parse_state.bytes, +        tokenizer.point.index, +    )) { +        tokenizer.enter(Name::MdxJsxTextTagName); +        tokenizer.enter(Name::MdxJsxTextTagNamePrimary); +        tokenizer.consume(); +        State::Next(StateName::MdxJsxTextPrimaryName) +    } else { +        crash( +            tokenizer, +            "before name", +            &format!( +                "a character that can start a name, such as a letter, `$`, or `_`{}", +                if tokenizer.current == Some(b'*' | b'/') { +                    " (note: JS comments in JSX tags are not supported in MDX)" +                } else { +                    "" +                } +            ), +        )      }  } @@ -162,7 +160,6 @@ pub fn primary_name(tokenizer: &mut Tokenizer) -> State {      }      // Continuation of name: remain.      // Allow continuation bytes. -    // To do: unicode.      else if matches!(tokenizer.current, Some(0x80..=0xBF))          || id_cont(char_after_index(              tokenizer.parse_state.bytes, @@ -284,7 +281,7 @@ pub fn member_name(tokenizer: &mut Tokenizer) -> State {          State::Retry(StateName::MdxJsxTextEsWhitespaceStart)      }      // Continuation of name: remain. -    // To do: unicode. +    // Allow continuation bytes.      else if matches!(tokenizer.current, Some(0x80..=0xBF))          || id_cont(char_after_index(              tokenizer.parse_state.bytes, @@ -398,7 +395,7 @@ pub fn local_name(tokenizer: &mut Tokenizer) -> State {          State::Retry(StateName::MdxJsxTextEsWhitespaceStart)      }      // Continuation of name: remain. -    // To do: unicode. +    // Allow continuation bytes.      else if matches!(tokenizer.current, Some(0x80..=0xBF))          || id_cont(char_after_index(              tokenizer.parse_state.bytes, @@ -516,8 +513,8 @@ pub fn attribute_primary_name(tokenizer: &mut Tokenizer) -> State {          );          State::Retry(StateName::MdxJsxTextEsWhitespaceStart)      } -    // Continuation of the attribute name: remain. -    // To do: unicode. +    // Continuation of name: remain. +    // Allow continuation bytes.      else if matches!(tokenizer.current, Some(0x80..=0xBF))          || id_cont(char_after_index(              tokenizer.parse_state.bytes, @@ -525,7 +522,7 @@ pub fn attribute_primary_name(tokenizer: &mut Tokenizer) -> State {          ))      {          tokenizer.consume(); -        State::Next(StateName::MdxJsxTextLocalName) +        State::Next(StateName::MdxJsxTextAttributePrimaryName)      } else {          crash(              tokenizer, @@ -643,8 +640,8 @@ pub fn attribute_local_name(tokenizer: &mut Tokenizer) -> State {          );          State::Retry(StateName::MdxJsxTextEsWhitespaceStart)      } -    // Continuation of local name: remain. -    // To do: unicode. +    // Continuation of name: remain. +    // Allow continuation bytes.      else if matches!(tokenizer.current, Some(0x80..=0xBF))          || id_cont(char_after_index(              tokenizer.parse_state.bytes, @@ -906,7 +903,6 @@ pub fn es_whitespace_inside(tokenizer: &mut Tokenizer) -> State {      }  } -// To do: unicode.  fn id_start(code: Option<char>) -> bool {      if let Some(char) = code {          UnicodeID::is_id_start(char) || matches!(char, '$' | '_') @@ -915,7 +911,6 @@ fn id_start(code: Option<char>) -> bool {      }  } -// To do: unicode.  fn id_cont(code: Option<char>) -> bool {      if let Some(char) = code {          UnicodeID::is_id_continue(char) || matches!(char, '-' | '\u{200c}' | '\u{200d}') @@ -924,25 +919,24 @@ fn id_cont(code: Option<char>) -> bool {      }  } -fn crash(tokenizer: &Tokenizer, at: &str, expect: &str) -> ! { +fn crash(tokenizer: &Tokenizer, at: &str, expect: &str) -> State {      // To do: externalize this, and the print mechanism in the tokenizer,      // to one proper formatter. -    // To do: figure out how Rust does errors?      let actual = match tokenizer.current {          None => "end of file".to_string(),          Some(byte) => format_byte(byte),      }; -    unreachable!( +    State::Error(format!(          "{}:{}: Unexpected {} {}, expected {}",          tokenizer.point.line, tokenizer.point.column, actual, at, expect -    ) +    ))  }  fn format_byte(byte: u8) -> String {      match byte {          b'`' => "`` ` ``".to_string(),          b' '..=b'~' => format!("`{}`", str::from_utf8(&[byte]).unwrap()), -        _ => format!("U+{:>04X}", byte), +        _ => format!("character U+{:>04X}", byte),      }  }  | 
