Add support for recoverable syntax errors

author: Titus Wormer <tituswormer@gmail.com> 2022-09-07 15:53:06 +0200
committer: Titus Wormer <tituswormer@gmail.com> 2022-09-07 15:53:06 +0200
commit: 1d92666865b35341e076efbefddf6e73b5e1542e (patch)
tree: 11c05985ec7679f73473e7ea2c769465698e2f08 /src
parent: e6018e52ee6ad9a8f8a0672b75bf515faf74af1f (diff)
download: markdown-rs-1d92666865b35341e076efbefddf6e73b5e1542e.tar.gz
markdown-rs-1d92666865b35341e076efbefddf6e73b5e1542e.tar.bz2
markdown-rs-1d92666865b35341e076efbefddf6e73b5e1542e.zip
8 files changed, 175 insertions, 97 deletions
diff --git a/src/compiler.rs b/src/compiler.rs
index b271768..4f0f958 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -333,7 +333,7 @@ pub fn compile(events: &[Event], bytes: &[u8], options: &Options) -> String {
         generate_footnote_section(&mut context);
     }
 
-    assert_eq!(context.buffers.len(), 1, "expected 1 final buffer");
+    debug_assert_eq!(context.buffers.len(), 1, "expected 1 final buffer");
     context
         .buffers
         .get(0)
diff --git a/src/construct/document.rs b/src/construct/document.rs
index e31e58d..57c5f3a 100644
--- a/src/construct/document.rs
+++ b/src/construct/document.rs
@@ -14,7 +14,7 @@ use crate::state::{Name as StateName, State};
 use crate::subtokenize::divide_events;
 use crate::tokenizer::{Container, ContainerState, Tokenizer};
 use crate::util::skip;
-use alloc::{boxed::Box, vec::Vec};
+use alloc::{boxed::Box, string::String, vec::Vec};
 
 /// Phases where we can exit containers.
 #[derive(Debug, PartialEq)]
@@ -266,7 +266,9 @@ pub fn container_new_after(tokenizer: &mut Tokenizer) -> State {
     if tokenizer.tokenize_state.document_continued
         != tokenizer.tokenize_state.document_container_stack.len()
     {
-        exit_containers(tokenizer, &Phase::Prefix);
+        if let Err(message) = exit_containers(tokenizer, &Phase::Prefix) {
+            return State::Error(message);
+        }
     }
 
     // We are “piercing” into the flow with a new container.
@@ -361,6 +363,7 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State {
     let state = tokenizer
         .tokenize_state
         .document_child_state
+        .take()
         .unwrap_or(State::Next(StateName::FlowStart));
 
     tokenizer.tokenize_state.document_exits.push(None);
@@ -439,13 +442,17 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State {
     if tokenizer.tokenize_state.document_continued
         != tokenizer.tokenize_state.document_container_stack.len()
     {
-        exit_containers(tokenizer, &Phase::After);
+        if let Err(message) = exit_containers(tokenizer, &Phase::After) {
+            return State::Error(message);
+        }
     }
 
     match tokenizer.current {
         None => {
             tokenizer.tokenize_state.document_continued = 0;
-            exit_containers(tokenizer, &Phase::Eof);
+            if let Err(message) = exit_containers(tokenizer, &Phase::Eof) {
+                return State::Error(message);
+            }
             resolve(tokenizer);
             State::Ok
         }
@@ -461,7 +468,7 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State {
 }
 
 /// Close containers (and flow if needed).
-fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) {
+fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) -> Result<(), String> {
     let mut stack_close = tokenizer
         .tokenize_state
         .document_container_stack
@@ -477,7 +484,7 @@ fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) {
             .take()
             .unwrap_or(State::Next(StateName::FlowStart));
 
-        child.flush(state, false);
+        child.flush(state, false)?;
     }
 
     if !stack_close.is_empty() {
@@ -524,6 +531,8 @@ fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) {
     }
 
     child.interrupt = false;
+
+    Ok(())
 }
 
 // Inject everything together.
diff --git a/src/construct/mdx_jsx_text.rs b/src/construct/mdx_jsx_text.rs
index deeb3e9..4c71fec 100644
--- a/src/construct/mdx_jsx_text.rs
+++ b/src/construct/mdx_jsx_text.rs
@@ -76,10 +76,10 @@ pub fn name_before(tokenizer: &mut Tokenizer) -> State {
         // Fragment opening tag.
         Some(b'>') => State::Retry(StateName::MdxJsxTextTagEnd),
         _ => {
-            // To do: unicode.
-            let char_opt = char_after_index(tokenizer.parse_state.bytes, tokenizer.point.index);
-
-            if id_start(char_opt) {
+            if id_start(char_after_index(
+                tokenizer.parse_state.bytes,
+                tokenizer.point.index,
+            )) {
                 tokenizer.enter(Name::MdxJsxTextTagName);
                 tokenizer.enter(Name::MdxJsxTextTagNamePrimary);
                 tokenizer.consume();
@@ -111,34 +111,32 @@ pub fn name_before(tokenizer: &mut Tokenizer) -> State {
 ///         ^
 /// ```
 pub fn closing_tag_name_before(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        // Fragment closing tag.
-        Some(b'>') => State::Retry(StateName::MdxJsxTextTagEnd),
-        // Start of a closing tag name.
-        _ => {
-            // To do: unicode.
-            let char_opt = char_after_index(tokenizer.parse_state.bytes, tokenizer.point.index);
-
-            if id_start(char_opt) {
-                tokenizer.enter(Name::MdxJsxTextTagName);
-                tokenizer.enter(Name::MdxJsxTextTagNamePrimary);
-                tokenizer.consume();
-                State::Next(StateName::MdxJsxTextPrimaryName)
-            } else {
-                crash(
-                    tokenizer,
-                    "before name",
-                    &format!(
-                        "a character that can start a name, such as a letter, `$`, or `_`{}",
-                        if tokenizer.current == Some(b'*' | b'/') {
-                            " (note: JS comments in JSX tags are not supported in MDX)"
-                        } else {
-                            ""
-                        }
-                    ),
-                )
-            }
-        }
+    // Fragment closing tag: there is no name, so retry at the tag end.
+    if let Some(b'>') = tokenizer.current {
+        State::Retry(StateName::MdxJsxTextTagEnd)
+    }
+    // Start of a closing tag name: the next character must be able to start a name.
+    else if id_start(char_after_index(
+        tokenizer.parse_state.bytes,
+        tokenizer.point.index,
+    )) {
+        tokenizer.enter(Name::MdxJsxTextTagName);
+        tokenizer.enter(Name::MdxJsxTextTagNamePrimary);
+        tokenizer.consume();
+        State::Next(StateName::MdxJsxTextPrimaryName)
+    } else {
+        crash(
+            tokenizer,
+            "before name",
+            &format!(
+                "a character that can start a name, such as a letter, `$`, or `_`{}",
+                if matches!(tokenizer.current, Some(b'*' | b'/')) {
+                    " (note: JS comments in JSX tags are not supported in MDX)"
+                } else {
+                    ""
+                }
+            ),
+        )
     }
 }
 
@@ -162,7 +160,6 @@ pub fn primary_name(tokenizer: &mut Tokenizer) -> State {
     }
     // Continuation of name: remain.
     // Allow continuation bytes.
-    // To do: unicode.
     else if matches!(tokenizer.current, Some(0x80..=0xBF))
         || id_cont(char_after_index(
             tokenizer.parse_state.bytes,
@@ -284,7 +281,7 @@ pub fn member_name(tokenizer: &mut Tokenizer) -> State {
         State::Retry(StateName::MdxJsxTextEsWhitespaceStart)
     }
     // Continuation of name: remain.
-    // To do: unicode.
+    // Allow continuation bytes.
     else if matches!(tokenizer.current, Some(0x80..=0xBF))
         || id_cont(char_after_index(
             tokenizer.parse_state.bytes,
@@ -398,7 +395,7 @@ pub fn local_name(tokenizer: &mut Tokenizer) -> State {
         State::Retry(StateName::MdxJsxTextEsWhitespaceStart)
     }
     // Continuation of name: remain.
-    // To do: unicode.
+    // Allow continuation bytes.
     else if matches!(tokenizer.current, Some(0x80..=0xBF))
         || id_cont(char_after_index(
             tokenizer.parse_state.bytes,
@@ -516,8 +513,8 @@ pub fn attribute_primary_name(tokenizer: &mut Tokenizer) -> State {
         );
         State::Retry(StateName::MdxJsxTextEsWhitespaceStart)
     }
-    // Continuation of the attribute name: remain.
-    // To do: unicode.
+    // Continuation of name: remain.
+    // Allow continuation bytes.
     else if matches!(tokenizer.current, Some(0x80..=0xBF))
         || id_cont(char_after_index(
             tokenizer.parse_state.bytes,
@@ -525,7 +522,7 @@ pub fn attribute_primary_name(tokenizer: &mut Tokenizer) -> State {
         ))
     {
         tokenizer.consume();
-        State::Next(StateName::MdxJsxTextLocalName)
+        State::Next(StateName::MdxJsxTextAttributePrimaryName)
     } else {
         crash(
             tokenizer,
@@ -643,8 +640,8 @@ pub fn attribute_local_name(tokenizer: &mut Tokenizer) -> State {
         );
         State::Retry(StateName::MdxJsxTextEsWhitespaceStart)
     }
-    // Continuation of local name: remain.
-    // To do: unicode.
+    // Continuation of name: remain.
+    // Allow continuation bytes.
     else if matches!(tokenizer.current, Some(0x80..=0xBF))
         || id_cont(char_after_index(
             tokenizer.parse_state.bytes,
@@ -906,7 +903,6 @@ pub fn es_whitespace_inside(tokenizer: &mut Tokenizer) -> State {
     }
 }
 
-// To do: unicode.
 fn id_start(code: Option<char>) -> bool {
     if let Some(char) = code {
         UnicodeID::is_id_start(char) || matches!(char, '$' | '_')
@@ -915,7 +911,6 @@ fn id_start(code: Option<char>) -> bool {
     }
 }
 
-// To do: unicode.
 fn id_cont(code: Option<char>) -> bool {
     if let Some(char) = code {
         UnicodeID::is_id_continue(char) || matches!(char, '-' | '\u{200c}' | '\u{200d}')
@@ -924,25 +919,24 @@ fn id_cont(code: Option<char>) -> bool {
     }
 }
 
-fn crash(tokenizer: &Tokenizer, at: &str, expect: &str) -> ! {
+fn crash(tokenizer: &Tokenizer, at: &str, expect: &str) -> State {
     // To do: externalize this, and the print mechanism in the tokenizer,
     // to one proper formatter.
-    // To do: figure out how Rust does errors?
     let actual = match tokenizer.current {
         None => "end of file".to_string(),
         Some(byte) => format_byte(byte),
     };
 
-    unreachable!(
+    State::Error(format!(
         "{}:{}: Unexpected {} {}, expected {}",
         tokenizer.point.line, tokenizer.point.column, actual, at, expect
-    )
+    ))
 }
 
 fn format_byte(byte: u8) -> String {
     match byte {
         b'`' => "`` ` ``".to_string(),
         b' '..=b'~' => format!("`{}`", str::from_utf8(&[byte]).unwrap()),
-        _ => format!("U+{:>04X}", byte),
+        _ => format!("character U+{:>04X}", byte),
     }
 }
diff --git a/src/lib.rs b/src/lib.rs
index 7fd705b..e0b6da2 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -406,6 +406,7 @@ pub struct Options {
     ///
     /// ```
     /// use micromark::{micromark, micromark_with_options, Options};
+    /// # fn main() -> Result<(), String> {
     ///
     /// // micromark is safe by default:
     /// assert_eq!(
@@ -421,9 +422,11 @@ pub struct Options {
     ///             allow_dangerous_html: true,
     ///             ..Options::default()
     ///         }
-    ///     ),
+    ///     )?,
     ///     "<p>Hi, <i>venus</i>!</p>"
     /// );
+    /// # Ok(())
+    /// # }
     /// ```
     pub allow_dangerous_html: bool,
 
@@ -435,6 +438,7 @@ pub struct Options {
     ///
     /// ```
     /// use micromark::{micromark, micromark_with_options, Options};
+    /// # fn main() -> Result<(), String> {
     ///
     /// // micromark is safe by default:
     /// assert_eq!(
@@ -450,9 +454,11 @@ pub struct Options {
     ///             allow_dangerous_protocol: true,
     ///             ..Options::default()
     ///         }
-    ///     ),
+    ///     )?,
     ///     "<p><a href=\"javascript:alert(1)\">javascript:alert(1)</a></p>"
     /// );
+    /// # Ok(())
+    /// # }
     /// ```
     pub allow_dangerous_protocol: bool,
 
@@ -463,6 +469,7 @@ pub struct Options {
     ///
     /// ```
     /// use micromark::{micromark, micromark_with_options, Options, Constructs};
+    /// # fn main() -> Result<(), String> {
     ///
     /// // micromark follows CommonMark by default:
     /// assert_eq!(
@@ -481,9 +488,11 @@ pub struct Options {
     ///             },
     ///             ..Options::default()
     ///         }
-    ///     ),
+    ///     )?,
     ///     "<p>indented code?</p>"
     /// );
+    /// # Ok(())
+    /// # }
     /// ```
     pub constructs: Constructs,
 
@@ -503,6 +512,7 @@ pub struct Options {
     ///
     /// ```
     /// use micromark::{micromark, micromark_with_options, Options, LineEnding};
+    /// # fn main() -> Result<(), String> {
     ///
     /// // micromark uses `\n` by default:
     /// assert_eq!(
@@ -518,9 +528,11 @@ pub struct Options {
     ///             default_line_ending: LineEnding::CarriageReturnLineFeed,
     ///             ..Options::default()
     ///         }
-    ///     ),
+    ///     )?,
     ///     "<blockquote>\r\n<p>a</p>\r\n</blockquote>"
     /// );
+    /// # Ok(())
+    /// # }
     /// ```
     pub default_line_ending: LineEnding,
 
@@ -534,6 +546,7 @@ pub struct Options {
     ///
     /// ```
     /// use micromark::{micromark, micromark_with_options, Options, Constructs};
+    /// # fn main() -> Result<(), String> {
     ///
     /// // `"Footnotes"` is used by default:
     /// assert_eq!(
@@ -543,7 +556,7 @@ pub struct Options {
     ///             constructs: Constructs::gfm(),
     ///             ..Options::default()
     ///         }
-    ///     ),
+    ///     )?,
     ///     "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h2 id=\"footnote-label\" class=\"sr-only\">Footnotes</h2>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n"
     /// );
     ///
@@ -556,9 +569,11 @@ pub struct Options {
     ///             gfm_footnote_label: Some("Notes de bas de page".to_string()),
     ///             ..Options::default()
     ///         }
-    ///     ),
+    ///     )?,
     ///     "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h2 id=\"footnote-label\" class=\"sr-only\">Notes de bas de page</h2>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n"
     /// );
+    /// # Ok(())
+    /// # }
     /// ```
     pub gfm_footnote_label: Option<String>,
 
@@ -570,6 +585,7 @@ pub struct Options {
     ///
     /// ```
     /// use micromark::{micromark, micromark_with_options, Options, Constructs};
+    /// # fn main() -> Result<(), String> {
     ///
     /// // `"h2"` is used by default:
     /// assert_eq!(
@@ -579,7 +595,7 @@ pub struct Options {
     ///             constructs: Constructs::gfm(),
     ///             ..Options::default()
     ///         }
-    ///     ),
+    ///     )?,
     ///     "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h2 id=\"footnote-label\" class=\"sr-only\">Footnotes</h2>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n"
     /// );
     ///
@@ -592,9 +608,11 @@ pub struct Options {
     ///             gfm_footnote_label_tag_name: Some("h1".to_string()),
     ///             ..Options::default()
     ///         }
-    ///     ),
+    ///     )?,
     ///     "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h1 id=\"footnote-label\" class=\"sr-only\">Footnotes</h1>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n"
     /// );
+    /// # Ok(())
+    /// # }
     /// ```
     pub gfm_footnote_label_tag_name: Option<String>,
 
@@ -612,6 +630,7 @@ pub struct Options {
     ///
     /// ```
     /// use micromark::{micromark, micromark_with_options, Options, Constructs};
+    /// # fn main() -> Result<(), String> {
     ///
     /// // `"class=\"sr-only\""` is used by default:
     /// assert_eq!(
@@ -621,7 +640,7 @@ pub struct Options {
     ///             constructs: Constructs::gfm(),
     ///             ..Options::default()
     ///         }
-    ///     ),
+    ///     )?,
     ///     "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h2 id=\"footnote-label\" class=\"sr-only\">Footnotes</h2>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n"
     /// );
     ///
@@ -634,9 +653,11 @@ pub struct Options {
     ///             gfm_footnote_label_attributes: Some("class=\"footnote-heading\"".to_string()),
     ///             ..Options::default()
     ///         }
-    ///     ),
+    ///     )?,
     ///     "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h2 id=\"footnote-label\" class=\"footnote-heading\">Footnotes</h2>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n"
     /// );
+    /// # Ok(())
+    /// # }
     /// ```
     pub gfm_footnote_label_attributes: Option<String>,
 
@@ -649,6 +670,7 @@ pub struct Options {
     ///
     /// ```
     /// use micromark::{micromark, micromark_with_options, Options, Constructs};
+    /// # fn main() -> Result<(), String> {
     ///
     /// // `"Back to content"` is used by default:
     /// assert_eq!(
@@ -658,7 +680,7 @@ pub struct Options {
     ///             constructs: Constructs::gfm(),
     ///             ..Options::default()
     ///         }
-    ///     ),
+    ///     )?,
     ///     "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h2 id=\"footnote-label\" class=\"sr-only\">Footnotes</h2>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n"
     /// );
     ///
@@ -671,9 +693,11 @@ pub struct Options {
     ///             gfm_footnote_back_label: Some("Arrière".to_string()),
     ///             ..Options::default()
     ///         }
-    ///     ),
+    ///     )?,
     ///     "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h2 id=\"footnote-label\" class=\"sr-only\">Footnotes</h2>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Arrière\">↩</a></p>\n</li>\n</ol>\n</section>\n"
     /// );
+    /// # Ok(())
+    /// # }
     /// ```
     pub gfm_footnote_back_label: Option<String>,
 
@@ -696,6 +720,7 @@ pub struct Options {
     ///
     /// ```
     /// use micromark::{micromark, micromark_with_options, Options, Constructs};
+    /// # fn main() -> Result<(), String> {
     ///
     /// // `"user-content-"` is used by default:
     /// assert_eq!(
@@ -705,7 +730,7 @@ pub struct Options {
     ///             constructs: Constructs::gfm(),
     ///             ..Options::default()
     ///         }
-    ///     ),
+    ///     )?,
     ///     "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h2 id=\"footnote-label\" class=\"sr-only\">Footnotes</h2>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n"
     /// );
     ///
@@ -718,9 +743,11 @@ pub struct Options {
     ///             gfm_footnote_clobber_prefix: Some("".to_string()),
     ///             ..Options::default()
     ///         }
-    ///     ),
+    ///     )?,
     ///     "<p><sup><a href=\"#fn-a\" id=\"fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h2 id=\"footnote-label\" class=\"sr-only\">Footnotes</h2>\n<ol>\n<li id=\"fn-a\">\n<p>b <a href=\"#fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n"
     /// );
+    /// # Ok(())
+    /// # }
     /// ```
     pub gfm_footnote_clobber_prefix: Option<String>,
 
@@ -733,6 +760,7 @@ pub struct Options {
     ///
     /// ```
     /// use micromark::{micromark, micromark_with_options, Options, Constructs};
+    /// # fn main() -> Result<(), String> {
     ///
     /// // micromark supports single tildes by default:
     /// assert_eq!(
@@ -742,7 +770,7 @@ pub struct Options {
     ///             constructs: Constructs::gfm(),
     ///             ..Options::default()
     ///         }
-    ///     ),
+    ///     )?,
     ///     "<p><del>a</del></p>"
     /// );
     ///
@@ -755,9 +783,11 @@ pub struct Options {
     ///             gfm_strikethrough_single_tilde: false,
     ///             ..Options::default()
     ///         }
-    ///     ),
+    ///     )?,
     ///     "<p>~a~</p>"
     /// );
+    /// # Ok(())
+    /// # }
     /// ```
     pub gfm_strikethrough_single_tilde: bool,
 
@@ -772,6 +802,7 @@ pub struct Options {
     ///
     /// ```
     /// use micromark::{micromark_with_options, Options, Constructs};
+    /// # fn main() -> Result<(), String> {
     ///
     /// // With `allow_dangerous_html`, micromark passes HTML through untouched:
     /// assert_eq!(
@@ -782,7 +813,7 @@ pub struct Options {
     ///             constructs: Constructs::gfm(),
     ///             ..Options::default()
     ///         }
-    ///     ),
+    ///     )?,
     ///     "<iframe>"
     /// );
     ///
@@ -796,9 +827,11 @@ pub struct Options {
     ///             gfm_tagfilter: true,
     ///             ..Options::default()
     ///         }
-    ///     ),
+    ///     )?,
     ///     "&lt;iframe>"
     /// );
+    /// # Ok(())
+    /// # }
     /// ```
     ///
     /// ## References
@@ -817,6 +850,7 @@ pub struct Options {
     ///
     /// ```
     /// use micromark::{micromark, micromark_with_options, Options, Constructs};
+    /// # fn main() -> Result<(), String> {
     ///
     /// // micromark supports single dollars by default:
     /// assert_eq!(
@@ -829,7 +863,7 @@ pub struct Options {
     ///             },
     ///             ..Options::default()
     ///         }
-    ///     ),
+    ///     )?,
     ///     "<p><code class=\"language-math math-inline\">a</code></p>"
     /// );
     ///
@@ -845,9 +879,11 @@ pub struct Options {
     ///             math_text_single_dollar: false,
     ///             ..Options::default()
     ///         }
-    ///     ),
+    ///     )?,
     ///     "<p>$a$</p>"
     /// );
+    /// # Ok(())
+    /// # }
     /// ```
     pub math_text_single_dollar: bool,
 }
@@ -879,32 +915,41 @@ impl Default for Options {
 /// ```
 /// use micromark::micromark;
 ///
-/// let result = micromark("# Hello, world!");
-///
-/// assert_eq!(result, "<h1>Hello, world!</h1>");
+/// assert_eq!(micromark("# Hello, world!"), "<h1>Hello, world!</h1>");
 /// ```
 #[must_use]
+#[allow(clippy::missing_panics_doc)]
 pub fn micromark(value: &str) -> String {
-    micromark_with_options(value, &Options::default())
+    micromark_with_options(value, &Options::default()).unwrap()
 }
 
 /// Turn markdown into HTML, with configuration.
 ///
+/// ## Errors
+///
+/// `micromark_with_options` never errors with normal markdown because markdown
+/// does not have syntax errors, so feel free to `unwrap()`.
+/// However, MDX does have syntax errors.
+/// When MDX is turned on, there are several errors that can occur with how
+/// JSX, expressions, or ESM are written.
+///
 /// ## Examples
 ///
 /// ```
 /// use micromark::{micromark_with_options, Options};
+/// # fn main() -> Result<(), String> {
 ///
 /// let result = micromark_with_options("<div>\n\n# Hello, world!\n\n</div>", &Options {
 ///     allow_dangerous_html: true,
 ///     allow_dangerous_protocol: true,
 ///     ..Options::default()
-/// });
+/// })?;
 ///
 /// assert_eq!(result, "<div>\n<h1>Hello, world!</h1>\n</div>");
+/// # Ok(())
+/// # }
 /// ```
-#[must_use]
-pub fn micromark_with_options(value: &str, options: &Options) -> String {
-    let (events, bytes) = parse(value, options);
-    compile(&events, bytes, options)
+pub fn micromark_with_options(value: &str, options: &Options) -> Result<String, String> {
+    let (events, bytes) = parse(value, options)?;
+    Ok(compile(&events, bytes, options))
 }
diff --git a/src/parser.rs b/src/parser.rs
index 62b3e03..3a7713a 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -26,7 +26,7 @@ pub struct ParseState<'a> {
 /// Turn a string of markdown into events.
 ///
 /// Passes the bytes back so the compiler can access the source.
-pub fn parse<'a>(value: &'a str, options: &'a Options) -> (Vec<Event>, &'a [u8]) {
+pub fn parse<'a>(value: &'a str, options: &'a Options) -> Result<(Vec<Event>, &'a [u8]), String> {
     let mut parse_state = ParseState {
         options,
         bytes: value.as_bytes(),
@@ -49,7 +49,7 @@ pub fn parse<'a>(value: &'a str, options: &'a Options) -> (Vec<Event>, &'a [u8])
         (parse_state.bytes.len(), 0),
         State::Next(StateName::DocumentStart),
     );
-    tokenizer.flush(state, true);
+    tokenizer.flush(state, true)?;
 
     let mut events = tokenizer.events;
 
@@ -58,7 +58,7 @@ pub fn parse<'a>(value: &'a str, options: &'a Options) -> (Vec<Event>, &'a [u8])
     parse_state.gfm_footnote_definitions = footnote;
     parse_state.definitions = normal;
 
-    while !subtokenize(&mut events, &parse_state) {}
+    while !(subtokenize(&mut events, &parse_state)?) {}
 
-    (events, parse_state.bytes)
+    Ok((events, parse_state.bytes))
 }
diff --git a/src/state.rs b/src/state.rs
index 3294a2f..e8bd17a 100644
--- a/src/state.rs
+++ b/src/state.rs
@@ -2,10 +2,15 @@
 
 use crate::construct;
 use crate::tokenizer::Tokenizer;
+use alloc::string::{String, ToString};
 
 /// Result of a state.
-#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+#[derive(Clone, Debug, Eq, PartialEq)]
 pub enum State {
+    /// Syntax error.
+    ///
+    /// Only used by MDX.
+    Error(String),
     /// Move to [`Name`][] next.
     Next(Name),
     /// Retry in [`Name`][].
@@ -16,6 +21,24 @@ pub enum State {
     Nok,
 }
 
+impl State {
+    /// Turn a final state into a result.
+    ///
+    /// Only call this on a final state: [`State::Ok`] becomes `Ok(())` and
+    /// [`State::Error`] becomes `Err` holding a copy of its message.
+    /// Panics (through `unreachable!`) on future states ([`State::Next`],
+    /// [`State::Retry`]) and on an attempt ([`State::Nok`]).
+    pub fn to_result(&self) -> Result<(), String> {
+        match self {
+            State::Nok | State::Next(_) | State::Retry(_) => {
+                unreachable!("cannot turn intermediate state into result")
+            }
+            State::Ok => Ok(()),
+            State::Error(x) => Err(x.to_string()),
+        }
+    }
+}
+
 /// Names of states to move to.
 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
 #[allow(clippy::enum_variant_names)]
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index 7fcc481..12f91cf 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -22,7 +22,7 @@ use crate::parser::ParseState;
 use crate::state::{Name as StateName, State};
 use crate::tokenizer::Tokenizer;
 use crate::util::{edit_map::EditMap, skip};
-use alloc::{vec, vec::Vec};
+use alloc::{string::String, vec, vec::Vec};
 
 /// Link two [`Event`][]s.
 ///
@@ -69,7 +69,7 @@ pub fn link_to(events: &mut [Event], previous: usize, next: usize) {
 /// Parse linked events.
 ///
 /// Supposed to be called repeatedly, returns `true` when done.
-pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool {
+pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> Result<bool, String> {
     let mut map = EditMap::new();
     let mut done = true;
     let mut index = 0;
@@ -143,7 +143,7 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool {
                     link_index = link_curr.next;
                 }
 
-                tokenizer.flush(state, true);
+                tokenizer.flush(state, true)?;
 
                 divide_events(&mut map, events, index, &mut tokenizer.events);
 
@@ -156,7 +156,7 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool {
 
     map.consume(events);
 
-    done
+    Ok(done)
 }
 
 /// Divide `child_events` over links in `events`, the first of which is at
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 8843e47..7dbd158 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -616,11 +616,12 @@ impl<'a> Tokenizer<'a> {
     }
 
     /// Flush.
-    pub fn flush(&mut self, state: State, resolve: bool) {
+    pub fn flush(&mut self, state: State, resolve: bool) -> Result<(), String> {
         let to = (self.point.index, self.point.vs);
-        push_impl(self, to, to, state, true);
+        let state = push_impl(self, to, to, state, true);
+        let result = state.to_result();
 
-        if resolve {
+        if resolve && result.is_ok() {
             let resolvers = self.resolvers.split_off(0);
             let mut index = 0;
             while index < resolvers.len() {
@@ -630,6 +631,8 @@ impl<'a> Tokenizer<'a> {
 
             self.map.consume(&mut self.events);
         }
+
+        result
     }
 }
 
@@ -678,6 +681,7 @@ fn push_impl(
 
     loop {
         match state {
+            State::Error(_) => break,
             State::Ok | State::Nok => {
                 if let Some(attempt) = tokenizer.attempts.pop() {
                     if attempt.kind == AttemptKind::Check || state == State::Nok {
@@ -743,9 +747,12 @@ fn push_impl(
     tokenizer.consumed = true;
 
     if flush {
-        debug_assert!(matches!(state, State::Ok), "must be ok");
+        debug_assert!(matches!(state, State::Ok | State::Error(_)), "must be ok");
     } else {
-        debug_assert!(matches!(state, State::Next(_)), "must have a next state");
+        debug_assert!(
+            matches!(state, State::Next(_) | State::Error(_)),
+            "must have a next state"
+        );
     }
 
     state
author	Titus Wormer <tituswormer@gmail.com>	2022-09-07 15:53:06 +0200
committer	Titus Wormer <tituswormer@gmail.com>	2022-09-07 15:53:06 +0200
commit	1d92666865b35341e076efbefddf6e73b5e1542e (patch)
tree	11c05985ec7679f73473e7ea2c769465698e2f08 /src
parent	e6018e52ee6ad9a8f8a0672b75bf515faf74af1f (diff)
download	markdown-rs-1d92666865b35341e076efbefddf6e73b5e1542e.tar.gz markdown-rs-1d92666865b35341e076efbefddf6e73b5e1542e.tar.bz2 markdown-rs-1d92666865b35341e076efbefddf6e73b5e1542e.zip