diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-09-07 15:53:06 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-09-07 15:53:06 +0200 |
commit | 1d92666865b35341e076efbefddf6e73b5e1542e (patch) | |
tree | 11c05985ec7679f73473e7ea2c769465698e2f08 /src | |
parent | e6018e52ee6ad9a8f8a0672b75bf515faf74af1f (diff) | |
download | markdown-rs-1d92666865b35341e076efbefddf6e73b5e1542e.tar.gz markdown-rs-1d92666865b35341e076efbefddf6e73b5e1542e.tar.bz2 markdown-rs-1d92666865b35341e076efbefddf6e73b5e1542e.zip |
Add support for recoverable syntax errors
Diffstat (limited to 'src')
-rw-r--r-- | src/compiler.rs | 2 | ||||
-rw-r--r-- | src/construct/document.rs | 21 | ||||
-rw-r--r-- | src/construct/mdx_jsx_text.rs | 88 | ||||
-rw-r--r-- | src/lib.rs | 103 | ||||
-rw-r--r-- | src/parser.rs | 8 | ||||
-rw-r--r-- | src/state.rs | 25 | ||||
-rw-r--r-- | src/subtokenize.rs | 8 | ||||
-rw-r--r-- | src/tokenizer.rs | 17 |
8 files changed, 175 insertions, 97 deletions
diff --git a/src/compiler.rs b/src/compiler.rs index b271768..4f0f958 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -333,7 +333,7 @@ pub fn compile(events: &[Event], bytes: &[u8], options: &Options) -> String { generate_footnote_section(&mut context); } - assert_eq!(context.buffers.len(), 1, "expected 1 final buffer"); + debug_assert_eq!(context.buffers.len(), 1, "expected 1 final buffer"); context .buffers .get(0) diff --git a/src/construct/document.rs b/src/construct/document.rs index e31e58d..57c5f3a 100644 --- a/src/construct/document.rs +++ b/src/construct/document.rs @@ -14,7 +14,7 @@ use crate::state::{Name as StateName, State}; use crate::subtokenize::divide_events; use crate::tokenizer::{Container, ContainerState, Tokenizer}; use crate::util::skip; -use alloc::{boxed::Box, vec::Vec}; +use alloc::{boxed::Box, string::String, vec::Vec}; /// Phases where we can exit containers. #[derive(Debug, PartialEq)] @@ -266,7 +266,9 @@ pub fn container_new_after(tokenizer: &mut Tokenizer) -> State { if tokenizer.tokenize_state.document_continued != tokenizer.tokenize_state.document_container_stack.len() { - exit_containers(tokenizer, &Phase::Prefix); + if let Err(message) = exit_containers(tokenizer, &Phase::Prefix) { + return State::Error(message); + } } // We are “piercing” into the flow with a new container. @@ -361,6 +363,7 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State { let state = tokenizer .tokenize_state .document_child_state + .take() .unwrap_or(State::Next(StateName::FlowStart)); tokenizer.tokenize_state.document_exits.push(None); @@ -439,13 +442,17 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State { if tokenizer.tokenize_state.document_continued != tokenizer.tokenize_state.document_container_stack.len() { - exit_containers(tokenizer, &Phase::After); + if let Err(message) = exit_containers(tokenizer, &Phase::After) { + return State::Error(message); + } } match tokenizer.current { None => { tokenizer.tokenize_state.document_continued = 0; - exit_containers(tokenizer, &Phase::Eof); + if let Err(message) = exit_containers(tokenizer, &Phase::Eof) { + return State::Error(message); + } resolve(tokenizer); State::Ok } @@ -461,7 +468,7 @@ pub fn flow_end(tokenizer: &mut Tokenizer) -> State { } /// Close containers (and flow if needed). -fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) { +fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) -> Result<(), String> { let mut stack_close = tokenizer .tokenize_state .document_container_stack @@ -477,7 +484,7 @@ fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) { .take() .unwrap_or(State::Next(StateName::FlowStart)); - child.flush(state, false); + child.flush(state, false)?; } if !stack_close.is_empty() { @@ -524,6 +531,8 @@ fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) { } child.interrupt = false; + + Ok(()) } // Inject everything together. diff --git a/src/construct/mdx_jsx_text.rs b/src/construct/mdx_jsx_text.rs index deeb3e9..4c71fec 100644 --- a/src/construct/mdx_jsx_text.rs +++ b/src/construct/mdx_jsx_text.rs @@ -76,10 +76,10 @@ pub fn name_before(tokenizer: &mut Tokenizer) -> State { // Fragment opening tag. Some(b'>') => State::Retry(StateName::MdxJsxTextTagEnd), _ => { - // To do: unicode. - let char_opt = char_after_index(tokenizer.parse_state.bytes, tokenizer.point.index); - - if id_start(char_opt) { + if id_start(char_after_index( + tokenizer.parse_state.bytes, + tokenizer.point.index, + )) { tokenizer.enter(Name::MdxJsxTextTagName); tokenizer.enter(Name::MdxJsxTextTagNamePrimary); tokenizer.consume(); @@ -111,34 +111,32 @@ pub fn name_before(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn closing_tag_name_before(tokenizer: &mut Tokenizer) -> State { - match tokenizer.current { - // Fragment closing tag. - Some(b'>') => State::Retry(StateName::MdxJsxTextTagEnd), - // Start of a closing tag name. - _ => { - // To do: unicode. - let char_opt = char_after_index(tokenizer.parse_state.bytes, tokenizer.point.index); - - if id_start(char_opt) { - tokenizer.enter(Name::MdxJsxTextTagName); - tokenizer.enter(Name::MdxJsxTextTagNamePrimary); - tokenizer.consume(); - State::Next(StateName::MdxJsxTextPrimaryName) - } else { - crash( - tokenizer, - "before name", - &format!( - "a character that can start a name, such as a letter, `$`, or `_`{}", - if tokenizer.current == Some(b'*' | b'/') { - " (note: JS comments in JSX tags are not supported in MDX)" - } else { - "" - } - ), - ) - } - } + // Fragment closing tag. + if let Some(b'>') = tokenizer.current { + State::Retry(StateName::MdxJsxTextTagEnd) + } + // Start of a closing tag name. + else if id_start(char_after_index( + tokenizer.parse_state.bytes, + tokenizer.point.index, + )) { + tokenizer.enter(Name::MdxJsxTextTagName); + tokenizer.enter(Name::MdxJsxTextTagNamePrimary); + tokenizer.consume(); + State::Next(StateName::MdxJsxTextPrimaryName) + } else { + crash( + tokenizer, + "before name", + &format!( + "a character that can start a name, such as a letter, `$`, or `_`{}", + if tokenizer.current == Some(b'*' | b'/') { + " (note: JS comments in JSX tags are not supported in MDX)" + } else { + "" + } + ), + ) } } @@ -162,7 +160,6 @@ pub fn primary_name(tokenizer: &mut Tokenizer) -> State { } // Continuation of name: remain. // Allow continuation bytes. - // To do: unicode. else if matches!(tokenizer.current, Some(0x80..=0xBF)) || id_cont(char_after_index( tokenizer.parse_state.bytes, @@ -284,7 +281,7 @@ pub fn member_name(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::MdxJsxTextEsWhitespaceStart) } // Continuation of name: remain. - // To do: unicode. + // Allow continuation bytes. else if matches!(tokenizer.current, Some(0x80..=0xBF)) || id_cont(char_after_index( tokenizer.parse_state.bytes, @@ -398,7 +395,7 @@ pub fn local_name(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::MdxJsxTextEsWhitespaceStart) } // Continuation of name: remain. - // To do: unicode. + // Allow continuation bytes. else if matches!(tokenizer.current, Some(0x80..=0xBF)) || id_cont(char_after_index( tokenizer.parse_state.bytes, @@ -516,8 +513,8 @@ pub fn attribute_primary_name(tokenizer: &mut Tokenizer) -> State { ); State::Retry(StateName::MdxJsxTextEsWhitespaceStart) } - // Continuation of the attribute name: remain. - // To do: unicode. + // Continuation of name: remain. + // Allow continuation bytes. else if matches!(tokenizer.current, Some(0x80..=0xBF)) || id_cont(char_after_index( tokenizer.parse_state.bytes, @@ -525,7 +522,7 @@ pub fn attribute_primary_name(tokenizer: &mut Tokenizer) -> State { )) { tokenizer.consume(); - State::Next(StateName::MdxJsxTextLocalName) + State::Next(StateName::MdxJsxTextAttributePrimaryName) } else { crash( tokenizer, @@ -643,8 +640,8 @@ pub fn attribute_local_name(tokenizer: &mut Tokenizer) -> State { ); State::Retry(StateName::MdxJsxTextEsWhitespaceStart) } - // Continuation of local name: remain. - // To do: unicode. + // Continuation of name: remain. + // Allow continuation bytes. else if matches!(tokenizer.current, Some(0x80..=0xBF)) || id_cont(char_after_index( tokenizer.parse_state.bytes, @@ -906,7 +903,6 @@ pub fn es_whitespace_inside(tokenizer: &mut Tokenizer) -> State { } } -// To do: unicode. fn id_start(code: Option<char>) -> bool { if let Some(char) = code { UnicodeID::is_id_start(char) || matches!(char, '$' | '_') @@ -915,7 +911,6 @@ fn id_start(code: Option<char>) -> bool { } } -// To do: unicode. fn id_cont(code: Option<char>) -> bool { if let Some(char) = code { UnicodeID::is_id_continue(char) || matches!(char, '-' | '\u{200c}' | '\u{200d}') @@ -924,25 +919,24 @@ fn id_cont(code: Option<char>) -> bool { } } -fn crash(tokenizer: &Tokenizer, at: &str, expect: &str) -> ! { +fn crash(tokenizer: &Tokenizer, at: &str, expect: &str) -> State { // To do: externalize this, and the print mechanism in the tokenizer, // to one proper formatter. - // To do: figure out how Rust does errors? let actual = match tokenizer.current { None => "end of file".to_string(), Some(byte) => format_byte(byte), }; - unreachable!( + State::Error(format!( "{}:{}: Unexpected {} {}, expected {}", tokenizer.point.line, tokenizer.point.column, actual, at, expect - ) + )) } fn format_byte(byte: u8) -> String { match byte { b'`' => "`` ` ``".to_string(), b' '..=b'~' => format!("`{}`", str::from_utf8(&[byte]).unwrap()), - _ => format!("U+{:>04X}", byte), + _ => format!("character U+{:>04X}", byte), } } @@ -406,6 +406,7 @@ pub struct Options { /// /// ``` /// use micromark::{micromark, micromark_with_options, Options}; + /// # fn main() -> Result<(), String> { /// /// // micromark is safe by default: /// assert_eq!( @@ -421,9 +422,11 @@ pub struct Options { /// allow_dangerous_html: true, /// ..Options::default() /// } - /// ), + /// )?, /// "<p>Hi, <i>venus</i>!</p>" /// ); + /// # Ok(()) + /// # } /// ``` pub allow_dangerous_html: bool, @@ -435,6 +438,7 @@ pub struct Options { /// /// ``` /// use micromark::{micromark, micromark_with_options, Options}; + /// # fn main() -> Result<(), String> { /// /// // micromark is safe by default: /// assert_eq!( @@ -450,9 +454,11 @@ pub struct Options { /// allow_dangerous_protocol: true, /// ..Options::default() /// } - /// ), + /// )?, /// "<p><a href=\"javascript:alert(1)\">javascript:alert(1)</a></p>" /// ); + /// # Ok(()) + /// # } /// ``` pub allow_dangerous_protocol: bool, @@ -463,6 +469,7 @@ pub struct Options { /// /// ``` /// use micromark::{micromark, micromark_with_options, Options, Constructs}; + /// # fn main() -> Result<(), String> { /// /// // micromark follows CommonMark by default: /// assert_eq!( @@ -481,9 +488,11 @@ pub struct Options { /// }, /// ..Options::default() /// } - /// ), + /// )?, /// "<p>indented code?</p>" /// ); + /// # Ok(()) + /// # } /// ``` pub constructs: Constructs, @@ -503,6 +512,7 @@ pub struct Options { /// /// ``` /// use micromark::{micromark, micromark_with_options, Options, LineEnding}; + /// # fn main() -> Result<(), String> { /// /// // micromark uses `\n` by default: /// assert_eq!( @@ -518,9 +528,11 @@ pub struct Options { /// default_line_ending: LineEnding::CarriageReturnLineFeed, /// ..Options::default() /// } - /// ), + /// )?, /// "<blockquote>\r\n<p>a</p>\r\n</blockquote>" /// ); + /// # Ok(()) + /// # } /// ``` pub default_line_ending: LineEnding, @@ -534,6 +546,7 @@ pub struct Options { /// /// ``` /// use micromark::{micromark, micromark_with_options, Options, Constructs}; + /// # fn main() -> Result<(), String> { /// /// // `"Footnotes"` is used by default: /// assert_eq!( @@ -543,7 +556,7 @@ pub struct Options { /// constructs: Constructs::gfm(), /// ..Options::default() /// } - /// ), + /// )?, /// "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h2 id=\"footnote-label\" class=\"sr-only\">Footnotes</h2>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n" /// ); /// @@ -556,9 +569,11 @@ pub struct Options { /// gfm_footnote_label: Some("Notes de bas de page".to_string()), /// ..Options::default() /// } - /// ), + /// )?, /// "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h2 id=\"footnote-label\" class=\"sr-only\">Notes de bas de page</h2>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n" /// ); + /// # Ok(()) + /// # } /// ``` pub gfm_footnote_label: Option<String>, @@ -570,6 +585,7 @@ pub struct Options { /// /// ``` /// use micromark::{micromark, micromark_with_options, Options, Constructs}; + /// # fn main() -> Result<(), String> { /// /// // `"h2"` is used by default: /// assert_eq!( @@ -579,7 +595,7 @@ pub struct Options { /// constructs: Constructs::gfm(), /// ..Options::default() /// } - /// ), + /// )?, /// "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h2 id=\"footnote-label\" class=\"sr-only\">Footnotes</h2>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n" /// ); /// @@ -592,9 +608,11 @@ pub struct Options { /// gfm_footnote_label_tag_name: Some("h1".to_string()), /// ..Options::default() /// } - /// ), + /// )?, /// "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h1 id=\"footnote-label\" class=\"sr-only\">Footnotes</h1>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n" /// ); + /// # Ok(()) + /// # } /// ``` pub gfm_footnote_label_tag_name: Option<String>, @@ -612,6 +630,7 @@ pub struct Options { /// /// ``` /// use micromark::{micromark, micromark_with_options, Options, Constructs}; + /// # fn main() -> Result<(), String> { /// /// // `"class=\"sr-only\""` is used by default: /// assert_eq!( @@ -621,7 +640,7 @@ pub struct Options { /// constructs: Constructs::gfm(), /// ..Options::default() /// } - /// ), + /// )?, /// "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h2 id=\"footnote-label\" class=\"sr-only\">Footnotes</h2>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n" /// ); /// @@ -634,9 +653,11 @@ pub struct Options { /// gfm_footnote_label_attributes: Some("class=\"footnote-heading\"".to_string()), /// ..Options::default() /// } - /// ), + /// )?, /// "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h2 id=\"footnote-label\" class=\"footnote-heading\">Footnotes</h2>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n" /// ); + /// # Ok(()) + /// # } /// ``` pub gfm_footnote_label_attributes: Option<String>, @@ -649,6 +670,7 @@ pub struct Options { /// /// ``` /// use micromark::{micromark, micromark_with_options, Options, Constructs}; + /// # fn main() -> Result<(), String> { /// /// // `"Back to content"` is used by default: /// assert_eq!( @@ -658,7 +680,7 @@ pub struct Options { /// constructs: Constructs::gfm(), /// ..Options::default() /// } - /// ), + /// )?, /// "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h2 id=\"footnote-label\" class=\"sr-only\">Footnotes</h2>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n" /// ); /// @@ -671,9 +693,11 @@ pub struct Options { /// gfm_footnote_back_label: Some("Arrière".to_string()), /// ..Options::default() /// } - /// ), + /// )?, /// "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h2 id=\"footnote-label\" class=\"sr-only\">Footnotes</h2>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Arrière\">↩</a></p>\n</li>\n</ol>\n</section>\n" /// ); + /// # Ok(()) + /// # } /// ``` pub gfm_footnote_back_label: Option<String>, @@ -696,6 +720,7 @@ pub struct Options { /// /// ``` /// use micromark::{micromark, micromark_with_options, Options, Constructs}; + /// # fn main() -> Result<(), String> { /// /// // `"user-content-"` is used by default: /// assert_eq!( @@ -705,7 +730,7 @@ pub struct Options { /// constructs: Constructs::gfm(), /// ..Options::default() /// } - /// ), + /// )?, /// "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h2 id=\"footnote-label\" class=\"sr-only\">Footnotes</h2>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n" /// ); /// @@ -718,9 +743,11 @@ pub struct Options { /// gfm_footnote_clobber_prefix: Some("".to_string()), /// ..Options::default() /// } - /// ), + /// )?, /// "<p><sup><a href=\"#fn-a\" id=\"fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h2 id=\"footnote-label\" class=\"sr-only\">Footnotes</h2>\n<ol>\n<li id=\"fn-a\">\n<p>b <a href=\"#fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n" /// ); + /// # Ok(()) + /// # } /// ``` pub gfm_footnote_clobber_prefix: Option<String>, @@ -733,6 +760,7 @@ pub struct Options { /// /// ``` /// use micromark::{micromark, micromark_with_options, Options, Constructs}; + /// # fn main() -> Result<(), String> { /// /// // micromark supports single tildes by default: /// assert_eq!( @@ -742,7 +770,7 @@ pub struct Options { /// constructs: Constructs::gfm(), /// ..Options::default() /// } - /// ), + /// )?, /// "<p><del>a</del></p>" /// ); /// @@ -755,9 +783,11 @@ pub struct Options { /// gfm_strikethrough_single_tilde: false, /// ..Options::default() /// } - /// ), + /// )?, /// "<p>~a~</p>" /// ); + /// # Ok(()) + /// # } /// ``` pub gfm_strikethrough_single_tilde: bool, @@ -772,6 +802,7 @@ pub struct Options { /// /// ``` /// use micromark::{micromark_with_options, Options, Constructs}; + /// # fn main() -> Result<(), String> { /// /// // With `allow_dangerous_html`, micromark passes HTML through untouched: /// assert_eq!( @@ -782,7 +813,7 @@ pub struct Options { /// constructs: Constructs::gfm(), /// ..Options::default() /// } - /// ), + /// )?, /// "<iframe>" /// ); /// @@ -796,9 +827,11 @@ pub struct Options { /// gfm_tagfilter: true, /// ..Options::default() /// } - /// ), + /// )?, /// "<iframe>" /// ); + /// # Ok(()) + /// # } /// ``` /// /// ## References @@ -817,6 +850,7 @@ pub struct Options { /// /// ``` /// use micromark::{micromark, micromark_with_options, Options, Constructs}; + /// # fn main() -> Result<(), String> { /// /// // micromark supports single dollars by default: /// assert_eq!( @@ -829,7 +863,7 @@ pub struct Options { /// }, /// ..Options::default() /// } - /// ), + /// )?, /// "<p><code class=\"language-math math-inline\">a</code></p>" /// ); /// @@ -845,9 +879,11 @@ pub struct Options { /// math_text_single_dollar: false, /// ..Options::default() /// } - /// ), + /// )?, /// "<p>$a$</p>" /// ); + /// # Ok(()) + /// # } /// ``` pub math_text_single_dollar: bool, } @@ -879,32 +915,41 @@ impl Default for Options { /// ``` /// use micromark::micromark; /// -/// let result = micromark("# Hello, world!"); -/// -/// assert_eq!(result, "<h1>Hello, world!</h1>"); +/// assert_eq!(micromark("# Hello, world!"), "<h1>Hello, world!</h1>"); /// ``` #[must_use] +#[allow(clippy::missing_panics_doc)] pub fn micromark(value: &str) -> String { - micromark_with_options(value, &Options::default()) + micromark_with_options(value, &Options::default()).unwrap() } /// Turn markdown into HTML, with configuration. /// +/// ## Errors +/// +/// `micromark_with_options` never errors with normal markdown because markdown +/// does not have syntax errors, so feel free to `unwrap()`. +/// However, MDX does have syntax errors. +/// When MDX is turned on, there are several errors that can occur with how +/// JSX, expressions, or ESM are written. +/// /// ## Examples /// /// ``` /// use micromark::{micromark_with_options, Options}; +/// # fn main() -> Result<(), String> { /// /// let result = micromark_with_options("<div>\n\n# Hello, world!\n\n</div>", &Options { /// allow_dangerous_html: true, /// allow_dangerous_protocol: true, /// ..Options::default() -/// }); +/// })?; /// /// assert_eq!(result, "<div>\n<h1>Hello, world!</h1>\n</div>"); +/// # Ok(()) +/// # } /// ``` -#[must_use] -pub fn micromark_with_options(value: &str, options: &Options) -> String { - let (events, bytes) = parse(value, options); - compile(&events, bytes, options) +pub fn micromark_with_options(value: &str, options: &Options) -> Result<String, String> { + let (events, bytes) = parse(value, options)?; + Ok(compile(&events, bytes, options)) } diff --git a/src/parser.rs b/src/parser.rs index 62b3e03..3a7713a 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -26,7 +26,7 @@ pub struct ParseState<'a> { /// Turn a string of markdown into events. /// /// Passes the bytes back so the compiler can access the source. -pub fn parse<'a>(value: &'a str, options: &'a Options) -> (Vec<Event>, &'a [u8]) { +pub fn parse<'a>(value: &'a str, options: &'a Options) -> Result<(Vec<Event>, &'a [u8]), String> { let mut parse_state = ParseState { options, bytes: value.as_bytes(), @@ -49,7 +49,7 @@ pub fn parse<'a>(value: &'a str, options: &'a Options) -> (Vec<Event>, &'a [u8]) (parse_state.bytes.len(), 0), State::Next(StateName::DocumentStart), ); - tokenizer.flush(state, true); + tokenizer.flush(state, true)?; let mut events = tokenizer.events; @@ -58,7 +58,7 @@ pub fn parse<'a>(value: &'a str, options: &'a Options) -> (Vec<Event>, &'a [u8]) parse_state.gfm_footnote_definitions = footnote; parse_state.definitions = normal; - while !subtokenize(&mut events, &parse_state) {} + while !(subtokenize(&mut events, &parse_state)?) {} - (events, parse_state.bytes) + Ok((events, parse_state.bytes)) } diff --git a/src/state.rs b/src/state.rs index 3294a2f..e8bd17a 100644 --- a/src/state.rs +++ b/src/state.rs @@ -2,10 +2,15 @@ use crate::construct; use crate::tokenizer::Tokenizer; +use alloc::string::{String, ToString}; /// Result of a state. -#[derive(Clone, Copy, Debug, Eq, PartialEq)] +#[derive(Clone, Debug, Eq, PartialEq)] pub enum State { + /// Syntax error. + /// + /// Only used by MDX. + Error(String), /// Move to [`Name`][] next. Next(Name), /// Retry in [`Name`][]. @@ -16,6 +21,24 @@ pub enum State { Nok, } +impl State { + /// Turn a final state into a result. + /// + /// This doesn’t work on future states ([`State::Next`], [`State::Retry`]), + /// or on an attempt ([`State::Nok`]). + /// + /// But it turns the final result into an error if crashed. + pub fn to_result(&self) -> Result<(), String> { + match self { + State::Nok | State::Next(_) | State::Retry(_) => { + unreachable!("cannot turn intermediate state into result") + } + State::Ok => Ok(()), + State::Error(x) => Err(x.to_string()), + } + } +} + /// Names of states to move to. #[derive(Clone, Copy, Debug, Eq, PartialEq)] #[allow(clippy::enum_variant_names)] diff --git a/src/subtokenize.rs b/src/subtokenize.rs index 7fcc481..12f91cf 100644 --- a/src/subtokenize.rs +++ b/src/subtokenize.rs @@ -22,7 +22,7 @@ use crate::parser::ParseState; use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; use crate::util::{edit_map::EditMap, skip}; -use alloc::{vec, vec::Vec}; +use alloc::{string::String, vec, vec::Vec}; /// Link two [`Event`][]s. /// @@ -69,7 +69,7 @@ pub fn link_to(events: &mut [Event], previous: usize, next: usize) { /// Parse linked events. /// /// Supposed to be called repeatedly, returns `true` when done. -pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool { +pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> Result<bool, String> { let mut map = EditMap::new(); let mut done = true; let mut index = 0; @@ -143,7 +143,7 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool { link_index = link_curr.next; } - tokenizer.flush(state, true); + tokenizer.flush(state, true)?; divide_events(&mut map, events, index, &mut tokenizer.events); @@ -156,7 +156,7 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool { map.consume(events); - done + Ok(done) } /// Divide `child_events` over links in `events`, the first of which is at diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 8843e47..7dbd158 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -616,11 +616,12 @@ impl<'a> Tokenizer<'a> { } /// Flush. - pub fn flush(&mut self, state: State, resolve: bool) { + pub fn flush(&mut self, state: State, resolve: bool) -> Result<(), String> { let to = (self.point.index, self.point.vs); - push_impl(self, to, to, state, true); + let state = push_impl(self, to, to, state, true); + let result = state.to_result(); - if resolve { + if resolve && result.is_ok() { let resolvers = self.resolvers.split_off(0); let mut index = 0; while index < resolvers.len() { @@ -630,6 +631,8 @@ impl<'a> Tokenizer<'a> { self.map.consume(&mut self.events); } + + result } } @@ -678,6 +681,7 @@ fn push_impl( loop { match state { + State::Error(_) => break, State::Ok | State::Nok => { if let Some(attempt) = tokenizer.attempts.pop() { if attempt.kind == AttemptKind::Check || state == State::Nok { @@ -743,9 +747,12 @@ fn push_impl( tokenizer.consumed = true; if flush { - debug_assert!(matches!(state, State::Ok), "must be ok"); + debug_assert!(matches!(state, State::Ok | State::Error(_)), "must be ok"); } else { - debug_assert!(matches!(state, State::Next(_)), "must have a next state"); + debug_assert!( + matches!(state, State::Next(_) | State::Error(_)), + "must have a next state" + ); } state |