diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-06-16 11:34:35 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-06-16 11:34:35 +0200 |
commit | 58ba69452a25c3d4b2059c01cc6cd837159d2f90 (patch) | |
tree | 7f6d49449f564ec8606cc3881210d8b27df11961 | |
parent | 7875ada79cea1194dc9e15acee36ed0700be70e6 (diff) | |
download | markdown-rs-58ba69452a25c3d4b2059c01cc6cd837159d2f90.tar.gz markdown-rs-58ba69452a25c3d4b2059c01cc6cd837159d2f90.tar.bz2 markdown-rs-58ba69452a25c3d4b2059c01cc6cd837159d2f90.zip |
Add support for hard break escape
-rw-r--r-- | readme.md | 6 | ||||
-rw-r--r-- | src/compiler.rs | 6 | ||||
-rw-r--r-- | src/construct/character_escape.rs | 11 | ||||
-rw-r--r-- | src/construct/hard_break_escape.rs | 61 | ||||
-rw-r--r-- | src/construct/mod.rs | 3 | ||||
-rw-r--r-- | src/content/text.rs | 7 | ||||
-rw-r--r-- | src/tokenizer.rs | 14 | ||||
-rw-r--r-- | tests/character_escape.rs | 11 | ||||
-rw-r--r-- | tests/hard_break_escape.rs | 167 |
9 files changed, 265 insertions, 21 deletions
@@ -111,7 +111,7 @@ cargo doc --document-private-items - [x] (1) code (text) - [ ] (3) content - [ ] (3) definition -- [ ] (1) hard break escape +- [x] (1) hard break escape - [x] heading (atx) - [ ] (1) heading (setext) - [x] html (flow) @@ -122,6 +122,7 @@ cargo doc --document-private-items - [ ] (8) list - [ ] (1) paragraph - [x] thematic break +- [ ] (1) trailing break escape ### Content types @@ -146,11 +147,12 @@ cargo doc --document-private-items - [x] character escape - [x] character reference - [x] code (text) - - [ ] hard break escape + - [x] hard break escape - [x] html (text) - [ ] label end - [ ] label start (image) - [ ] label start (link) + - [ ] trailing break escape - [x] string - [x] character escape - [x] character reference diff --git a/src/compiler.rs b/src/compiler.rs index 6127231..3aacca0 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -150,6 +150,8 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St | TokenType::BlankLineEnding | TokenType::BlankLineWhitespace | TokenType::Whitespace + | TokenType::HardBreakEscape + | TokenType::HardBreakEscapeMarker | TokenType::HtmlFlowData | TokenType::HtmlTextData | TokenType::CodeFencedFence @@ -192,6 +194,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St | TokenType::CharacterEscapeMarker | TokenType::CharacterReference | TokenType::CharacterReferenceMarkerSemi + | TokenType::HardBreakEscapeMarker | TokenType::Autolink | TokenType::AutolinkMarker => {} TokenType::HtmlFlow | TokenType::HtmlText => { @@ -208,6 +211,9 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St TokenType::Paragraph => { buf_tail_mut(buffers).push("</p>".to_string()); } + TokenType::HardBreakEscape => { + buf_tail_mut(buffers).push("<br />".to_string()); + } TokenType::CodeIndented | TokenType::CodeFenced => { let seen_data = code_flow_seen_data.expect("`code_flow_seen_data` must be defined"); diff --git 
a/src/construct/character_escape.rs b/src/construct/character_escape.rs index 7bab42d..baedd4b 100644 --- a/src/construct/character_escape.rs +++ b/src/construct/character_escape.rs @@ -11,11 +11,11 @@ //! slash, or a slash followed by anything other than an ASCII punctuation //! character, is exactly that: just a slash. //! To escape (most) arbitrary characters, use a -//! [character reference][] instead +//! [character reference][character_reference] instead //! (as in, `&amp;`, `&#123;`, or say `&#9;`). //! It is also possible to escape a line ending in text with a similar -//! construct: a backslash followed by a line ending (that is part of the -//! construct instead of ending it). +//! construct: a [hard break escape][hard_break_escape] is a backslash followed +//! by a line ending (that is part of the construct instead of ending it). //! //! ## References //! @@ -24,9 +24,8 @@ //! //! [string]: crate::content::string //! [text]: crate::content::text -//! [character reference]: crate::construct::character_reference -//! -//! <!-- To do: link `hard_break_escape` --> +//! [character_reference]: crate::construct::character_reference +//! [hard_break_escape]: crate::construct::hard_break_escape use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; diff --git a/src/construct/hard_break_escape.rs b/src/construct/hard_break_escape.rs new file mode 100644 index 0000000..a7712d6 --- /dev/null +++ b/src/construct/hard_break_escape.rs @@ -0,0 +1,61 @@ +//! Hard break escapes are a construct that occurs in the [text][] content +//! type. +//! +//! They’re formed with the following BNF: +//! +//! ```bnf +//! ; Restriction: followed by a line ending (that is part of the construct +//! ; instead of ending it). +//! hard_break_escape ::= '\\' +//! ``` +//! It is also possible to escape punctuation characters with a similar +//! construct: a [character escape][character_escape] is a backslash followed +//! by an ASCII punctuation character. +//! 
Arbitrary characters can be escaped with +//! [character reference][character_reference]s. +//! +//! ## References +//! +//! * [`hard-break-escape.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/hard-break-escape.js) +//! * [*§ 6.7 Hard line breaks* in `CommonMark`](https://spec.commonmark.org/0.30/#hard-line-breaks) +//! +//! [text]: crate::content::text +//! [character_escape]: crate::construct::character_escape +//! [character_reference]: crate::construct::character_reference +//! +//! <!-- To do: link `hard_break_escape` --> + +use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; + +/// Start of a hard break escape. +/// +/// ```markdown +/// a|\ +/// ``` +pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::Char('\\') => { + tokenizer.enter(TokenType::HardBreakEscape); + tokenizer.enter(TokenType::HardBreakEscapeMarker); + tokenizer.consume(code); + tokenizer.exit(TokenType::HardBreakEscapeMarker); + (State::Fn(Box::new(inside)), None) + } + _ => (State::Nok, None), + } +} + +/// At the end of a hard break escape, after `\`. +/// +/// ```markdown +/// a\| +/// ``` +fn inside(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { + tokenizer.exit(TokenType::HardBreakEscape); + (State::Ok, Some(vec![code])) + } + _ => (State::Nok, None), + } +} diff --git a/src/construct/mod.rs b/src/construct/mod.rs index 1fa57d5..27f4308 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -26,7 +26,7 @@ //! * [code (text)][code_text] //! * content //! * definition -//! * hard break escape +//! * [hard break escape][hard_break_escape] //! * [heading (atx)][heading_atx] //! * heading (setext) //! 
* [html (flow)][html_flow] @@ -60,6 +60,7 @@ pub mod character_reference; pub mod code_fenced; pub mod code_indented; pub mod code_text; +pub mod hard_break_escape; pub mod heading_atx; pub mod html_flow; pub mod html_text; diff --git a/src/content/text.rs b/src/content/text.rs index 9d510cb..d4d5493 100644 --- a/src/content/text.rs +++ b/src/content/text.rs @@ -8,7 +8,7 @@ //! * [Autolink][crate::construct::autolink] //! * Attention //! * [HTML (text)][crate::construct::html_text] -//! * Hard break escape +//! * [Hard break escape][crate::construct::hard_break_escape] //! * [Code (text)][crate::construct::code_text] //! * Line ending //! * Label start (image) @@ -19,7 +19,7 @@ use crate::construct::{ autolink::start as autolink, character_escape::start as character_escape, character_reference::start as character_reference, code_text::start as code_text, - html_text::start as html_text, + hard_break_escape::start as hard_break_escape, html_text::start as html_text, }; use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; @@ -35,9 +35,10 @@ use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::None => (State::Ok, None), - _ => tokenizer.attempt_5( + _ => tokenizer.attempt_6( character_reference, character_escape, + hard_break_escape, autolink, html_text, code_text, diff --git a/src/tokenizer.rs b/src/tokenizer.rs index c5df42b..a63d209 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -60,6 +60,9 @@ pub enum TokenType { Data, + HardBreakEscape, + HardBreakEscapeMarker, + HtmlFlow, HtmlFlowData, @@ -441,6 +444,7 @@ impl Tokenizer { None, None, None, + None, done, ) } @@ -459,18 +463,20 @@ impl Tokenizer { Some(Box::new(c)), None, None, + None, done, ) } - #[allow(clippy::many_single_char_names)] - pub fn attempt_5( + #[allow(clippy::too_many_arguments, clippy::many_single_char_names)] + pub fn attempt_6( &mut self, a: impl 
FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static, b: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static, c: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static, d: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static, e: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static, + f: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static, done: impl FnOnce(bool) -> Box<StateFn> + 'static, ) -> Box<StateFn> { self.call_multiple( @@ -480,6 +486,7 @@ impl Tokenizer { Some(Box::new(c)), Some(Box::new(d)), Some(Box::new(e)), + Some(Box::new(f)), done, ) } @@ -493,6 +500,7 @@ impl Tokenizer { c: Option<Box<StateFn>>, d: Option<Box<StateFn>>, e: Option<Box<StateFn>>, + f: Option<Box<StateFn>>, done: impl FnOnce(bool) -> Box<StateFn> + 'static, ) -> Box<StateFn> { if let Some(head) = a { @@ -501,7 +509,7 @@ impl Tokenizer { done(ok) } else { Box::new(move |tokenizer: &mut Tokenizer, code| { - tokenizer.call_multiple(check, b, c, d, e, None, done)(tokenizer, code) + tokenizer.call_multiple(check, b, c, d, e, f, None, done)(tokenizer, code) }) } }; diff --git a/tests/character_escape.rs b/tests/character_escape.rs index aae0b58..c81760d 100644 --- a/tests/character_escape.rs +++ b/tests/character_escape.rs @@ -30,12 +30,11 @@ fn character_escape() { "should escape other constructs" ); - // To do: hard break. - // assert_eq!( - // micromark("foo\\\nbar"), - // "<p>foo<br />\nbar</p>", - // "should escape a line break" - // ); + assert_eq!( + micromark("foo\\\nbar"), + "<p>foo<br />\nbar</p>", + "should escape a line break" + ); assert_eq!( micromark("`` \\[\\` ``"), diff --git a/tests/hard_break_escape.rs b/tests/hard_break_escape.rs new file mode 100644 index 0000000..fe4c82b --- /dev/null +++ b/tests/hard_break_escape.rs @@ -0,0 +1,167 @@ +extern crate micromark; +use micromark::{micromark}; + +#[test] +fn hard_break_escape() { + // To do: trailing. 
+ // assert_eq!( + // micromark("foo \nbaz"), + // "<p>foo<br />\nbaz</p>", + // "should support two trailing spaces to form a hard break" + // ); + + assert_eq!( + micromark("foo\\\nbaz"), + "<p>foo<br />\nbaz</p>", + "should support a backslash to form a hard break" + ); + + // To do: trailing. + // assert_eq!( + // micromark("foo \nbaz"), + // "<p>foo<br />\nbaz</p>", + // "should support multiple trailing spaces" + // ); + + // To do: trailing. + // assert_eq!( + // micromark("foo \n bar"), + // "<p>foo<br />\nbar</p>", + // "should support leading spaces after a trailing hard break" + // ); + + // To do: trim paragraph whitespace. + // assert_eq!( + // micromark("foo\\\n bar"), + // "<p>foo<br />\nbar</p>", + // "should support leading spaces after an escape hard break" + // ); + + // To do: trailing, attention. + // assert_eq!( + // micromark("*foo \nbar*"), + // "<p><em>foo<br />\nbar</em></p>", + // "should support trailing hard breaks in emphasis" + // ); + + // To do: attention. + // assert_eq!( + // micromark("*foo\\\nbar*"), + // "<p><em>foo<br />\nbar</em></p>", + // "should support escape hard breaks in emphasis" + // ); + + assert_eq!( + micromark("`code \ntext`"), + "<p><code>code text</code></p>", + "should not support trailing hard breaks in code" + ); + + assert_eq!( + micromark("``code\\\ntext``"), + "<p><code>code\\ text</code></p>", + "should not support escape hard breaks in code" + ); + + // To do: paragraph trimming. 
+ // assert_eq!( + // micromark("foo "), + // "<p>foo</p>", + // "should not support trailing hard breaks at the end of a paragraph" + // ); + + assert_eq!( + micromark("foo\\"), + "<p>foo\\</p>", + "should not support escape hard breaks at the end of a paragraph" + ); + + assert_eq!( + micromark("### foo\\"), + "<h3>foo\\</h3>", + "should not support escape hard breaks at the end of a heading" + ); + + assert_eq!( + micromark("### foo "), + "<h3>foo</h3>", + "should not support trailing hard breaks at the end of a heading" + ); + + // To do: paragraph trimming. + // assert_eq!( + // micromark("aaa \t\nbb"), + // "<p>aaa\nbb</p>", + // "should support a mixed line suffix (1)" + // ); + + // To do: paragraph trimming. + // assert_eq!( + // micromark("aaa\t \nbb"), + // "<p>aaa\nbb</p>", + // "should support a mixed line suffix (2)" + // ); + + // To do: paragraph trimming. + // assert_eq!( + // micromark("aaa \t \nbb"), + // "<p>aaa\nbb</p>", + // "should support a mixed line suffix (3)" + // ); + + // To do: trailing. + // assert_eq!( + // micromark("aaa\0 \nbb"), + // "<p>aaa�<br />\nbb</p>", + // "should support a hard break after a replacement character" + // ); + + // To do: trailing. + // assert_eq!( + // micromark("aaa\0\t\nbb"), + // "<p>aaa�\nbb</p>", + // "should support a line suffix after a replacement character" + // ); + + // To do: attention, trailing. + // assert_eq!( + // micromark("*a* \nbb"), + // "<p><em>a</em><br />\nbb</p>", + // "should support a hard break after a span" + // ); + + // To do: attention, trailing. + // assert_eq!( + // micromark("*a*\t\nbb"), + // "<p><em>a</em>\nbb</p>", + // "should support a line suffix after a span" + // ); + + // To do: attention, trailing. + // assert_eq!( + // micromark("*a* \t\nbb"), + // "<p><em>a</em>\nbb</p>", + // "should support a mixed line suffix after a span (1)" + // ); + + // To do: attention, trailing. 
+ // assert_eq!( + // micromark("*a*\t \nbb"), + // "<p><em>a</em>\nbb</p>", + // "should support a mixed line suffix after a span (2)" + // ); + + // To do: attention, trailing. + // assert_eq!( + // micromark("*a* \t \nbb"), + // "<p><em>a</em>\nbb</p>", + // "should support a mixed line suffix after a span (3)" + // ); + + // // To do: turning off things. + // assert_eq!( + // micromark("a\\\nb", {extensions: [{disable: {null: ["hardBreakEscape"]}}]}), + // "<p>a\\\nb</p>", + // "should support turning off hard break (escape)" + // ); +} |