diff options
| author | 2022-06-16 11:34:35 +0200 | |
|---|---|---|
| committer | 2022-06-16 11:34:35 +0200 | |
| commit | 58ba69452a25c3d4b2059c01cc6cd837159d2f90 (patch) | |
| tree | 7f6d49449f564ec8606cc3881210d8b27df11961 /src | |
| parent | 7875ada79cea1194dc9e15acee36ed0700be70e6 (diff) | |
| download | markdown-rs-58ba69452a25c3d4b2059c01cc6cd837159d2f90.tar.gz markdown-rs-58ba69452a25c3d4b2059c01cc6cd837159d2f90.tar.bz2 markdown-rs-58ba69452a25c3d4b2059c01cc6cd837159d2f90.zip | |
Add support for hard break escape
Diffstat (limited to '')
| -rw-r--r-- | src/compiler.rs | 6 | ||||
| -rw-r--r-- | src/construct/character_escape.rs | 11 | ||||
| -rw-r--r-- | src/construct/hard_break_escape.rs | 61 | ||||
| -rw-r--r-- | src/construct/mod.rs | 3 | ||||
| -rw-r--r-- | src/content/text.rs | 7 | ||||
| -rw-r--r-- | src/tokenizer.rs | 14 | 
6 files changed, 89 insertions, 13 deletions
| diff --git a/src/compiler.rs b/src/compiler.rs index 6127231..3aacca0 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -150,6 +150,8 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St                  | TokenType::BlankLineEnding                  | TokenType::BlankLineWhitespace                  | TokenType::Whitespace +                | TokenType::HardBreakEscape +                | TokenType::HardBreakEscapeMarker                  | TokenType::HtmlFlowData                  | TokenType::HtmlTextData                  | TokenType::CodeFencedFence @@ -192,6 +194,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St                  | TokenType::CharacterEscapeMarker                  | TokenType::CharacterReference                  | TokenType::CharacterReferenceMarkerSemi +                | TokenType::HardBreakEscapeMarker                  | TokenType::Autolink                  | TokenType::AutolinkMarker => {}                  TokenType::HtmlFlow | TokenType::HtmlText => { @@ -208,6 +211,9 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St                  TokenType::Paragraph => {                      buf_tail_mut(buffers).push("</p>".to_string());                  } +                TokenType::HardBreakEscape => { +                    buf_tail_mut(buffers).push("<br />".to_string()); +                }                  TokenType::CodeIndented | TokenType::CodeFenced => {                      let seen_data =                          code_flow_seen_data.expect("`code_flow_seen_data` must be defined"); diff --git a/src/construct/character_escape.rs b/src/construct/character_escape.rs index 7bab42d..baedd4b 100644 --- a/src/construct/character_escape.rs +++ b/src/construct/character_escape.rs @@ -11,11 +11,11 @@  //! slash, or a slash followed by anything other than an ASCII punctuation  //! character, is exactly that: just a slash.  //! 
To escape (most) arbitrary characters, use a -//! [character reference][] instead +//! [character reference][character_reference] instead  //! (as in, `&amp;`, `&#123;`, or say `&#9;`).  //! It is also possible to escape a line ending in text with a similar -//! construct: a backslash followed by a line ending (that is part of the -//! construct instead of ending it). +//! construct: a [hard break escape][hard_break_escape] is a backslash followed +//! by a line ending (that is part of the construct instead of ending it).  //!  //! ## References  //! @@ -24,9 +24,8 @@  //!  //! [string]: crate::content::string  //! [text]: crate::content::text -//! [character reference]: crate::construct::character_reference -//! -//! <!-- To do: link `hard_break_escape` --> +//! [character_reference]: crate::construct::character_reference +//! [hard_break_escape]: crate::construct::hard_break_escape  use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; diff --git a/src/construct/hard_break_escape.rs b/src/construct/hard_break_escape.rs new file mode 100644 index 0000000..a7712d6 --- /dev/null +++ b/src/construct/hard_break_escape.rs @@ -0,0 +1,61 @@ +//! Hard break escapes are a construct that occurs in the  [text][] content +//! type. +//! +//! They’re formed with the following BNF: +//! +//! ```bnf +//! ; Restriction: followed by a line ending  (that is part of the construct +//! ; instead of ending it). +//! hard_break_escape ::= '\\' +//! ``` +//! It is also possible to escape punctuation characters with a similar +//! construct: a [character escape][character_escape] is a backslash followed +//! by an ASCII punctuation character. +//! Arbitrary characters can be escaped with +//! [character reference][character_reference]s. +//! +//! ## References +//! +//! *   [`hard-break-escape.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/hard-break-escape.js) +//! 
*   [*§ 6.7 Hard line breaks* in `CommonMark`](https://spec.commonmark.org/0.30/#hard-line-breaks) +//! +//! [text]: crate::content::text +//! [character_escape]: crate::construct::character_escape +//! [character_reference]: crate::construct::character_reference +//! +//! <!-- To do: link `hard_break_escape` --> + +use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; + +/// Start of a hard break escape. +/// +/// ```markdown +/// a|\ +/// ``` +pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    match code { +        Code::Char('\\') => { +            tokenizer.enter(TokenType::HardBreakEscape); +            tokenizer.enter(TokenType::HardBreakEscapeMarker); +            tokenizer.consume(code); +            tokenizer.exit(TokenType::HardBreakEscapeMarker); +            (State::Fn(Box::new(inside)), None) +        } +        _ => (State::Nok, None), +    } +} + +/// At the end of a hard break escape, after `\`. +/// +/// ```markdown +/// a\| +/// ``` +fn inside(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    match code { +        Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { +            tokenizer.exit(TokenType::HardBreakEscape); +            (State::Ok, Some(vec![code])) +        } +        _ => (State::Nok, None), +    } +} diff --git a/src/construct/mod.rs b/src/construct/mod.rs index 1fa57d5..27f4308 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -26,7 +26,7 @@  //! *   [code (text)][code_text]  //! *   content  //! *   definition -//! *   hard break escape +//! *   [hard break escape][hard_break_escape]  //! *   [heading (atx)][heading_atx]  //! *   heading (setext)  //! 
*   [html (flow)][html_flow] @@ -60,6 +60,7 @@ pub mod character_reference;  pub mod code_fenced;  pub mod code_indented;  pub mod code_text; +pub mod hard_break_escape;  pub mod heading_atx;  pub mod html_flow;  pub mod html_text; diff --git a/src/content/text.rs b/src/content/text.rs index 9d510cb..d4d5493 100644 --- a/src/content/text.rs +++ b/src/content/text.rs @@ -8,7 +8,7 @@  //! *   [Autolink][crate::construct::autolink]  //! *   Attention  //! *   [HTML (text)][crate::construct::html_text] -//! *   Hard break escape +//! *   [Hard break escape][crate::construct::hard_break_escape]  //! *   [Code (text)][crate::construct::code_text]  //! *   Line ending  //! *   Label start (image) @@ -19,7 +19,7 @@  use crate::construct::{      autolink::start as autolink, character_escape::start as character_escape,      character_reference::start as character_reference, code_text::start as code_text, -    html_text::start as html_text, +    hard_break_escape::start as hard_break_escape, html_text::start as html_text,  };  use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; @@ -35,9 +35,10 @@ use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};  pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      match code {          Code::None => (State::Ok, None), -        _ => tokenizer.attempt_5( +        _ => tokenizer.attempt_6(              character_reference,              character_escape, +            hard_break_escape,              autolink,              html_text,              code_text, diff --git a/src/tokenizer.rs b/src/tokenizer.rs index c5df42b..a63d209 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -60,6 +60,9 @@ pub enum TokenType {      Data, +    HardBreakEscape, +    HardBreakEscapeMarker, +      HtmlFlow,      HtmlFlowData, @@ -441,6 +444,7 @@ impl Tokenizer {              None,              None,              None, +            None,              done,          )      } @@ -459,18 +463,20 
@@ impl Tokenizer {              Some(Box::new(c)),              None,              None, +            None,              done,          )      } -    #[allow(clippy::many_single_char_names)] -    pub fn attempt_5( +    #[allow(clippy::too_many_arguments, clippy::many_single_char_names)] +    pub fn attempt_6(          &mut self,          a: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,          b: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,          c: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,          d: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,          e: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static, +        f: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,          done: impl FnOnce(bool) -> Box<StateFn> + 'static,      ) -> Box<StateFn> {          self.call_multiple( @@ -480,6 +486,7 @@ impl Tokenizer {              Some(Box::new(c)),              Some(Box::new(d)),              Some(Box::new(e)), +            Some(Box::new(f)),              done,          )      } @@ -493,6 +500,7 @@ impl Tokenizer {          c: Option<Box<StateFn>>,          d: Option<Box<StateFn>>,          e: Option<Box<StateFn>>, +        f: Option<Box<StateFn>>,          done: impl FnOnce(bool) -> Box<StateFn> + 'static,      ) -> Box<StateFn> {          if let Some(head) = a { @@ -501,7 +509,7 @@ impl Tokenizer {                      done(ok)                  } else {                      Box::new(move |tokenizer: &mut Tokenizer, code| { -                        tokenizer.call_multiple(check, b, c, d, e, None, done)(tokenizer, code) +                        tokenizer.call_multiple(check, b, c, d, e, f, None, done)(tokenizer, code)                      })                  }              }; | 
