From 58ba69452a25c3d4b2059c01cc6cd837159d2f90 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Thu, 16 Jun 2022 11:34:35 +0200 Subject: Add support for hard break escape --- src/construct/character_escape.rs | 11 ++++--- src/construct/hard_break_escape.rs | 61 ++++++++++++++++++++++++++++++++++++++ src/construct/mod.rs | 3 +- 3 files changed, 68 insertions(+), 7 deletions(-) create mode 100644 src/construct/hard_break_escape.rs (limited to 'src/construct') diff --git a/src/construct/character_escape.rs b/src/construct/character_escape.rs index 7bab42d..baedd4b 100644 --- a/src/construct/character_escape.rs +++ b/src/construct/character_escape.rs @@ -11,11 +11,11 @@ //! slash, or a slash followed by anything other than an ASCII punctuation //! character, is exactly that: just a slash. //! To escape (most) arbitrary characters, use a -//! [character reference][] instead +//! [character reference][character_reference] instead //! (as in, `&amp;`, `&#123;`, or say `&#x9;`). //! It is also possible to escape a line ending in text with a similar -//! construct: a backslash followed by a line ending (that is part of the -//! construct instead of ending it). +//! construct: a [hard break escape][hard_break_escape] is a backslash followed +//! by a line ending (that is part of the construct instead of ending it). //! //! ## References //! @@ -24,9 +24,8 @@ //! //! [string]: crate::content::string //! [text]: crate::content::text -//! [character reference]: crate::construct::character_reference -//! -//! +//! [character_reference]: crate::construct::character_reference +//! [hard_break_escape]: crate::construct::hard_break_escape use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; diff --git a/src/construct/hard_break_escape.rs b/src/construct/hard_break_escape.rs new file mode 100644 index 0000000..a7712d6 --- /dev/null +++ b/src/construct/hard_break_escape.rs @@ -0,0 +1,61 @@ +//! Hard break escapes are a construct that occurs in the [text][] content +//! type.
+//!
+//! They’re formed with the following BNF:
+//!
+//! ```bnf
+//! ; Restriction: followed by a line ending (that is part of the construct
+//! ; instead of ending it).
+//! hard_break_escape ::= '\\'
+//! ```
+//! It is also possible to escape punctuation characters with a similar
+//! construct: a [character escape][character_escape] is a backslash followed
+//! by an ASCII punctuation character.
+//! Arbitrary characters can be escaped with
+//! [character reference][character_reference]s.
+//!
+//! ## References
+//!
+//! * [`hard-break-escape.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/hard-break-escape.js)
+//! * [*§ 6.7 Hard line breaks* in `CommonMark`](https://spec.commonmark.org/0.30/#hard-line-breaks)
+//!
+//! [text]: crate::content::text
+//! [character_escape]: crate::construct::character_escape
+//! [character_reference]: crate::construct::character_reference
+//!
+//!
+
+use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+
+/// Before a hard break escape: only a backslash is accepted here.
+///
+/// The `\` becomes the marker; the escape itself is exited later, in `inside`.
+///
+/// ```markdown
+/// a|\
+/// ```
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    if !matches!(code, Code::Char('\\')) {
+        return (State::Nok, None);
+    }
+    tokenizer.enter(TokenType::HardBreakEscape);
+    tokenizer.enter(TokenType::HardBreakEscapeMarker);
+    tokenizer.consume(code);
+    tokenizer.exit(TokenType::HardBreakEscapeMarker);
+    (State::Fn(Box::new(inside)), None)
+}
+
+/// At the end of a hard break escape, after `\`.
+///
+/// ```markdown
+/// a\|
+/// ```
+fn inside(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    let at_eol = matches!(code, Code::CarriageReturnLineFeed | Code::Char('\r' | '\n'));
+    if !at_eol {
+        return (State::Nok, None);
+    }
+    // The line ending is not consumed here; it is returned next to `State::Ok`.
+    tokenizer.exit(TokenType::HardBreakEscape);
+    (State::Ok, Some(vec![code]))
+}
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index 1fa57d5..27f4308 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -26,7 +26,7 @@
 //! * [code (text)][code_text]
 //! * content
 //! * definition
-//! * hard break escape
+//! * [hard break escape][hard_break_escape]
 //! * [heading (atx)][heading_atx]
 //! * heading (setext)
 //! * [html (flow)][html_flow]
@@ -60,6 +60,7 @@ pub mod character_reference;
 pub mod code_fenced;
 pub mod code_indented;
 pub mod code_text;
+pub mod hard_break_escape;
 pub mod heading_atx;
 pub mod html_flow;
 pub mod html_text;
-- cgit