From 7350acc692a79d9d4cf56afbc53ac3c9f2a6237c Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Thu, 16 Jun 2022 12:55:50 +0200 Subject: Add support for hard break (trailing) --- src/construct/character_escape.rs | 2 +- src/construct/hard_break_escape.rs | 19 ++++++--- src/construct/hard_break_trailing.rs | 83 ++++++++++++++++++++++++++++++++++++ src/construct/mod.rs | 4 +- 4 files changed, 101 insertions(+), 7 deletions(-) create mode 100644 src/construct/hard_break_trailing.rs (limited to 'src/construct') diff --git a/src/construct/character_escape.rs b/src/construct/character_escape.rs index baedd4b..743cbf8 100644 --- a/src/construct/character_escape.rs +++ b/src/construct/character_escape.rs @@ -14,7 +14,7 @@ //! [character reference][character_reference] instead //! (as in, `&amp;`, `&#123;`, or say `&#x9;`). //! It is also possible to escape a line ending in text with a similar -//! construct: a [hard break escape][hard_break_escape] is a backslash followed +//! construct: a [hard break (escape)][hard_break_escape] is a backslash followed //! by a line ending (that is part of the construct instead of ending it). //! //! ## References diff --git a/src/construct/hard_break_escape.rs b/src/construct/hard_break_escape.rs index a7712d6..51da953 100644 --- a/src/construct/hard_break_escape.rs +++ b/src/construct/hard_break_escape.rs @@ -1,4 +1,4 @@ -//! Hard break escapes are a construct that occurs in the [text][] content +//! Hard break (escape) is a construct that occurs in the [text][] content //! type. //! //! They’re formed with the following BNF: @@ -8,6 +8,15 @@ //! ; instead of ending it). //! hard_break_escape ::= '\\' //! ``` +//! +//! Hard breaks in markdown relate to the HTML element `<br>
`. +//! See [*§ 4.5.27 The `br` element* in the HTML spec][html] for more info. +//! +//! It is also possible to create a hard break with a +//! [hard break (trailing)][hard_break_trailing]. +//! That construct is not recommended because trailing spaces are typically +//! invisible in editors, or even automatically removed, making them hard to use. +//! //! It is also possible to escape punctuation characters with a similar //! construct: a [character escape][character_escape] is a backslash followed //! by an ASCII punctuation character. @@ -22,12 +31,12 @@ //! [text]: crate::content::text //! [character_escape]: crate::construct::character_escape //! [character_reference]: crate::construct::character_reference -//! -//! +//! [hard_break_trailing]: crate::construct::hard_break_trailing +//! [html]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-br-element use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; -/// Start of a hard break escape. +/// Start of a hard break (escape). /// /// ```markdown /// a|\ @@ -45,7 +54,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { } } -/// At the end of a hard break escape, after `\`. +/// At the end of a hard break (escape), after `\`. /// /// ```markdown /// a\| diff --git a/src/construct/hard_break_trailing.rs b/src/construct/hard_break_trailing.rs new file mode 100644 index 0000000..46337c5 --- /dev/null +++ b/src/construct/hard_break_trailing.rs @@ -0,0 +1,83 @@ +//! Hard break (trailing) is a construct that occurs in the [text][] content +//! type. +//! +//! They’re formed with the following BNF: +//! +//! ```bnf +//! ; Restriction: followed by a line ending (that is part of the construct +//! ; instead of ending it). +//! hard_break_trailing ::= 2*' ' +//! ``` +//! +//! The minimum number of the spaces is defined in +//! [`HARD_BREAK_PREFIX_SIZE_MIN`][hard_break_prefix_size_min]. +//! +//! Hard breaks in markdown relate to the HTML element `<br>
`. +//! See [*§ 4.5.27 The `br` element* in the HTML spec][html] for more info. +//! +//! It is also possible to create a hard break with a similar construct: a +//! [hard break (escape)][hard_break_escape] is a backslash followed +//! by a line ending. +//! That construct is recommended because it is similar to a +//! [character escape][character_escape] and similar to how line endings can be +//! “escaped” in other languages. +//! Trailing spaces are typically invisible in editors, or even automatically +//! removed, making hard break (trailing) hard to use. +//! +//! ## References +//! +//! * [`lib/initialize/text.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark/dev/lib/initialize/text.js) +//! * [*§ 6.7 Hard line breaks* in `CommonMark`](https://spec.commonmark.org/0.30/#hard-line-breaks) +//! +//! [text]: crate::content::text +//! [hard_break_escape]: crate::construct::hard_break_escape +//! [character_escape]: crate::construct::character_escape +//! [hard_break_prefix_size_min]: crate::constant::HARD_BREAK_PREFIX_SIZE_MIN +//! [html]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-br-element + +use crate::constant::HARD_BREAK_PREFIX_SIZE_MIN; +use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; + +/// Start of a hard break (trailing). +/// +/// ```markdown +/// a| ␊ +/// b +/// ``` +pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::Char(' ') => { + tokenizer.enter(TokenType::HardBreakTrailing); + tokenizer.enter(TokenType::HardBreakTrailingSpace); + tokenizer.consume(code); + (State::Fn(Box::new(|t, c| inside(t, c, 1))), None) + } + _ => (State::Nok, None), + } +} + +/// Inside the hard break (trailing). 
+/// +/// ```markdown +/// a |␊ +/// b +/// ``` +fn inside(tokenizer: &mut Tokenizer, code: Code, size: usize) -> StateFnResult { + match code { + Code::Char(' ') => { + tokenizer.consume(code); + ( + State::Fn(Box::new(move |t, c| inside(t, c, size + 1))), + None, + ) + } + Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') + if size >= HARD_BREAK_PREFIX_SIZE_MIN => + { + tokenizer.exit(TokenType::HardBreakTrailingSpace); + tokenizer.exit(TokenType::HardBreakTrailing); + (State::Ok, Some(vec![code])) + } + _ => (State::Nok, None), + } +} diff --git a/src/construct/mod.rs b/src/construct/mod.rs index 27f4308..880d055 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -26,7 +26,8 @@ //! * [code (text)][code_text] //! * content //! * definition -//! * [hard break escape][hard_break_escape] +//! * [hard break (escape)][hard_break_escape] +//! * [hard break (trailing)][hard_break_trailing] //! * [heading (atx)][heading_atx] //! * heading (setext) //! * [html (flow)][html_flow] @@ -61,6 +62,7 @@ pub mod code_fenced; pub mod code_indented; pub mod code_text; pub mod hard_break_escape; +pub mod hard_break_trailing; pub mod heading_atx; pub mod html_flow; pub mod html_text; -- cgit