diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-06-16 12:55:50 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-06-16 12:55:50 +0200 |
commit | 7350acc692a79d9d4cf56afbc53ac3c9f2a6237c (patch) | |
tree | 02f8b83230a40b509adf4b4872e313544c7fc80f /src | |
parent | 58ba69452a25c3d4b2059c01cc6cd837159d2f90 (diff) | |
download | markdown-rs-7350acc692a79d9d4cf56afbc53ac3c9f2a6237c.tar.gz markdown-rs-7350acc692a79d9d4cf56afbc53ac3c9f2a6237c.tar.bz2 markdown-rs-7350acc692a79d9d4cf56afbc53ac3c9f2a6237c.zip |
Add support for hard break (trailing)
Diffstat (limited to 'src')
-rw-r--r-- | src/compiler.rs | 5 | ||||
-rw-r--r-- | src/constant.rs | 6 | ||||
-rw-r--r-- | src/construct/character_escape.rs | 2 | ||||
-rw-r--r-- | src/construct/hard_break_escape.rs | 19 | ||||
-rw-r--r-- | src/construct/hard_break_trailing.rs | 83 | ||||
-rw-r--r-- | src/construct/mod.rs | 4 | ||||
-rw-r--r-- | src/content/text.rs | 15 | ||||
-rw-r--r-- | src/tokenizer.rs | 13 | ||||
-rw-r--r-- | src/util/span.rs | 2 |
9 files changed, 132 insertions, 17 deletions
diff --git a/src/compiler.rs b/src/compiler.rs index 3aacca0..9f84a38 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -152,6 +152,8 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St | TokenType::Whitespace | TokenType::HardBreakEscape | TokenType::HardBreakEscapeMarker + | TokenType::HardBreakTrailing + | TokenType::HardBreakTrailingSpace | TokenType::HtmlFlowData | TokenType::HtmlTextData | TokenType::CodeFencedFence @@ -195,6 +197,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St | TokenType::CharacterReference | TokenType::CharacterReferenceMarkerSemi | TokenType::HardBreakEscapeMarker + | TokenType::HardBreakTrailingSpace | TokenType::Autolink | TokenType::AutolinkMarker => {} TokenType::HtmlFlow | TokenType::HtmlText => { @@ -211,7 +214,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St TokenType::Paragraph => { buf_tail_mut(buffers).push("</p>".to_string()); } - TokenType::HardBreakEscape => { + TokenType::HardBreakEscape | TokenType::HardBreakTrailing => { buf_tail_mut(buffers).push("<br />".to_string()); } TokenType::CodeIndented | TokenType::CodeFenced => { diff --git a/src/constant.rs b/src/constant.rs index d2fb238..ff9e62e 100644 --- a/src/constant.rs +++ b/src/constant.rs @@ -44,6 +44,12 @@ pub const AUTOLINK_SCHEME_SIZE_MAX: usize = 32; /// [autolink]: crate::construct::autolink pub const AUTOLINK_DOMAIN_SIZE_MAX: usize = 63; +/// The number of spaces needed, before a line ending, for a [hard break +/// (trailing)][hard_break_trailing] to form. +/// +/// [hard_break_trailing]: crate::construct::hard_break_trailing +pub const HARD_BREAK_PREFIX_SIZE_MIN: usize = 2; + /// The number of markers needed for a [thematic break][thematic_break] to form. /// /// Like many things in markdown, the number is `3`. 
diff --git a/src/construct/character_escape.rs b/src/construct/character_escape.rs index baedd4b..743cbf8 100644 --- a/src/construct/character_escape.rs +++ b/src/construct/character_escape.rs @@ -14,7 +14,7 @@ //! [character reference][character_reference] instead //! (as in, `&amp;`, `&#123;`, or say `&#9;`). //! It is also possible to escape a line ending in text with a similar -//! construct: a [hard break escape][hard_break_escape] is a backslash followed +//! construct: a [hard break (escape)][hard_break_escape] is a backslash followed //! by a line ending (that is part of the construct instead of ending it). //! //! ## References diff --git a/src/construct/hard_break_escape.rs b/src/construct/hard_break_escape.rs index a7712d6..51da953 100644 --- a/src/construct/hard_break_escape.rs +++ b/src/construct/hard_break_escape.rs @@ -1,4 +1,4 @@ -//! Hard break escapes are a construct that occurs in the [text][] content +//! Hard break (escape) is a construct that occurs in the [text][] content //! type. //! //! They’re formed with the following BNF: @@ -8,6 +8,15 @@ //! ; instead of ending it). //! hard_break_escape ::= '\\' //! ``` +//! +//! Hard breaks in markdown relate to the HTML element `<br>`. +//! See [*§ 4.5.27 The `br` element* in the HTML spec][html] for more info. +//! +//! It is also possible to create a hard break with a +//! [hard break (trailing)][hard_break_trailing]. +//! That construct is not recommended because trailing spaces are typically +//! invisible in editors, or even automatically removed, making them hard to use. +//! //! It is also possible to escape punctuation characters with a similar //! construct: a [character escape][character_escape] is a backslash followed //! by an ASCII punctuation character. @@ -22,12 +31,12 @@ //! [text]: crate::content::text //! [character_escape]: crate::construct::character_escape //! [character_reference]: crate::construct::character_reference -//! -//! <!-- To do: link `hard_break_escape` --> +//! 
[hard_break_trailing]: crate::construct::hard_break_trailing +//! [html]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-br-element use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; -/// Start of a hard break escape. +/// Start of a hard break (escape). /// /// ```markdown /// a|\ @@ -45,7 +54,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { } } -/// At the end of a hard break escape, after `\`. +/// At the end of a hard break (escape), after `\`. /// /// ```markdown /// a\| diff --git a/src/construct/hard_break_trailing.rs b/src/construct/hard_break_trailing.rs new file mode 100644 index 0000000..46337c5 --- /dev/null +++ b/src/construct/hard_break_trailing.rs @@ -0,0 +1,83 @@ +//! Hard break (trailing) is a construct that occurs in the [text][] content +//! type. +//! +//! They’re formed with the following BNF: +//! +//! ```bnf +//! ; Restriction: followed by a line ending (that is part of the construct +//! ; instead of ending it). +//! hard_break_trailing ::= 2*' ' +//! ``` +//! +//! The minimum number of the spaces is defined in +//! [`HARD_BREAK_PREFIX_SIZE_MIN`][hard_break_prefix_size_min]. +//! +//! Hard breaks in markdown relate to the HTML element `<br>`. +//! See [*§ 4.5.27 The `br` element* in the HTML spec][html] for more info. +//! +//! It is also possible to create a hard break with a similar construct: a +//! [hard break (escape)][hard_break_escape] is a backslash followed +//! by a line ending. +//! That construct is recommended because it is similar to a +//! [character escape][character_escape] and similar to how line endings can be +//! “escaped” in other languages. +//! Trailing spaces are typically invisible in editors, or even automatically +//! removed, making hard break (trailing) hard to use. +//! +//! ## References +//! +//! * [`lib/initialize/text.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark/dev/lib/initialize/text.js) +//! 
* [*§ 6.7 Hard line breaks* in `CommonMark`](https://spec.commonmark.org/0.30/#hard-line-breaks) +//! +//! [text]: crate::content::text +//! [hard_break_escape]: crate::construct::hard_break_escape +//! [character_escape]: crate::construct::character_escape +//! [hard_break_prefix_size_min]: crate::constant::HARD_BREAK_PREFIX_SIZE_MIN +//! [html]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-br-element + +use crate::constant::HARD_BREAK_PREFIX_SIZE_MIN; +use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; + +/// Start of a hard break (trailing). +/// +/// ```markdown +/// a| ␊ +/// b +/// ``` +pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::Char(' ') => { + tokenizer.enter(TokenType::HardBreakTrailing); + tokenizer.enter(TokenType::HardBreakTrailingSpace); + tokenizer.consume(code); + (State::Fn(Box::new(|t, c| inside(t, c, 1))), None) + } + _ => (State::Nok, None), + } +} + +/// Inside the hard break (trailing). +/// +/// ```markdown +/// a |␊ +/// b +/// ``` +fn inside(tokenizer: &mut Tokenizer, code: Code, size: usize) -> StateFnResult { + match code { + Code::Char(' ') => { + tokenizer.consume(code); + ( + State::Fn(Box::new(move |t, c| inside(t, c, size + 1))), + None, + ) + } + Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') + if size >= HARD_BREAK_PREFIX_SIZE_MIN => + { + tokenizer.exit(TokenType::HardBreakTrailingSpace); + tokenizer.exit(TokenType::HardBreakTrailing); + (State::Ok, Some(vec![code])) + } + _ => (State::Nok, None), + } +} diff --git a/src/construct/mod.rs b/src/construct/mod.rs index 27f4308..880d055 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -26,7 +26,8 @@ //! * [code (text)][code_text] //! * content //! * definition -//! * [hard break escape][hard_break_escape] +//! * [hard break (escape)][hard_break_escape] +//! * [hard break (trailing)][hard_break_trailing] //! * [heading (atx)][heading_atx] //! * heading (setext) //! 
* [html (flow)][html_flow] @@ -61,6 +62,7 @@ pub mod code_fenced; pub mod code_indented; pub mod code_text; pub mod hard_break_escape; +pub mod hard_break_trailing; pub mod heading_atx; pub mod html_flow; pub mod html_text; diff --git a/src/content/text.rs b/src/content/text.rs index d4d5493..f61b390 100644 --- a/src/content/text.rs +++ b/src/content/text.rs @@ -8,7 +8,8 @@ //! * [Autolink][crate::construct::autolink] //! * Attention //! * [HTML (text)][crate::construct::html_text] -//! * [Hard break escape][crate::construct::hard_break_escape] +//! * [Hard break (escape)][crate::construct::hard_break_escape] +//! * [Hard break (trailing)][crate::construct::hard_break_trailing] //! * [Code (text)][crate::construct::code_text] //! * Line ending //! * Label start (image) @@ -19,7 +20,8 @@ use crate::construct::{ autolink::start as autolink, character_escape::start as character_escape, character_reference::start as character_reference, code_text::start as code_text, - hard_break_escape::start as hard_break_escape, html_text::start as html_text, + hard_break_escape::start as hard_break_escape, + hard_break_trailing::start as hard_break_trailing, html_text::start as html_text, }; use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; @@ -35,10 +37,11 @@ use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::None => (State::Ok, None), - _ => tokenizer.attempt_6( + _ => tokenizer.attempt_7( character_reference, character_escape, hard_break_escape, + hard_break_trailing, autolink, html_text, code_text, @@ -78,12 +81,12 @@ fn before_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// ``` fn in_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { - Code::None => { + Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { tokenizer.exit(TokenType::Data); - (State::Ok, None) + 
before_data(tokenizer, code) } // To do: somehow get these markers from constructs. - Code::CarriageReturnLineFeed | Code::Char('\r' | '\n' | '&' | '<' | '\\' | '`') => { + Code::Char(' ' | '&' | '<' | '\\' | '`') => { tokenizer.exit(TokenType::Data); start(tokenizer, code) } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index a63d209..da45ee5 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -62,6 +62,8 @@ pub enum TokenType { HardBreakEscape, HardBreakEscapeMarker, + HardBreakTrailing, + HardBreakTrailingSpace, HtmlFlow, HtmlFlowData, @@ -445,6 +447,7 @@ impl Tokenizer { None, None, None, + None, done, ) } @@ -464,12 +467,13 @@ impl Tokenizer { None, None, None, + None, done, ) } #[allow(clippy::too_many_arguments, clippy::many_single_char_names)] - pub fn attempt_6( + pub fn attempt_7( &mut self, a: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static, b: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static, @@ -477,6 +481,7 @@ impl Tokenizer { d: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static, e: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static, f: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static, + g: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static, done: impl FnOnce(bool) -> Box<StateFn> + 'static, ) -> Box<StateFn> { self.call_multiple( @@ -487,6 +492,7 @@ impl Tokenizer { Some(Box::new(d)), Some(Box::new(e)), Some(Box::new(f)), + Some(Box::new(g)), done, ) } @@ -501,6 +507,7 @@ impl Tokenizer { d: Option<Box<StateFn>>, e: Option<Box<StateFn>>, f: Option<Box<StateFn>>, + g: Option<Box<StateFn>>, done: impl FnOnce(bool) -> Box<StateFn> + 'static, ) -> Box<StateFn> { if let Some(head) = a { @@ -509,7 +516,9 @@ impl Tokenizer { done(ok) } else { Box::new(move |tokenizer: &mut Tokenizer, code| { - tokenizer.call_multiple(check, b, c, d, e, f, None, done)(tokenizer, code) + tokenizer.call_multiple(check, b, c, d, e, f, g, None, done)( + tokenizer, code, + ) }) } }; diff --git 
a/src/util/span.rs b/src/util/span.rs index c48549b..02811cc 100644 --- a/src/util/span.rs +++ b/src/util/span.rs @@ -36,7 +36,7 @@ pub fn from_exit_event(events: &[Event], index: usize) -> Span { assert_eq!( exit.event_type, EventType::Exit, - "expected `get_span` to be called on `exit` event" + "expected `from_exit_event` to be called on `exit` event" ); let mut enter_index = index - 1; |