diff options
Diffstat (limited to '')
| -rw-r--r-- | src/compiler.rs | 5 | ||||
| -rw-r--r-- | src/constant.rs | 6 | ||||
| -rw-r--r-- | src/construct/character_escape.rs | 2 | ||||
| -rw-r--r-- | src/construct/hard_break_escape.rs | 19 | ||||
| -rw-r--r-- | src/construct/hard_break_trailing.rs | 83 | ||||
| -rw-r--r-- | src/construct/mod.rs | 4 | ||||
| -rw-r--r-- | src/content/text.rs | 15 | ||||
| -rw-r--r-- | src/tokenizer.rs | 13 | ||||
| -rw-r--r-- | src/util/span.rs | 2 | 
9 files changed, 132 insertions, 17 deletions
| diff --git a/src/compiler.rs b/src/compiler.rs index 3aacca0..9f84a38 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -152,6 +152,8 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St                  | TokenType::Whitespace                  | TokenType::HardBreakEscape                  | TokenType::HardBreakEscapeMarker +                | TokenType::HardBreakTrailing +                | TokenType::HardBreakTrailingSpace                  | TokenType::HtmlFlowData                  | TokenType::HtmlTextData                  | TokenType::CodeFencedFence @@ -195,6 +197,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St                  | TokenType::CharacterReference                  | TokenType::CharacterReferenceMarkerSemi                  | TokenType::HardBreakEscapeMarker +                | TokenType::HardBreakTrailingSpace                  | TokenType::Autolink                  | TokenType::AutolinkMarker => {}                  TokenType::HtmlFlow | TokenType::HtmlText => { @@ -211,7 +214,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St                  TokenType::Paragraph => {                      buf_tail_mut(buffers).push("</p>".to_string());                  } -                TokenType::HardBreakEscape => { +                TokenType::HardBreakEscape | TokenType::HardBreakTrailing => {                      buf_tail_mut(buffers).push("<br />".to_string());                  }                  TokenType::CodeIndented | TokenType::CodeFenced => { diff --git a/src/constant.rs b/src/constant.rs index d2fb238..ff9e62e 100644 --- a/src/constant.rs +++ b/src/constant.rs @@ -44,6 +44,12 @@ pub const AUTOLINK_SCHEME_SIZE_MAX: usize = 32;  /// [autolink]: crate::construct::autolink  pub const AUTOLINK_DOMAIN_SIZE_MAX: usize = 63; +/// The number of spaces needed, before a line ending, for a [hard break +/// (trailing)][hard_break_trailing] to form. 
+/// +/// [hard_break_trailing]: crate::construct::hard_break_trailing +pub const HARD_BREAK_PREFIX_SIZE_MIN: usize = 2; +  /// The number of markers needed for a [thematic break][thematic_break] to form.  ///  /// Like many things in markdown, the number is `3`. diff --git a/src/construct/character_escape.rs b/src/construct/character_escape.rs index baedd4b..743cbf8 100644 --- a/src/construct/character_escape.rs +++ b/src/construct/character_escape.rs @@ -14,7 +14,7 @@  //! [character reference][character_reference] instead  //! (as in, `&`, `{`, or say `	`).  //! It is also possible to escape a line ending in text with a similar -//! construct: a [hard break escape][hard_break_escape] is a backslash followed +//! construct: a [hard break (escape)][hard_break_escape] is a backslash followed  //! by a line ending (that is part of the construct instead of ending it).  //!  //! ## References diff --git a/src/construct/hard_break_escape.rs b/src/construct/hard_break_escape.rs index a7712d6..51da953 100644 --- a/src/construct/hard_break_escape.rs +++ b/src/construct/hard_break_escape.rs @@ -1,4 +1,4 @@ -//! Hard break escapes are a construct that occurs in the  [text][] content +//! Hard break (escape) is a construct that occurs in the  [text][] content  //! type.  //!  //! They’re formed with the following BNF: @@ -8,6 +8,15 @@  //! ; instead of ending it).  //! hard_break_escape ::= '\\'  //! ``` +//! +//! Hard breaks in markdown relate to the HTML element `<br>`. +//! See [*§ 4.5.27 The `br` element* in the HTML spec][html] for more info. +//! +//! It is also possible to create a hard break with a +//! [hard break (trailing)][hard_break_trailing]. +//! That construct is not recommended because trailing spaces are typically +//! invisible in editors, or even automatically removed, making them hard to use. +//!  //! It is also possible to escape punctuation characters with a similar  //! construct: a [character escape][character_escape] is a backslash followed  //! 
by an ASCII punctuation character. @@ -22,12 +31,12 @@  //! [text]: crate::content::text  //! [character_escape]: crate::construct::character_escape  //! [character_reference]: crate::construct::character_reference -//! -//! <!-- To do: link `hard_break_escape` --> +//! [hard_break_trailing]: crate::construct::hard_break_trailing +//! [html]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-br-element  use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; -/// Start of a hard break escape. +/// Start of a hard break (escape).  ///  /// ```markdown  /// a|\ @@ -45,7 +54,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      }  } -/// At the end of a hard break escape, after `\`. +/// At the end of a hard break (escape), after `\`.  ///  /// ```markdown  /// a\| diff --git a/src/construct/hard_break_trailing.rs b/src/construct/hard_break_trailing.rs new file mode 100644 index 0000000..46337c5 --- /dev/null +++ b/src/construct/hard_break_trailing.rs @@ -0,0 +1,83 @@ +//! Hard break (trailing) is a construct that occurs in the  [text][] content +//! type. +//! +//! They’re formed with the following BNF: +//! +//! ```bnf +//! ; Restriction: followed by a line ending  (that is part of the construct +//! ; instead of ending it). +//! hard_break_trailing ::= 2*' ' +//! ``` +//! +//! The minimum number of the spaces is defined in +//! [`HARD_BREAK_PREFIX_SIZE_MIN`][hard_break_prefix_size_min]. +//! +//! Hard breaks in markdown relate to the HTML element `<br>`. +//! See [*§ 4.5.27 The `br` element* in the HTML spec][html] for more info. +//! +//! It is also possible to create a hard break with a similar construct: a +//! [hard break (escape)][hard_break_escape] is a backslash followed +//! by a line ending. +//! That construct is recommended because it is similar to a +//! [character escape][character_escape] and similar to how line endings can be +//! “escaped” in other languages. +//! 
Trailing spaces are typically invisible in editors, or even automatically +//! removed, making hard break (trailing) hard to use. +//! +//! ## References +//! +//! *   [`lib/initialize/text.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark/dev/lib/initialize/text.js) +//! *   [*§ 6.7 Hard line breaks* in `CommonMark`](https://spec.commonmark.org/0.30/#hard-line-breaks) +//! +//! [text]: crate::content::text +//! [hard_break_escape]: crate::construct::hard_break_escape +//! [character_escape]: crate::construct::character_escape +//! [hard_break_prefix_size_min]: crate::constant::HARD_BREAK_PREFIX_SIZE_MIN +//! [html]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-br-element + +use crate::constant::HARD_BREAK_PREFIX_SIZE_MIN; +use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; + +/// Start of a hard break (trailing). +/// +/// ```markdown +/// a|  ␊ +/// b +/// ``` +pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    match code { +        Code::Char(' ') => { +            tokenizer.enter(TokenType::HardBreakTrailing); +            tokenizer.enter(TokenType::HardBreakTrailingSpace); +            tokenizer.consume(code); +            (State::Fn(Box::new(|t, c| inside(t, c, 1))), None) +        } +        _ => (State::Nok, None), +    } +} + +/// Inside the hard break (trailing). 
+/// +/// ```markdown +/// a  |␊ +/// b +/// ``` +fn inside(tokenizer: &mut Tokenizer, code: Code, size: usize) -> StateFnResult { +    match code { +        Code::Char(' ') => { +            tokenizer.consume(code); +            ( +                State::Fn(Box::new(move |t, c| inside(t, c, size + 1))), +                None, +            ) +        } +        Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') +            if size >= HARD_BREAK_PREFIX_SIZE_MIN => +        { +            tokenizer.exit(TokenType::HardBreakTrailingSpace); +            tokenizer.exit(TokenType::HardBreakTrailing); +            (State::Ok, Some(vec![code])) +        } +        _ => (State::Nok, None), +    } +} diff --git a/src/construct/mod.rs b/src/construct/mod.rs index 27f4308..880d055 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -26,7 +26,8 @@  //! *   [code (text)][code_text]  //! *   content  //! *   definition -//! *   [hard break escape][hard_break_escape] +//! *   [hard break (escape)][hard_break_escape] +//! *   [hard break (trailing)][hard_break_trailing]  //! *   [heading (atx)][heading_atx]  //! *   heading (setext)  //! *   [html (flow)][html_flow] @@ -61,6 +62,7 @@ pub mod code_fenced;  pub mod code_indented;  pub mod code_text;  pub mod hard_break_escape; +pub mod hard_break_trailing;  pub mod heading_atx;  pub mod html_flow;  pub mod html_text; diff --git a/src/content/text.rs b/src/content/text.rs index d4d5493..f61b390 100644 --- a/src/content/text.rs +++ b/src/content/text.rs @@ -8,7 +8,8 @@  //! *   [Autolink][crate::construct::autolink]  //! *   Attention  //! *   [HTML (text)][crate::construct::html_text] -//! *   [Hard break escape][crate::construct::hard_break_escape] +//! *   [Hard break (escape)][crate::construct::hard_break_escape] +//! *   [Hard break (trailing)][crate::construct::hard_break_trailing]  //! *   [Code (text)][crate::construct::code_text]  //! *   Line ending  //! 
*   Label start (image) @@ -19,7 +20,8 @@  use crate::construct::{      autolink::start as autolink, character_escape::start as character_escape,      character_reference::start as character_reference, code_text::start as code_text, -    hard_break_escape::start as hard_break_escape, html_text::start as html_text, +    hard_break_escape::start as hard_break_escape, +    hard_break_trailing::start as hard_break_trailing, html_text::start as html_text,  };  use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; @@ -35,10 +37,11 @@ use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};  pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      match code {          Code::None => (State::Ok, None), -        _ => tokenizer.attempt_6( +        _ => tokenizer.attempt_7(              character_reference,              character_escape,              hard_break_escape, +            hard_break_trailing,              autolink,              html_text,              code_text, @@ -78,12 +81,12 @@ fn before_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {  /// ```  fn in_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      match code { -        Code::None => { +        Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {              tokenizer.exit(TokenType::Data); -            (State::Ok, None) +            before_data(tokenizer, code)          }          // To do: somehow get these markers from constructs. 
-        Code::CarriageReturnLineFeed | Code::Char('\r' | '\n' | '&' | '<' | '\\' | '`') => { +        Code::Char(' ' | '&' | '<' | '\\' | '`') => {              tokenizer.exit(TokenType::Data);              start(tokenizer, code)          } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index a63d209..da45ee5 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -62,6 +62,8 @@ pub enum TokenType {      HardBreakEscape,      HardBreakEscapeMarker, +    HardBreakTrailing, +    HardBreakTrailingSpace,      HtmlFlow,      HtmlFlowData, @@ -445,6 +447,7 @@ impl Tokenizer {              None,              None,              None, +            None,              done,          )      } @@ -464,12 +467,13 @@ impl Tokenizer {              None,              None,              None, +            None,              done,          )      }      #[allow(clippy::too_many_arguments, clippy::many_single_char_names)] -    pub fn attempt_6( +    pub fn attempt_7(          &mut self,          a: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,          b: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static, @@ -477,6 +481,7 @@ impl Tokenizer {          d: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,          e: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,          f: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static, +        g: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,          done: impl FnOnce(bool) -> Box<StateFn> + 'static,      ) -> Box<StateFn> {          self.call_multiple( @@ -487,6 +492,7 @@ impl Tokenizer {              Some(Box::new(d)),              Some(Box::new(e)),              Some(Box::new(f)), +            Some(Box::new(g)),              done,          )      } @@ -501,6 +507,7 @@ impl Tokenizer {          d: Option<Box<StateFn>>,          e: Option<Box<StateFn>>,          f: Option<Box<StateFn>>, +        g: Option<Box<StateFn>>,          done: impl FnOnce(bool) -> 
Box<StateFn> + 'static,      ) -> Box<StateFn> {          if let Some(head) = a { @@ -509,7 +516,9 @@ impl Tokenizer {                      done(ok)                  } else {                      Box::new(move |tokenizer: &mut Tokenizer, code| { -                        tokenizer.call_multiple(check, b, c, d, e, f, None, done)(tokenizer, code) +                        tokenizer.call_multiple(check, b, c, d, e, f, g, None, done)( +                            tokenizer, code, +                        )                      })                  }              }; diff --git a/src/util/span.rs b/src/util/span.rs index c48549b..02811cc 100644 --- a/src/util/span.rs +++ b/src/util/span.rs @@ -36,7 +36,7 @@ pub fn from_exit_event(events: &[Event], index: usize) -> Span {      assert_eq!(          exit.event_type,          EventType::Exit, -        "expected `get_span` to be called on `exit` event" +        "expected `from_exit_event` to be called on `exit` event"      );      let mut enter_index = index - 1; | 
