aboutsummaryrefslogtreecommitdiffstats
path: root/src/construct
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-06-16 12:55:50 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-06-16 12:55:50 +0200
commit7350acc692a79d9d4cf56afbc53ac3c9f2a6237c (patch)
tree02f8b83230a40b509adf4b4872e313544c7fc80f /src/construct
parent58ba69452a25c3d4b2059c01cc6cd837159d2f90 (diff)
downloadmarkdown-rs-7350acc692a79d9d4cf56afbc53ac3c9f2a6237c.tar.gz
markdown-rs-7350acc692a79d9d4cf56afbc53ac3c9f2a6237c.tar.bz2
markdown-rs-7350acc692a79d9d4cf56afbc53ac3c9f2a6237c.zip
Add support for hard break (trailing)
Diffstat (limited to 'src/construct')
-rw-r--r--src/construct/character_escape.rs2
-rw-r--r--src/construct/hard_break_escape.rs19
-rw-r--r--src/construct/hard_break_trailing.rs83
-rw-r--r--src/construct/mod.rs4
4 files changed, 101 insertions, 7 deletions
diff --git a/src/construct/character_escape.rs b/src/construct/character_escape.rs
index baedd4b..743cbf8 100644
--- a/src/construct/character_escape.rs
+++ b/src/construct/character_escape.rs
@@ -14,7 +14,7 @@
//! [character reference][character_reference] instead
//! (as in, `&amp;`, `&#123;`, or say `&#x9;`).
//! It is also possible to escape a line ending in text with a similar
-//! construct: a [hard break escape][hard_break_escape] is a backslash followed
+//! construct: a [hard break (escape)][hard_break_escape] is a backslash followed
//! by a line ending (that is part of the construct instead of ending it).
//!
//! ## References
diff --git a/src/construct/hard_break_escape.rs b/src/construct/hard_break_escape.rs
index a7712d6..51da953 100644
--- a/src/construct/hard_break_escape.rs
+++ b/src/construct/hard_break_escape.rs
@@ -1,4 +1,4 @@
-//! Hard break escapes are a construct that occurs in the [text][] content
+//! Hard break (escape) is a construct that occurs in the [text][] content
//! type.
//!
//! They’re formed with the following BNF:
@@ -8,6 +8,15 @@
//! ; instead of ending it).
//! hard_break_escape ::= '\\'
//! ```
+//!
+//! Hard breaks in markdown relate to the HTML element `<br>`.
+//! See [*§ 4.5.27 The `br` element* in the HTML spec][html] for more info.
+//!
+//! It is also possible to create a hard break with a
+//! [hard break (trailing)][hard_break_trailing].
+//! That construct is not recommended because trailing spaces are typically
+//! invisible in editors, or even automatically removed, making them hard to use.
+//!
//! It is also possible to escape punctuation characters with a similar
//! construct: a [character escape][character_escape] is a backslash followed
//! by an ASCII punctuation character.
@@ -22,12 +31,12 @@
//! [text]: crate::content::text
//! [character_escape]: crate::construct::character_escape
//! [character_reference]: crate::construct::character_reference
-//!
-//! <!-- To do: link `hard_break_escape` -->
+//! [hard_break_trailing]: crate::construct::hard_break_trailing
+//! [html]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-br-element
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
-/// Start of a hard break escape.
+/// Start of a hard break (escape).
///
/// ```markdown
/// a|\
@@ -45,7 +54,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
}
-/// At the end of a hard break escape, after `\`.
+/// At the end of a hard break (escape), after `\`.
///
/// ```markdown
/// a\|
diff --git a/src/construct/hard_break_trailing.rs b/src/construct/hard_break_trailing.rs
new file mode 100644
index 0000000..46337c5
--- /dev/null
+++ b/src/construct/hard_break_trailing.rs
@@ -0,0 +1,83 @@
+//! Hard break (trailing) is a construct that occurs in the [text][] content
+//! type.
+//!
+//! It’s formed with the following BNF:
+//!
+//! ```bnf
+//! ; Restriction: followed by a line ending (that is part of the construct
+//! ; instead of ending it).
+//! hard_break_trailing ::= 2*' '
+//! ```
+//!
+//! The minimum number of spaces is defined in
+//! [`HARD_BREAK_PREFIX_SIZE_MIN`][hard_break_prefix_size_min].
+//!
+//! Hard breaks in markdown relate to the HTML element `<br>`.
+//! See [*§ 4.5.27 The `br` element* in the HTML spec][html] for more info.
+//!
+//! It is also possible to create a hard break with a similar construct: a
+//! [hard break (escape)][hard_break_escape] is a backslash followed
+//! by a line ending.
+//! That construct is recommended because it is similar to a
+//! [character escape][character_escape] and similar to how line endings can be
+//! “escaped” in other languages.
+//! Trailing spaces are typically invisible in editors, or even automatically
+//! removed, making hard break (trailing) hard to use.
+//!
+//! ## References
+//!
+//! * [`lib/initialize/text.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark/dev/lib/initialize/text.js)
+//! * [*§ 6.7 Hard line breaks* in `CommonMark`](https://spec.commonmark.org/0.30/#hard-line-breaks)
+//!
+//! [text]: crate::content::text
+//! [hard_break_escape]: crate::construct::hard_break_escape
+//! [character_escape]: crate::construct::character_escape
+//! [hard_break_prefix_size_min]: crate::constant::HARD_BREAK_PREFIX_SIZE_MIN
+//! [html]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-br-element
+
+use crate::constant::HARD_BREAK_PREFIX_SIZE_MIN;
+use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+
+/// Start of a hard break (trailing): expects the first space, anything else is `State::Nok`.
+///
+/// ```markdown
+/// a|  ␊
+/// b
+/// ```
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::Char(' ') => {
+ tokenizer.enter(TokenType::HardBreakTrailing);
+ tokenizer.enter(TokenType::HardBreakTrailingSpace);
+ tokenizer.consume(code);
+ (State::Fn(Box::new(|t, c| inside(t, c, 1))), None) // one space consumed so far
+ }
+ _ => (State::Nok, None),
+ }
+}
+
+/// Inside the hard break (trailing), after `size` spaces: more spaces or a line ending.
+///
+/// ```markdown
+/// a  |␊
+/// b
+/// ```
+fn inside(tokenizer: &mut Tokenizer, code: Code, size: usize) -> StateFnResult {
+ match code {
+ Code::Char(' ') => {
+ tokenizer.consume(code);
+ (
+ State::Fn(Box::new(move |t, c| inside(t, c, size + 1))), // count this space
+ None,
+ )
+ }
+ Code::CarriageReturnLineFeed | Code::Char('\r' | '\n')
+ if size >= HARD_BREAK_PREFIX_SIZE_MIN =>
+ {
+ tokenizer.exit(TokenType::HardBreakTrailingSpace);
+ tokenizer.exit(TokenType::HardBreakTrailing);
+ (State::Ok, Some(vec![code])) // line ending is not consumed; it is handed back in the result
+ }
+ _ => (State::Nok, None), // any other code, or a line ending after too few spaces
+ }
+}
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index 27f4308..880d055 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -26,7 +26,8 @@
//! * [code (text)][code_text]
//! * content
//! * definition
-//! * [hard break escape][hard_break_escape]
+//! * [hard break (escape)][hard_break_escape]
+//! * [hard break (trailing)][hard_break_trailing]
//! * [heading (atx)][heading_atx]
//! * heading (setext)
//! * [html (flow)][html_flow]
@@ -61,6 +62,7 @@ pub mod code_fenced;
pub mod code_indented;
pub mod code_text;
pub mod hard_break_escape;
+pub mod hard_break_trailing;
pub mod heading_atx;
pub mod html_flow;
pub mod html_text;