aboutsummaryrefslogtreecommitdiffstats
path: root/src/construct
diff options
context:
space:
mode:
author: Titus Wormer <tituswormer@gmail.com> 2022-06-16 11:34:35 +0200
committer: Titus Wormer <tituswormer@gmail.com> 2022-06-16 11:34:35 +0200
commit: 58ba69452a25c3d4b2059c01cc6cd837159d2f90 (patch)
tree: 7f6d49449f564ec8606cc3881210d8b27df11961 /src/construct
parent: 7875ada79cea1194dc9e15acee36ed0700be70e6 (diff)
downloadmarkdown-rs-58ba69452a25c3d4b2059c01cc6cd837159d2f90.tar.gz
markdown-rs-58ba69452a25c3d4b2059c01cc6cd837159d2f90.tar.bz2
markdown-rs-58ba69452a25c3d4b2059c01cc6cd837159d2f90.zip
Add support for hard break escape
Diffstat (limited to 'src/construct')
-rw-r--r-- src/construct/character_escape.rs 11
-rw-r--r-- src/construct/hard_break_escape.rs 61
-rw-r--r-- src/construct/mod.rs 3
3 files changed, 68 insertions, 7 deletions
diff --git a/src/construct/character_escape.rs b/src/construct/character_escape.rs
index 7bab42d..baedd4b 100644
--- a/src/construct/character_escape.rs
+++ b/src/construct/character_escape.rs
@@ -11,11 +11,11 @@
//! slash, or a slash followed by anything other than an ASCII punctuation
//! character, is exactly that: just a slash.
//! To escape (most) arbitrary characters, use a
-//! [character reference][] instead
+//! [character reference][character_reference] instead
//! (as in, `&amp;`, `&#123;`, or say `&#x9;`).
//! It is also possible to escape a line ending in text with a similar
-//! construct: a backslash followed by a line ending (that is part of the
-//! construct instead of ending it).
+//! construct: a [hard break escape][hard_break_escape] is a backslash followed
+//! by a line ending (that is part of the construct instead of ending it).
//!
//! ## References
//!
@@ -24,9 +24,8 @@
//!
//! [string]: crate::content::string
//! [text]: crate::content::text
-//! [character reference]: crate::construct::character_reference
-//!
-//! <!-- To do: link `hard_break_escape` -->
+//! [character_reference]: crate::construct::character_reference
+//! [hard_break_escape]: crate::construct::hard_break_escape
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
diff --git a/src/construct/hard_break_escape.rs b/src/construct/hard_break_escape.rs
new file mode 100644
index 0000000..a7712d6
--- /dev/null
+++ b/src/construct/hard_break_escape.rs
@@ -0,0 +1,61 @@
+//! Hard break escapes are a construct that occurs in the [text][] content
+//! type.
+//!
+//! They’re formed with the following BNF:
+//!
+//! ```bnf
+//! ; Restriction: followed by a line ending (that is part of the construct
+//! ; instead of ending it).
+//! hard_break_escape ::= '\\'
+//! ```
+//! It is also possible to escape punctuation characters with a similar
+//! construct: a [character escape][character_escape] is a backslash followed
+//! by an ASCII punctuation character.
+//! Arbitrary characters can be escaped with
+//! [character reference][character_reference]s.
+//!
+//! ## References
+//!
+//! * [`hard-break-escape.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/hard-break-escape.js)
+//! * [*§ 6.7 Hard line breaks* in `CommonMark`](https://spec.commonmark.org/0.30/#hard-line-breaks)
+//!
+//! [text]: crate::content::text
+//! [character_escape]: crate::construct::character_escape
+//! [character_reference]: crate::construct::character_reference
+//!
+//! <!-- To do: link the trailing (whitespace) hard break construct once it exists -->
+
+use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+
+/// Start of a hard break escape: only a backslash (`\`) is accepted here.
+///
+/// ```markdown
+/// a|\
+/// ```
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::Char('\\') => {
+ tokenizer.enter(TokenType::HardBreakEscape); // Left open here; closed in `inside`.
+ tokenizer.enter(TokenType::HardBreakEscapeMarker);
+ tokenizer.consume(code);
+ tokenizer.exit(TokenType::HardBreakEscapeMarker); // The marker token spans just the `\`.
+ (State::Fn(Box::new(inside)), None) // Continue in `inside` with the next code.
+ }
+ _ => (State::Nok, None), // Anything but `\`: not this construct.
+ }
+}
+
+/// After the `\` marker: the escape only succeeds when a line ending follows.
+///
+/// ```markdown
+/// a\|
+/// ```
+fn inside(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
+ tokenizer.exit(TokenType::HardBreakEscape); // Closed without consuming the line ending.
+ (State::Ok, Some(vec![code])) // NOTE(review): `code` appears to be handed back unconsumed — confirm against `StateFnResult`.
+ }
+ _ => (State::Nok, None), // No line ending after `\`: not a hard break escape.
+ }
+}
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index 1fa57d5..27f4308 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -26,7 +26,7 @@
//! * [code (text)][code_text]
//! * content
//! * definition
-//! * hard break escape
+//! * [hard break escape][hard_break_escape]
//! * [heading (atx)][heading_atx]
//! * heading (setext)
//! * [html (flow)][html_flow]
@@ -60,6 +60,7 @@ pub mod character_reference;
pub mod code_fenced;
pub mod code_indented;
pub mod code_text;
+pub mod hard_break_escape;
pub mod heading_atx;
pub mod html_flow;
pub mod html_text;