From 58ba69452a25c3d4b2059c01cc6cd837159d2f90 Mon Sep 17 00:00:00 2001
From: Titus Wormer
Date: Thu, 16 Jun 2022 11:34:35 +0200
Subject: Add support for hard break escape
---
src/compiler.rs | 6 ++++
src/construct/character_escape.rs | 11 ++++---
src/construct/hard_break_escape.rs | 61 ++++++++++++++++++++++++++++++++++++++
src/construct/mod.rs | 3 +-
src/content/text.rs | 7 +++--
src/tokenizer.rs | 14 +++++++--
6 files changed, 89 insertions(+), 13 deletions(-)
create mode 100644 src/construct/hard_break_escape.rs
(limited to 'src')
diff --git a/src/compiler.rs b/src/compiler.rs
index 6127231..3aacca0 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -150,6 +150,8 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
| TokenType::BlankLineEnding
| TokenType::BlankLineWhitespace
| TokenType::Whitespace
+ | TokenType::HardBreakEscape
+ | TokenType::HardBreakEscapeMarker
| TokenType::HtmlFlowData
| TokenType::HtmlTextData
| TokenType::CodeFencedFence
@@ -192,6 +194,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
| TokenType::CharacterEscapeMarker
| TokenType::CharacterReference
| TokenType::CharacterReferenceMarkerSemi
+ | TokenType::HardBreakEscapeMarker
| TokenType::Autolink
| TokenType::AutolinkMarker => {}
TokenType::HtmlFlow | TokenType::HtmlText => {
@@ -208,6 +211,9 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
TokenType::Paragraph => {
buf_tail_mut(buffers).push("</p>".to_string());
}
+ TokenType::HardBreakEscape => {
+ buf_tail_mut(buffers).push("<br />".to_string());
+ }
TokenType::CodeIndented | TokenType::CodeFenced => {
let seen_data =
code_flow_seen_data.expect("`code_flow_seen_data` must be defined");
diff --git a/src/construct/character_escape.rs b/src/construct/character_escape.rs
index 7bab42d..baedd4b 100644
--- a/src/construct/character_escape.rs
+++ b/src/construct/character_escape.rs
@@ -11,11 +11,11 @@
//! slash, or a slash followed by anything other than an ASCII punctuation
//! character, is exactly that: just a slash.
//! To escape (most) arbitrary characters, use a
-//! [character reference][] instead
+//! [character reference][character_reference] instead
//! (as in, `&amp;`, `&#123;`, or say `&#x9;`).
//! It is also possible to escape a line ending in text with a similar
-//! construct: a backslash followed by a line ending (that is part of the
-//! construct instead of ending it).
+//! construct: a [hard break escape][hard_break_escape] is a backslash followed
+//! by a line ending (that is part of the construct instead of ending it).
//!
//! ## References
//!
@@ -24,9 +24,8 @@
//!
//! [string]: crate::content::string
//! [text]: crate::content::text
-//! [character reference]: crate::construct::character_reference
-//!
-//!
+//! [character_reference]: crate::construct::character_reference
+//! [hard_break_escape]: crate::construct::hard_break_escape
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
diff --git a/src/construct/hard_break_escape.rs b/src/construct/hard_break_escape.rs
new file mode 100644
index 0000000..a7712d6
--- /dev/null
+++ b/src/construct/hard_break_escape.rs
@@ -0,0 +1,61 @@
+//! Hard break escapes are a construct that occurs in the [text][] content
+//! type.
+//!
+//! They’re formed with the following BNF:
+//!
+//! ```bnf
+//! ; Restriction: followed by a line ending (that is part of the content
+//! ; instead of ending it).
+//! hard_break_escape ::= '\\'
+//! ```
+//! It is also possible to escape punctuation characters with a similar
+//! construct: a [character escape][character_escape] is a backslash followed
+//! by an ASCII punctuation character.
+//! Arbitrary characters can be escaped with
+//! [character reference][character_reference]s.
+//!
+//! ## References
+//!
+//! * [`hard-break-escape.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/hard-break-escape.js)
+//! * [*§ 6.7 Hard line breaks* in `CommonMark`](https://spec.commonmark.org/0.30/#hard-line-breaks)
+//!
+//! [text]: crate::content::text
+//! [character_escape]: crate::construct::character_escape
+//! [character_reference]: crate::construct::character_reference
+//!
+//!
+
+use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+
+/// Start of a hard break escape: on `\`, opens the construct and consumes the marker.
+///
+/// ```markdown
+/// a|\
+/// ```
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::Char('\\') => {
+ tokenizer.enter(TokenType::HardBreakEscape);
+ tokenizer.enter(TokenType::HardBreakEscapeMarker);
+ tokenizer.consume(code);
+ tokenizer.exit(TokenType::HardBreakEscapeMarker);
+ (State::Fn(Box::new(inside)), None) // `HardBreakEscape` is still open; `inside` exits it.
+ }
+ _ => (State::Nok, None), // Anything other than a backslash is not this construct.
+ }
+}
+
+/// After `\`: the hard break escape only succeeds when the next code is a line ending.
+///
+/// ```markdown
+/// a\|
+/// ```
+fn inside(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
+ tokenizer.exit(TokenType::HardBreakEscape);
+ (State::Ok, Some(vec![code])) // The line ending is returned in the result, not consumed here.
+ }
+ _ => (State::Nok, None), // `\` followed by anything else is not a hard break escape.
+ }
+}
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index 1fa57d5..27f4308 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -26,7 +26,7 @@
//! * [code (text)][code_text]
//! * content
//! * definition
-//! * hard break escape
+//! * [hard break escape][hard_break_escape]
//! * [heading (atx)][heading_atx]
//! * heading (setext)
//! * [html (flow)][html_flow]
@@ -60,6 +60,7 @@ pub mod character_reference;
pub mod code_fenced;
pub mod code_indented;
pub mod code_text;
+pub mod hard_break_escape;
pub mod heading_atx;
pub mod html_flow;
pub mod html_text;
diff --git a/src/content/text.rs b/src/content/text.rs
index 9d510cb..d4d5493 100644
--- a/src/content/text.rs
+++ b/src/content/text.rs
@@ -8,7 +8,7 @@
//! * [Autolink][crate::construct::autolink]
//! * Attention
//! * [HTML (text)][crate::construct::html_text]
-//! * Hard break escape
+//! * [Hard break escape][crate::construct::hard_break_escape]
//! * [Code (text)][crate::construct::code_text]
//! * Line ending
//! * Label start (image)
@@ -19,7 +19,7 @@
use crate::construct::{
autolink::start as autolink, character_escape::start as character_escape,
character_reference::start as character_reference, code_text::start as code_text,
- html_text::start as html_text,
+ hard_break_escape::start as hard_break_escape, html_text::start as html_text,
};
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
@@ -35,9 +35,10 @@ use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::None => (State::Ok, None),
- _ => tokenizer.attempt_5(
+ _ => tokenizer.attempt_6(
character_reference,
character_escape,
+ hard_break_escape,
autolink,
html_text,
code_text,
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index c5df42b..a63d209 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -60,6 +60,9 @@ pub enum TokenType {
Data,
+ HardBreakEscape,
+ HardBreakEscapeMarker,
+
HtmlFlow,
HtmlFlowData,
@@ -441,6 +444,7 @@ impl Tokenizer {
None,
None,
None,
+ None,
done,
)
}
@@ -459,18 +463,20 @@ impl Tokenizer {
Some(Box::new(c)),
None,
None,
+ None,
done,
)
}
- #[allow(clippy::many_single_char_names)]
- pub fn attempt_5(
+ #[allow(clippy::too_many_arguments, clippy::many_single_char_names)]
+ pub fn attempt_6(
&mut self,
a: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
b: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
c: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
d: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
e: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
+ f: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
done: impl FnOnce(bool) -> Box<StateFn> + 'static,
) -> Box<StateFn> {
self.call_multiple(
@@ -480,6 +486,7 @@ impl Tokenizer {
Some(Box::new(c)),
Some(Box::new(d)),
Some(Box::new(e)),
+ Some(Box::new(f)),
done,
)
}
@@ -493,6 +500,7 @@ impl Tokenizer {
c: Option<Box<StateFn>>,
d: Option<Box<StateFn>>,
e: Option<Box<StateFn>>,
+ f: Option<Box<StateFn>>,
done: impl FnOnce(bool) -> Box<StateFn> + 'static,
) -> Box<StateFn> {
if let Some(head) = a {
@@ -501,7 +509,7 @@ impl Tokenizer {
done(ok)
} else {
Box::new(move |tokenizer: &mut Tokenizer, code| {
- tokenizer.call_multiple(check, b, c, d, e, None, done)(tokenizer, code)
+ tokenizer.call_multiple(check, b, c, d, e, f, None, done)(tokenizer, code)
})
}
};
--
cgit