about | summary | refs | log | tree | commit | diff | stats
path: root/src/content
diff options
context:
space:
mode:
author: Titus Wormer <tituswormer@gmail.com> 2022-06-30 16:35:13 +0200
committer: Titus Wormer <tituswormer@gmail.com> 2022-06-30 16:35:13 +0200
commit: be62b2e29a61774100f676cfdd9b100cadf1905f (patch)
tree: 4349e259fc0150526dc32242b92d85218091fca5 /src/content
parent: 13588776d65601a41ddfce85f618e8aaa55951cc (diff)
download: markdown-rs-be62b2e29a61774100f676cfdd9b100cadf1905f.tar.gz
markdown-rs-be62b2e29a61774100f676cfdd9b100cadf1905f.tar.bz2
markdown-rs-be62b2e29a61774100f676cfdd9b100cadf1905f.zip
Add support for trimming whitespace around string, text
This commit introduces trimming of initial and final whitespace around the whole string or text, or around line endings inside that string or text.

* Add `register_resolver_before`, to run resolvers earlier than others; this is used for labels
* Add a resolver to merge `data` events, which are the most frequent tokens and can occur adjacently. In `micromark-js` this sped up parsing a lot
* Fix a bug where a virtual space was not seen as an okay event
* Refactor to enable all whitespace tests that were previously turned off
Diffstat (limited to 'src/content')
-rw-r--r-- src/content/string.rs 17
-rw-r--r-- src/content/text.rs 22
2 files changed, 25 insertions, 14 deletions
diff --git a/src/content/string.rs b/src/content/string.rs
index 53e88b1..cc8ee53 100644
--- a/src/content/string.rs
+++ b/src/content/string.rs
@@ -14,13 +14,16 @@
use crate::construct::{
character_escape::start as character_escape, character_reference::start as character_reference,
- partial_data::start as data,
+ partial_data::start as data, partial_whitespace::whitespace,
};
use crate::tokenizer::{Code, State, StateFnResult, Tokenizer};
-const MARKERS: [Code; 2] = [
- Code::Char('&'), // `character_reference`
- Code::Char('\\'), // `character_escape`
+const MARKERS: [Code; 5] = [
+ Code::VirtualSpace, // `whitespace`
+ Code::Char('\t'), // `whitespace`
+ Code::Char(' '), // `whitespace`
+ Code::Char('&'), // `character_reference`
+ Code::Char('\\'), // `character_escape`
];
/// Before string.
@@ -34,7 +37,11 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::None => (State::Ok, None),
_ => tokenizer.attempt_n(
- vec![Box::new(character_reference), Box::new(character_escape)],
+ vec![
+ Box::new(character_reference),
+ Box::new(character_escape),
+ Box::new(whitespace),
+ ],
|ok| Box::new(if ok { start } else { before_data }),
)(tokenizer, code),
}
diff --git a/src/content/text.rs b/src/content/text.rs
index 183072e..c3f4e1b 100644
--- a/src/content/text.rs
+++ b/src/content/text.rs
@@ -24,18 +24,21 @@ use crate::construct::{
hard_break_trailing::start as hard_break_trailing, html_text::start as html_text,
label_end::start as label_end, label_start_image::start as label_start_image,
label_start_link::start as label_start_link, partial_data::start as data,
+ partial_whitespace::whitespace,
};
use crate::tokenizer::{Code, State, StateFnResult, Tokenizer};
-const MARKERS: [Code; 8] = [
- Code::Char(' '), // `hard_break_trailing`
- Code::Char('!'), // `label_start_image`
- Code::Char('&'), // `character_reference`
- Code::Char('<'), // `autolink`, `html_text`
- Code::Char('['), // `label_start_link`
- Code::Char('\\'), // `character_escape`, `hard_break_escape`
- Code::Char(']'), // `label_end`
- Code::Char('`'), // `code_text`
+const MARKERS: [Code; 10] = [
+ Code::VirtualSpace, // `whitespace`
+ Code::Char('\t'), // `whitespace`
+ Code::Char(' '), // `hard_break_trailing`, `whitespace`
+ Code::Char('!'), // `label_start_image`
+ Code::Char('&'), // `character_reference`
+ Code::Char('<'), // `autolink`, `html_text`
+ Code::Char('['), // `label_start_link`
+ Code::Char('\\'), // `character_escape`, `hard_break_escape`
+ Code::Char(']'), // `label_end`
+ Code::Char('`'), // `code_text`
];
/// Before text.
@@ -62,6 +65,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
Box::new(label_end),
Box::new(label_start_image),
Box::new(label_start_link),
+ Box::new(whitespace),
],
|ok| Box::new(if ok { start } else { before_data }),
)(tokenizer, code),