diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-06-30 16:35:13 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-06-30 16:35:13 +0200 |
commit | be62b2e29a61774100f676cfdd9b100cadf1905f (patch) | |
tree | 4349e259fc0150526dc32242b92d85218091fca5 /src/construct/partial_whitespace.rs | |
parent | 13588776d65601a41ddfce85f618e8aaa55951cc (diff) | |
download | markdown-rs-be62b2e29a61774100f676cfdd9b100cadf1905f.tar.gz markdown-rs-be62b2e29a61774100f676cfdd9b100cadf1905f.tar.bz2 markdown-rs-be62b2e29a61774100f676cfdd9b100cadf1905f.zip |
Add support for trimming whitespace around string, text
This commit introduces trimming initial and final whitespace around the
whole string or text, or around line endings inside that string or text.
* Add `register_resolver_before`, to run resolvers earlier than others,
used for labels
* Add resolver to merge `data` events, which are the most frequent tokens
and can occur adjacently.
In `micromark-js` this sped up parsing a lot.
* Fix a bug where a virtual space was not seen as an okay event
* Refactor to enable all turned off whitespace tests
Diffstat (limited to '')
-rw-r--r-- | src/construct/partial_whitespace.rs | 56 |
1 files changed, 56 insertions, 0 deletions
diff --git a/src/construct/partial_whitespace.rs b/src/construct/partial_whitespace.rs new file mode 100644 index 0000000..9a7a54d --- /dev/null +++ b/src/construct/partial_whitespace.rs @@ -0,0 +1,56 @@ +//! Trailing whitespace occurs in [string][] and [text][]. +//! +//! It occurs at the start or end of the whole, or around line endings. +//! This whitespace is ignored +//! +//! They’re formed with the following BNF: +//! +//! ```bnf +//! ; Restriction: the start and end here count as an eol. +//! whitespace ::= 0.*space_or_tab eol 0.*space_or_tab +//! ``` +//! +//! ## References +//! +//! * [`initialize/text.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark/dev/lib/initialize/text.js) +//! +//! [string]: crate::content::string +//! [text]: crate::content::text + +use super::partial_space_or_tab::space_or_tab; +use crate::tokenizer::{Code, State, StateFnResult, Tokenizer}; + +/// Parse initial or final whitespace. +pub fn whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + tokenizer.go( + // Nothing if there’s no whitespace. + space_or_tab(), + if matches!( + tokenizer.previous, + Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') + ) { + // If there’s whitespace, and we were at an eol/eof, `ok` + ok + } else { + // If there’s whitespace, and we were not at an eol/eof, there must be one here. + at_eol + }, + )(tokenizer, code) +} + +/// After whitespace, at an eol/eof. +fn at_eol(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + if matches!( + code, + Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') + ) { + ok(tokenizer, code) + } else { + (State::Nok, None) + } +} + +/// Fine. +fn ok(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + (State::Ok, Some(vec![code])) +} |