diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-06-20 17:06:00 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-06-20 17:06:00 +0200 |
commit | 65dd765cceee8bdccc74c08066eec59a579a16b1 (patch) | |
tree | cb60ab00039135b6f0a65efcb508f73a8b39aa11 /src/construct/partial_space_or_tab.rs | |
parent | 61271d73128f8553f8c4c17927828cde52a25eba (diff) | |
download | markdown-rs-65dd765cceee8bdccc74c08066eec59a579a16b1.tar.gz markdown-rs-65dd765cceee8bdccc74c08066eec59a579a16b1.tar.bz2 markdown-rs-65dd765cceee8bdccc74c08066eec59a579a16b1.zip |
Add improved whitespace handling
* add several helpers for parsing between x and y `space_or_tab`s
* use those helpers in a bunch of places
* move initial indent parsing to flow constructs themselves
Diffstat (limited to 'src/construct/partial_space_or_tab.rs')
-rw-r--r-- | src/construct/partial_space_or_tab.rs | 98 |
1 files changed, 98 insertions, 0 deletions
diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs new file mode 100644 index 0000000..40ece49 --- /dev/null +++ b/src/construct/partial_space_or_tab.rs @@ -0,0 +1,98 @@ +//! Several helpers to parse whitespace (`space_or_tab`). +//! +//! ## References +//! +//! * [`micromark-factory-space/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-space/dev/index.js) + +use crate::tokenizer::{Code, State, StateFn, StateFnResult, TokenType, Tokenizer}; + +/// Optional `space_or_tab` +/// +/// ```bnf +/// space_or_tab_opt ::= *( ' ' '\t' ) +/// ``` +pub fn space_or_tab_opt() -> Box<StateFn> { + space_or_tab_min_max(0, usize::MAX) +} + +/// Between `x` and `y` `space_or_tab` +/// +/// ```bnf +/// space_or_tab_min_max ::= x*y( ' ' '\t' ) +/// ``` +pub fn space_or_tab_min_max(min: usize, max: usize) -> Box<StateFn> { + space_or_tab(TokenType::Whitespace, min, max) +} + +/// Between `x` and `y` `space_or_tab`, with the given token type. +/// +/// ```bnf +/// space_or_tab ::= x*y( ' ' '\t' ) +/// ``` +pub fn space_or_tab(kind: TokenType, min: usize, max: usize) -> Box<StateFn> { + Box::new(move |t, c| start(t, c, kind, min, max)) +} + +/// Before whitespace. +/// +/// ```markdown +/// alpha| bravo +/// ``` +fn start( + tokenizer: &mut Tokenizer, + code: Code, + kind: TokenType, + min: usize, + max: usize, +) -> StateFnResult { + match code { + Code::VirtualSpace | Code::Char('\t' | ' ') if max > 0 => { + tokenizer.enter(kind.clone()); + tokenizer.consume(code); + ( + State::Fn(Box::new(move |tokenizer, code| { + inside(tokenizer, code, kind, min, max, 1) + })), + None, + ) + } + _ => ( + if min == 0 { State::Ok } else { State::Nok }, + Some(vec![code]), + ), + } +} + +/// In whitespace. 
+/// +/// ```markdown +/// alpha |bravo +/// alpha | bravo +/// ``` +fn inside( + tokenizer: &mut Tokenizer, + code: Code, + kind: TokenType, + min: usize, + max: usize, + size: usize, +) -> StateFnResult { + match code { + Code::VirtualSpace | Code::Char('\t' | ' ') if size < max => { + tokenizer.consume(code); + ( + State::Fn(Box::new(move |tokenizer, code| { + inside(tokenizer, code, kind, min, max, size + 1) + })), + None, + ) + } + _ => { + tokenizer.exit(kind); + ( + if size >= min { State::Ok } else { State::Nok }, + Some(vec![code]), + ) + } + } +} |