From 65dd765cceee8bdccc74c08066eec59a579a16b1 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Mon, 20 Jun 2022 17:06:00 +0200 Subject: Add improved whitespace handling * add several helpers for parsing betwen x and y `space_or_tab`s * use those helpers in a bunch of places * move initial indent parsing to flow constructs themselves --- src/construct/partial_space_or_tab.rs | 98 +++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 src/construct/partial_space_or_tab.rs (limited to 'src/construct/partial_space_or_tab.rs') diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs new file mode 100644 index 0000000..40ece49 --- /dev/null +++ b/src/construct/partial_space_or_tab.rs @@ -0,0 +1,98 @@ +//! Several helpers to parse whitespace (`space_or_tab`). +//! +//! ## References +//! +//! * [`micromark-factory-space/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-space/dev/index.js) + +use crate::tokenizer::{Code, State, StateFn, StateFnResult, TokenType, Tokenizer}; + +/// Optional `space_or_tab` +/// +/// ```bnf +/// space_or_tab_opt ::= *( ' ' '\t' ) +/// ``` +pub fn space_or_tab_opt() -> Box { + space_or_tab_min_max(0, usize::MAX) +} + +/// Between `x` and `y` `space_or_tab` +/// +/// ```bnf +/// space_or_tab_min_max ::= x*y( ' ' '\t' ) +/// ``` +pub fn space_or_tab_min_max(min: usize, max: usize) -> Box { + space_or_tab(TokenType::Whitespace, min, max) +} + +/// Between `x` and `y` `space_or_tab`, with the given token type. +/// +/// ```bnf +/// space_or_tab ::= x*y( ' ' '\t' ) +/// ``` +pub fn space_or_tab(kind: TokenType, min: usize, max: usize) -> Box { + Box::new(move |t, c| start(t, c, kind, min, max)) +} + +/// Before whitespace. +/// +/// ```markdown +/// alpha| bravo +/// ``` +fn start( + tokenizer: &mut Tokenizer, + code: Code, + kind: TokenType, + min: usize, + max: usize, +) -> StateFnResult { + match code { + Code::VirtualSpace | Code::Char('\t' | ' ') if max > 0 => { + tokenizer.enter(kind.clone()); + tokenizer.consume(code); + ( + State::Fn(Box::new(move |tokenizer, code| { + inside(tokenizer, code, kind, min, max, 1) + })), + None, + ) + } + _ => ( + if min == 0 { State::Ok } else { State::Nok }, + Some(vec![code]), + ), + } +} + +/// In whitespace. +/// +/// ```markdown +/// alpha |bravo +/// alpha | bravo +/// ``` +fn inside( + tokenizer: &mut Tokenizer, + code: Code, + kind: TokenType, + min: usize, + max: usize, + size: usize, +) -> StateFnResult { + match code { + Code::VirtualSpace | Code::Char('\t' | ' ') if size < max => { + tokenizer.consume(code); + ( + State::Fn(Box::new(move |tokenizer, code| { + inside(tokenizer, code, kind, min, max, size + 1) + })), + None, + ) + } + _ => { + tokenizer.exit(kind); + ( + if size >= min { State::Ok } else { State::Nok }, + Some(vec![code]), + ) + } + } +} -- cgit