aboutsummaryrefslogtreecommitdiffstats
path: root/src/construct/partial_space_or_tab.rs
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-06-20 17:06:00 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-06-20 17:06:00 +0200
commit65dd765cceee8bdccc74c08066eec59a579a16b1 (patch)
treecb60ab00039135b6f0a65efcb508f73a8b39aa11 /src/construct/partial_space_or_tab.rs
parent61271d73128f8553f8c4c17927828cde52a25eba (diff)
downloadmarkdown-rs-65dd765cceee8bdccc74c08066eec59a579a16b1.tar.gz
markdown-rs-65dd765cceee8bdccc74c08066eec59a579a16b1.tar.bz2
markdown-rs-65dd765cceee8bdccc74c08066eec59a579a16b1.zip
Add improved whitespace handling
* add several helpers for parsing between x and y `space_or_tab`s * use those helpers in a bunch of places * move initial indent parsing to flow constructs themselves
Diffstat (limited to 'src/construct/partial_space_or_tab.rs')
-rw-r--r--src/construct/partial_space_or_tab.rs98
1 files changed, 98 insertions, 0 deletions
diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs
new file mode 100644
index 0000000..40ece49
--- /dev/null
+++ b/src/construct/partial_space_or_tab.rs
@@ -0,0 +1,98 @@
+//! Several helpers to parse whitespace (`space_or_tab`).
+//!
+//! ## References
+//!
+//! * [`micromark-factory-space/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-space/dev/index.js)
+
+use crate::tokenizer::{Code, State, StateFn, StateFnResult, TokenType, Tokenizer};
+
+/// Optional `space_or_tab`: zero or more, via `space_or_tab_min_max(0, usize::MAX)`.
+///
+/// ```bnf
+/// space_or_tab_opt ::= *( ' ' | '\t' )
+/// ```
+pub fn space_or_tab_opt() -> Box<StateFn> {
+ space_or_tab_min_max(0, usize::MAX)
+}
+
+/// Between `min` (`x`) and `max` (`y`) `space_or_tab`, using `TokenType::Whitespace` as the token type.
+///
+/// ```bnf
+/// space_or_tab_min_max ::= x*y( ' ' | '\t' )
+/// ```
+pub fn space_or_tab_min_max(min: usize, max: usize) -> Box<StateFn> {
+ space_or_tab(TokenType::Whitespace, min, max)
+}
+
+/// Between `min` (`x`) and `max` (`y`) `space_or_tab`, with the given token type (`kind`).
+///
+/// ```bnf
+/// space_or_tab ::= x*y( ' ' | '\t' )
+/// ```
+pub fn space_or_tab(kind: TokenType, min: usize, max: usize) -> Box<StateFn> {
+ Box::new(move |t, c| start(t, c, kind, min, max)) // the returned state function defers to `start`
+}
+
+/// Before whitespace: on a tab, space, or virtual space (when `max > 0`), enter `kind` and consume it.
+///
+/// ```markdown
+/// alpha| bravo
+/// ```
+fn start(
+ tokenizer: &mut Tokenizer,
+ code: Code,
+ kind: TokenType,
+ min: usize,
+ max: usize,
+) -> StateFnResult {
+ match code {
+ Code::VirtualSpace | Code::Char('\t' | ' ') if max > 0 => {
+ tokenizer.enter(kind.clone()); // clone: `kind` itself is moved into the closure below
+ tokenizer.consume(code);
+ (
+ State::Fn(Box::new(move |tokenizer, code| {
+ inside(tokenizer, code, kind, min, max, 1) // one code consumed so far
+ })),
+ None,
+ )
+ }
+ _ => ( // not whitespace (or `max == 0`): nothing was entered or consumed
+ if min == 0 { State::Ok } else { State::Nok }, // ok only when no whitespace is required
+ Some(vec![code]),
+ ),
+ }
+}
+
+/// In whitespace, with `size` codes consumed so far: consume more while `size < max`, else exit `kind`.
+///
+/// ```markdown
+/// alpha |bravo
+/// alpha | bravo
+/// ```
+fn inside(
+ tokenizer: &mut Tokenizer,
+ code: Code,
+ kind: TokenType,
+ min: usize,
+ max: usize,
+ size: usize,
+) -> StateFnResult {
+ match code {
+ Code::VirtualSpace | Code::Char('\t' | ' ') if size < max => { // room left below `max`
+ tokenizer.consume(code);
+ (
+ State::Fn(Box::new(move |tokenizer, code| {
+ inside(tokenizer, code, kind, min, max, size + 1) // count the code just consumed
+ })),
+ None,
+ )
+ }
+ _ => { // not whitespace, or `max` reached: close the token without consuming `code`
+ tokenizer.exit(kind);
+ (
+ if size >= min { State::Ok } else { State::Nok }, // ok only if at least `min` were consumed
+ Some(vec![code]),
+ )
+ }
+ }
+}