From 65dd765cceee8bdccc74c08066eec59a579a16b1 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Mon, 20 Jun 2022 17:06:00 +0200 Subject: Add improved whitespace handling * add several helpers for parsing between x and y `space_or_tab`s * use those helpers in a bunch of places * move initial indent parsing to flow constructs themselves --- src/construct/heading_atx.rs | 52 ++++++++++++++++++++------------------------ 1 file changed, 23 insertions(+), 29 deletions(-) (limited to 'src/construct/heading_atx.rs') diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs index ab8b6a5..12d4193 100644 --- a/src/construct/heading_atx.rs +++ b/src/construct/heading_atx.rs @@ -47,6 +47,7 @@ //! [wiki-setext]: https://en.wikipedia.org/wiki/Setext //! [atx]: http://www.aaronsw.com/2002/atx/ +use super::partial_space_or_tab::{space_or_tab, space_or_tab_opt}; use crate::constant::HEADING_ATX_OPENING_FENCE_SIZE_MAX; use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; @@ -56,8 +57,17 @@ use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; /// |## alpha /// ``` pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + tokenizer.enter(TokenType::HeadingAtx); + tokenizer.go(space_or_tab_opt(), before)(tokenizer, code) +} + +/// Start of a heading (atx), after whitespace. 
+/// +/// ```markdown +/// |## alpha +/// ``` +pub fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { if Code::Char('#') == code { - tokenizer.enter(TokenType::HeadingAtx); tokenizer.enter(TokenType::HeadingAtxSequence); sequence_open(tokenizer, code, 0) } else { @@ -72,12 +82,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// ``` fn sequence_open(tokenizer: &mut Tokenizer, code: Code, rank: usize) -> StateFnResult { match code { - Code::None - | Code::CarriageReturnLineFeed - | Code::VirtualSpace - | Code::Char('\t' | '\n' | '\r' | ' ') - if rank > 0 => - { + Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') if rank > 0 => { tokenizer.exit(TokenType::HeadingAtxSequence); at_break(tokenizer, code) } @@ -90,6 +95,13 @@ fn sequence_open(tokenizer: &mut Tokenizer, code: Code, rank: usize) -> StateFnR None, ) } + _ if rank > 0 => { + tokenizer.exit(TokenType::HeadingAtxSequence); + tokenizer.go( + space_or_tab(TokenType::HeadingAtxWhitespace, 1, usize::MAX), + at_break, + )(tokenizer, code) + } _ => (State::Nok, None), } } @@ -109,10 +121,10 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.exit(TokenType::HeadingAtx); (State::Ok, Some(vec![code])) } - Code::VirtualSpace | Code::Char('\t' | ' ') => { - tokenizer.enter(TokenType::HeadingAtxWhitespace); - whitespace(tokenizer, code) - } + Code::VirtualSpace | Code::Char('\t' | ' ') => tokenizer.go( + space_or_tab(TokenType::HeadingAtxWhitespace, 1, usize::MAX), + at_break, + )(tokenizer, code), Code::Char('#') => { tokenizer.enter(TokenType::HeadingAtxSequence); further_sequence(tokenizer, code) @@ -141,24 +153,6 @@ fn further_sequence(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { } } -/// In whitespace. 
-/// -/// ```markdown -/// ## alpha | bravo -/// ``` -fn whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - match code { - Code::VirtualSpace | Code::Char('\t' | ' ') => { - tokenizer.consume(code); - (State::Fn(Box::new(whitespace)), None) - } - _ => { - tokenizer.exit(TokenType::HeadingAtxWhitespace); - at_break(tokenizer, code) - } - } -} - /// In text. /// /// ```markdown -- cgit