diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-06-20 17:06:00 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-06-20 17:06:00 +0200 |
commit | 65dd765cceee8bdccc74c08066eec59a579a16b1 (patch) | |
tree | cb60ab00039135b6f0a65efcb508f73a8b39aa11 /src/construct/code_indented.rs | |
parent | 61271d73128f8553f8c4c17927828cde52a25eba (diff) | |
download | markdown-rs-65dd765cceee8bdccc74c08066eec59a579a16b1.tar.gz markdown-rs-65dd765cceee8bdccc74c08066eec59a579a16b1.tar.bz2 markdown-rs-65dd765cceee8bdccc74c08066eec59a579a16b1.zip |
Add improved whitespace handling
* add several helpers for parsing betwen x and y `space_or_tab`s
* use those helpers in a bunch of places
* move initial indent parsing to flow constructs themselves
Diffstat (limited to 'src/construct/code_indented.rs')
-rw-r--r-- | src/construct/code_indented.rs | 96 |
1 files changed, 35 insertions, 61 deletions
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs index 55b8901..64956be 100644 --- a/src/construct/code_indented.rs +++ b/src/construct/code_indented.rs @@ -38,6 +38,7 @@ //! [html-pre]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-pre-element //! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element +use super::partial_space_or_tab::{space_or_tab_min_max, space_or_tab_opt}; use crate::constant::TAB_SIZE; use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; @@ -46,46 +47,13 @@ use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; /// ```markdown /// | asd /// ``` -pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - match code { - Code::VirtualSpace | Code::Char(' ' | '\t') => { - tokenizer.enter(TokenType::CodeIndented); - tokenizer.enter(TokenType::CodeIndentedPrefixWhitespace); - indent(tokenizer, code, 0) - } - _ => (State::Nok, None), - } -} - -/// Inside the initial whitespace. -/// -/// ```markdown -/// | asd -/// | asd -/// | asd -/// |asd -/// ``` /// /// > **Parsing note**: it is not needed to check if this first line is a /// > filled line (that it has a non-whitespace character), because blank lines /// > are parsed already, so we never run into that. -fn indent(tokenizer: &mut Tokenizer, code: Code, size: usize) -> StateFnResult { - match code { - _ if size == TAB_SIZE => { - tokenizer.exit(TokenType::CodeIndentedPrefixWhitespace); - at_break(tokenizer, code) - } - Code::VirtualSpace | Code::Char(' ' | '\t') => { - tokenizer.consume(code); - ( - State::Fn(Box::new(move |tokenizer, code| { - indent(tokenizer, code, size + 1) - })), - None, - ) - } - _ => (State::Nok, None), - } +pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + tokenizer.enter(TokenType::CodeIndented); + tokenizer.go(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), at_break)(tokenizer, code) } /// At a break. @@ -153,39 +121,45 @@ fn further_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.exit(TokenType::LineEnding); (State::Fn(Box::new(further_start)), None) } - Code::VirtualSpace | Code::Char(' ' | '\t') => { - tokenizer.enter(TokenType::CodeIndentedPrefixWhitespace); - further_indent(tokenizer, code, 0) - } - _ => (State::Nok, None), + _ => tokenizer.attempt(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), |ok| { + Box::new(if ok { further_end } else { further_begin }) + })(tokenizer, code), } } -/// Inside further whitespace. +/// After a proper indent. /// /// ```markdown /// asd -/// | asd +/// |asd /// ``` -fn further_indent(tokenizer: &mut Tokenizer, code: Code, size: usize) -> StateFnResult { +fn further_end(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + (State::Ok, Some(vec![code])) +} + +/// At the beginning of a line that is not indented enough. +/// +/// > 👉 **Note**: `␠` represents a space character. +/// +/// ```markdown +/// asd +/// |␠␠ +/// asd +/// ``` +fn further_begin(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + tokenizer.go(space_or_tab_opt(), further_after)(tokenizer, code) +} + +/// After whitespace. +/// +/// ```markdown +/// asd +/// ␠␠| +/// asd +/// ``` +fn further_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { - _ if size == TAB_SIZE => { - tokenizer.exit(TokenType::CodeIndentedPrefixWhitespace); - (State::Ok, Some(vec![code])) - } - Code::VirtualSpace | Code::Char(' ' | '\t') => { - tokenizer.consume(code); - ( - State::Fn(Box::new(move |tokenizer, code| { - further_indent(tokenizer, code, size + 1) - })), - None, - ) - } - Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { - tokenizer.exit(TokenType::CodeIndentedPrefixWhitespace); - further_start(tokenizer, code) - } + Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => further_start(tokenizer, code), _ => (State::Nok, None), } } |