From dfd11b1bc155ae1fba9975a90c2dc83dc07697b4 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Tue, 28 Jun 2022 14:18:17 +0200 Subject: Fix jumps in `edit_map` * Use resolve more often (e.g., heading (atx, setext)) * Fix to link whole phrasing (e.g., one big chunk of text in heading (atx, setext), titles, labels) * Replace `ChunkText`, `ChunkString`, with `event.content_type: Option<ContentType>` * Refactor to externalize `edit_map` from `label` --- src/construct/partial_space_or_tab.rs | 161 +++++++++++++++++++++++++--------- 1 file changed, 120 insertions(+), 41 deletions(-) (limited to 'src/construct/partial_space_or_tab.rs') diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs index 43bdc53..8df7601 100644 --- a/src/construct/partial_space_or_tab.rs +++ b/src/construct/partial_space_or_tab.rs @@ -4,7 +4,8 @@ //! //! * [`micromark-factory-space/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-space/dev/index.js) -use crate::tokenizer::{Code, State, StateFn, StateFnResult, TokenType, Tokenizer}; +use crate::subtokenize::link; +use crate::tokenizer::{Code, ContentType, State, StateFn, StateFnResult, TokenType, Tokenizer}; /// Options to parse whitespace. #[derive(Debug)] @@ -15,6 +16,25 @@ pub struct Options { pub max: usize, /// Token type to use for whitespace events. pub kind: TokenType, + /// To do. + pub content_type: Option<ContentType>, + pub connect: bool, +} + +#[derive(Debug)] +pub struct OneLineEndingOptions { + /// To do. + pub content_type: Option<ContentType>, + pub connect: bool, +} + +/// Options to parse whitespace. +#[derive(Debug)] +struct OneLineInfo { + /// Whether something was seen. + connect: bool, + /// Configuration. + options: OneLineEndingOptions, } /// Options to parse whitespace. 
@@ -35,45 +55,6 @@ pub fn space_or_tab() -> Box<StateFn> { space_or_tab_min_max(1, usize::MAX) } -pub fn space_or_tab_one_line_ending() -> Box<StateFn> { - Box::new(|tokenizer, code| { - tokenizer.attempt(space_or_tab(), move |ok| { - Box::new(move |tokenizer, code| match code { - Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { - tokenizer.enter(TokenType::LineEnding); - tokenizer.consume(code); - tokenizer.exit(TokenType::LineEnding); - ( - State::Fn(Box::new(tokenizer.attempt_opt( - space_or_tab(), - move |_t, code| { - if !matches!( - code, - Code::None - | Code::CarriageReturnLineFeed - | Code::Char('\r' | '\n') - ) { - (State::Ok, Some(vec![code])) - } else { - (State::Nok, None) - } - }, - ))), - None, - ) - } - _ => { - if ok { - (State::Ok, Some(vec![code])) - } else { - (State::Nok, None) - } - } - }) - })(tokenizer, code) - }) -} - /// Between `x` and `y` `space_or_tab` /// /// ```bnf @@ -84,6 +65,8 @@ pub fn space_or_tab_min_max(min: usize, max: usize) -> Box<StateFn> { kind: TokenType::SpaceOrTab, min, max, + content_type: None, + connect: false, }) } @@ -104,7 +87,13 @@ pub fn space_or_tab_with_options(options: Options) -> Box<StateFn> { fn start(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult { match code { Code::VirtualSpace | Code::Char('\t' | ' ') if info.options.max > 0 => { - tokenizer.enter(info.options.kind.clone()); + tokenizer.enter_with_content(info.options.kind.clone(), info.options.content_type); + + if info.options.content_type.is_some() { + let index = tokenizer.events.len() - 1; + link(&mut tokenizer.events, index); + } + tokenizer.consume(code); info.size += 1; (State::Fn(Box::new(|t, c| inside(t, c, info))), None) @@ -146,3 +135,93 @@ fn inside(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResul } } } + +pub fn space_or_tab_one_line_ending() -> Box<StateFn> { + space_or_tab_one_line_ending_with_options(OneLineEndingOptions { + content_type: None, + connect: false, + }) +} + +pub fn 
space_or_tab_one_line_ending_with_options(options: OneLineEndingOptions) -> Box<StateFn> { + Box::new(move |tokenizer, code| { + let mut info = OneLineInfo { + connect: false, + options, + }; + + tokenizer.attempt( + space_or_tab_with_options(Options { + kind: TokenType::SpaceOrTab, + min: 1, + max: usize::MAX, + content_type: info.options.content_type, + connect: info.options.connect, + }), + move |ok| { + if ok && info.options.content_type.is_some() { + info.connect = true; + } + + Box::new(move |tokenizer, code| match code { + Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { + at_eol(tokenizer, code, info) + } + _ => { + if ok { + (State::Ok, Some(vec![code])) + } else { + (State::Nok, None) + } + } + }) + }, + )(tokenizer, code) + }) +} + +fn at_eol(tokenizer: &mut Tokenizer, code: Code, mut info: OneLineInfo) -> StateFnResult { + match code { + Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { + tokenizer.enter_with_content(TokenType::LineEnding, info.options.content_type); + + if info.options.content_type.is_some() { + if info.connect { + let index = tokenizer.events.len() - 1; + link(&mut tokenizer.events, index); + } else { + info.connect = true; + } + } + + tokenizer.consume(code); + tokenizer.exit(TokenType::LineEnding); + ( + State::Fn(Box::new(tokenizer.attempt_opt( + space_or_tab_with_options(Options { + kind: TokenType::SpaceOrTab, + min: 1, + max: usize::MAX, + content_type: info.options.content_type, + connect: info.connect, + }), + after_eol, + ))), + None, + ) + } + _ => unreachable!("expected eol"), + } +} + +fn after_eol(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + // Blank line not allowed. + if matches!( + code, + Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') + ) { + (State::Nok, None) + } else { + (State::Ok, Some(vec![code])) + } +} -- cgit