From 2ce19d9fd8f75ee1e3d62762e91f5d18303d4d6b Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Mon, 11 Jul 2022 19:49:34 +0200 Subject: Add support for lazy lines --- src/construct/code_fenced.rs | 44 +++++++++++++------ src/construct/code_indented.rs | 23 +++++----- src/construct/html_flow.rs | 60 ++++++++++++++++++-------- src/construct/mod.rs | 1 + src/construct/partial_non_lazy_continuation.rs | 26 +++++++++++ 5 files changed, 112 insertions(+), 42 deletions(-) create mode 100644 src/construct/partial_non_lazy_continuation.rs (limited to 'src/construct') diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs index c7b2334..18beb92 100644 --- a/src/construct/code_fenced.rs +++ b/src/construct/code_fenced.rs @@ -102,7 +102,10 @@ //! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element use crate::constant::{CODE_FENCED_SEQUENCE_SIZE_MIN, TAB_SIZE}; -use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}; +use crate::construct::{ + partial_non_lazy_continuation::start as partial_non_lazy_continuation, + partial_space_or_tab::{space_or_tab, space_or_tab_min_max}, +}; use crate::token::Token; use crate::tokenizer::{Code, ContentType, State, StateFnResult, Tokenizer}; use crate::util::span::from_exit_event; @@ -376,22 +379,35 @@ fn meta(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { fn at_break(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { let clone = info.clone(); - match code { - Code::None => after(tokenizer, code, info), - Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => tokenizer.attempt( - |t, c| close_begin(t, c, info), - |ok| { - if ok { - Box::new(|t, c| after(t, c, clone)) - } else { - Box::new(|t, c| content_before(t, c, clone)) - } - }, - )(tokenizer, code), - _ => unreachable!("expected eof/eol"), + if tokenizer.lazy { + after(tokenizer, code, info) + } else { + tokenizer.check(partial_non_lazy_continuation, |ok| { + if ok { + Box::new(move |t, c| at_non_lazy_break(t, c, clone)) + } else { + Box::new(move |t, c| after(t, c, clone)) + } + })(tokenizer, code) } } +/// To do. +fn at_non_lazy_break(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { + let clone = info.clone(); + + tokenizer.attempt( + |t, c| close_begin(t, c, info), + |ok| { + if ok { + Box::new(|t, c| after(t, c, clone)) + } else { + Box::new(|t, c| content_before(t, c, clone)) + } + }, + )(tokenizer, code) +} + /// Before a closing fence, at the line ending. /// /// ```markdown diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs index 8966249..74a0938 100644 --- a/src/construct/code_indented.rs +++ b/src/construct/code_indented.rs @@ -128,17 +128,20 @@ fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// cd /// ``` fn further_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - // To do: `nok` if lazy line. - match code { - Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { - tokenizer.enter(Token::LineEnding); - tokenizer.consume(code); - tokenizer.exit(Token::LineEnding); - (State::Fn(Box::new(further_start)), None) + if tokenizer.lazy { + (State::Nok, None) + } else { + match code { + Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { + tokenizer.enter(Token::LineEnding); + tokenizer.consume(code); + tokenizer.exit(Token::LineEnding); + (State::Fn(Box::new(further_start)), None) + } + _ => tokenizer.attempt(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), |ok| { + Box::new(if ok { further_end } else { further_begin }) + })(tokenizer, code), } - _ => tokenizer.attempt(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), |ok| { - Box::new(if ok { further_end } else { further_begin }) - })(tokenizer, code), } } diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs index f30db3f..a8b1efc 100644 --- a/src/construct/html_flow.rs +++ b/src/construct/html_flow.rs @@ -100,7 +100,9 @@ use crate::constant::{HTML_BLOCK_NAMES, HTML_RAW_NAMES, HTML_RAW_SIZE_MAX, TAB_SIZE}; use crate::construct::{ - blank_line::start as blank_line, partial_space_or_tab::space_or_tab_min_max, + blank_line::start as blank_line, + partial_non_lazy_continuation::start as partial_non_lazy_continuation, + partial_space_or_tab::space_or_tab_min_max, }; use crate::token::Token; use crate::tokenizer::{Code, State, StateFnResult, Tokenizer}; @@ -425,7 +427,7 @@ fn tag_name(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes info.kind = Kind::Complete; // Do not support complete HTML when interrupting. - if tokenizer.interrupt { + if tokenizer.interrupt && !tokenizer.lazy { (State::Nok, None) } else if info.start_tag { complete_attribute_name_before(tokenizer, code, info) @@ -805,25 +807,52 @@ fn continuation_at_line_ending(tokenizer: &mut Tokenizer, code: Code, info: Info /// asd /// ``` fn html_continue_start(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { + tokenizer.check(partial_non_lazy_continuation, |ok| { + let func = if ok { + html_continue_start_non_lazy + } else { + html_continue_after + }; + Box::new(move |t, c| func(t, c, info)) + })(tokenizer, code) +} + +/// To do. +#[allow(clippy::needless_pass_by_value)] +fn html_continue_after(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { + tokenizer.exit(Token::HtmlFlow); + // Feel free to interrupt. + tokenizer.interrupt = false; + // Restore previous `concrete`. + tokenizer.concrete = info.concrete; + (State::Ok, Some(vec![code])) +} + +/// To do. +fn html_continue_start_non_lazy( + tokenizer: &mut Tokenizer, + code: Code, + info: Info, +) -> StateFnResult { match code { - Code::None => { - tokenizer.exit(Token::HtmlFlow); - // Feel free to interrupt. - tokenizer.interrupt = false; - // Restore previous `concrete`. - tokenizer.concrete = info.concrete; - (State::Ok, Some(vec![code])) - } - // To do: do not allow lazy lines. Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { tokenizer.enter(Token::LineEnding); tokenizer.consume(code); tokenizer.exit(Token::LineEnding); ( - State::Fn(Box::new(|t, c| html_continue_start(t, c, info))), + State::Fn(Box::new(|t, c| html_continue_before(t, c, info))), None, ) } + _ => unreachable!("expected eol"), + } +} + +fn html_continue_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { + match code { + Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { + html_continue_start(tokenizer, code, info) + } _ => { tokenizer.enter(Token::HtmlFlowData); continuation(tokenizer, code, info) @@ -976,12 +1005,7 @@ fn continuation_close(tokenizer: &mut Tokenizer, code: Code, info: Info) -> Stat match code { Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { tokenizer.exit(Token::HtmlFlowData); - tokenizer.exit(Token::HtmlFlow); - // Feel free to interrupt. - tokenizer.interrupt = false; - // Restore previous `concrete`. - tokenizer.concrete = info.concrete; - (State::Ok, Some(vec![code])) + html_continue_after(tokenizer, code, info) } _ => { tokenizer.consume(code); diff --git a/src/construct/mod.rs b/src/construct/mod.rs index ac830ef..06ff4e9 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -84,6 +84,7 @@ pub mod paragraph; pub mod partial_data; pub mod partial_destination; pub mod partial_label; +pub mod partial_non_lazy_continuation; pub mod partial_space_or_tab; pub mod partial_title; pub mod partial_whitespace; diff --git a/src/construct/partial_non_lazy_continuation.rs b/src/construct/partial_non_lazy_continuation.rs new file mode 100644 index 0000000..7964de3 --- /dev/null +++ b/src/construct/partial_non_lazy_continuation.rs @@ -0,0 +1,26 @@ +//! To do. + +use crate::token::Token; +use crate::tokenizer::{Code, State, StateFnResult, Tokenizer}; + +/// To do. +pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { + tokenizer.enter(Token::LineEnding); + tokenizer.consume(code); + tokenizer.exit(Token::LineEnding); + (State::Fn(Box::new(non_lazy_after)), None) + } + _ => (State::Nok, None), + } +} + +/// To do. +fn non_lazy_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + if tokenizer.lazy { + (State::Nok, None) + } else { + (State::Ok, Some(vec![code])) + } +} -- cgit