From 2ce19d9fd8f75ee1e3d62762e91f5d18303d4d6b Mon Sep 17 00:00:00 2001
From: Titus Wormer <tituswormer@gmail.com>
Date: Mon, 11 Jul 2022 19:49:34 +0200
Subject: Add support for lazy lines

---
 src/construct/code_fenced.rs                   | 44 +++++++++++++------
 src/construct/code_indented.rs                 | 23 +++++-----
 src/construct/html_flow.rs                     | 60 ++++++++++++++++++--------
 src/construct/mod.rs                           |  1 +
 src/construct/partial_non_lazy_continuation.rs | 26 +++++++++++
 5 files changed, 112 insertions(+), 42 deletions(-)
 create mode 100644 src/construct/partial_non_lazy_continuation.rs

(limited to 'src/construct')

diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index c7b2334..18beb92 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -102,7 +102,10 @@
 //! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
 
 use crate::constant::{CODE_FENCED_SEQUENCE_SIZE_MIN, TAB_SIZE};
-use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
+use crate::construct::{
+    partial_non_lazy_continuation::start as partial_non_lazy_continuation,
+    partial_space_or_tab::{space_or_tab, space_or_tab_min_max},
+};
 use crate::token::Token;
 use crate::tokenizer::{Code, ContentType, State, StateFnResult, Tokenizer};
 use crate::util::span::from_exit_event;
@@ -376,22 +379,35 @@ fn meta(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
 fn at_break(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
     let clone = info.clone();
 
-    match code {
-        Code::None => after(tokenizer, code, info),
-        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => tokenizer.attempt(
-            |t, c| close_begin(t, c, info),
-            |ok| {
-                if ok {
-                    Box::new(|t, c| after(t, c, clone))
-                } else {
-                    Box::new(|t, c| content_before(t, c, clone))
-                }
-            },
-        )(tokenizer, code),
-        _ => unreachable!("expected eof/eol"),
+    if tokenizer.lazy {
+        after(tokenizer, code, info)
+    } else {
+        tokenizer.check(partial_non_lazy_continuation, |ok| {
+            if ok {
+                Box::new(move |t, c| at_non_lazy_break(t, c, clone))
+            } else {
+                Box::new(move |t, c| after(t, c, clone))
+            }
+        })(tokenizer, code)
     }
 }
 
+/// At an eol in fenced code, on a line already known not to be lazy.
+fn at_non_lazy_break(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
+    let clone = info.clone();
+
+    tokenizer.attempt(
+        |t, c| close_begin(t, c, info),
+        |ok| {
+            if ok {
+                Box::new(|t, c| after(t, c, clone))
+            } else {
+                Box::new(|t, c| content_before(t, c, clone))
+            }
+        },
+    )(tokenizer, code)
+}
+
 /// Before a closing fence, at the line ending.
 ///
 /// ```markdown
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs
index 8966249..74a0938 100644
--- a/src/construct/code_indented.rs
+++ b/src/construct/code_indented.rs
@@ -128,17 +128,20 @@ fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
 ///     cd
 /// ```
 fn further_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    // To do: `nok` if lazy line.
-    match code {
-        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
-            tokenizer.enter(Token::LineEnding);
-            tokenizer.consume(code);
-            tokenizer.exit(Token::LineEnding);
-            (State::Fn(Box::new(further_start)), None)
+    if tokenizer.lazy {
+        (State::Nok, None)
+    } else {
+        match code {
+            Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+                tokenizer.enter(Token::LineEnding);
+                tokenizer.consume(code);
+                tokenizer.exit(Token::LineEnding);
+                (State::Fn(Box::new(further_start)), None)
+            }
+            _ => tokenizer.attempt(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), |ok| {
+                Box::new(if ok { further_end } else { further_begin })
+            })(tokenizer, code),
         }
-        _ => tokenizer.attempt(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), |ok| {
-            Box::new(if ok { further_end } else { further_begin })
-        })(tokenizer, code),
     }
 }
 
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index f30db3f..a8b1efc 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -100,7 +100,9 @@
 
 use crate::constant::{HTML_BLOCK_NAMES, HTML_RAW_NAMES, HTML_RAW_SIZE_MAX, TAB_SIZE};
 use crate::construct::{
-    blank_line::start as blank_line, partial_space_or_tab::space_or_tab_min_max,
+    blank_line::start as blank_line,
+    partial_non_lazy_continuation::start as partial_non_lazy_continuation,
+    partial_space_or_tab::space_or_tab_min_max,
 };
 use crate::token::Token;
 use crate::tokenizer::{Code, State, StateFnResult, Tokenizer};
@@ -425,7 +427,7 @@ fn tag_name(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes
                 info.kind = Kind::Complete;
 
                 // Do not support complete HTML when interrupting.
-                if tokenizer.interrupt {
+                if tokenizer.interrupt && !tokenizer.lazy {
                     (State::Nok, None)
                 } else if info.start_tag {
                     complete_attribute_name_before(tokenizer, code, info)
@@ -805,25 +807,52 @@ fn continuation_at_line_ending(tokenizer: &mut Tokenizer, code: Code, info: Info
 /// asd
 /// ```
 fn html_continue_start(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
+    tokenizer.check(partial_non_lazy_continuation, |ok| {
+        let func = if ok {
+            html_continue_start_non_lazy
+        } else {
+            html_continue_after
+        };
+        Box::new(move |t, c| func(t, c, info))
+    })(tokenizer, code)
+}
+
+/// Done with HTML flow: exit it, allow interrupting, restore `concrete`.
+#[allow(clippy::needless_pass_by_value)]
+fn html_continue_after(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
+    tokenizer.exit(Token::HtmlFlow);
+    // Feel free to interrupt.
+    tokenizer.interrupt = false;
+    // Restore previous `concrete`.
+    tokenizer.concrete = info.concrete;
+    (State::Ok, Some(vec![code]))
+}
+
+/// In HTML flow continuation, at an eol that starts a non-lazy line.
+fn html_continue_start_non_lazy(
+    tokenizer: &mut Tokenizer,
+    code: Code,
+    info: Info,
+) -> StateFnResult {
     match code {
-        Code::None => {
-            tokenizer.exit(Token::HtmlFlow);
-            // Feel free to interrupt.
-            tokenizer.interrupt = false;
-            // Restore previous `concrete`.
-            tokenizer.concrete = info.concrete;
-            (State::Ok, Some(vec![code]))
-        }
-        // To do: do not allow lazy lines.
         Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
             tokenizer.enter(Token::LineEnding);
             tokenizer.consume(code);
             tokenizer.exit(Token::LineEnding);
             (
-                State::Fn(Box::new(|t, c| html_continue_start(t, c, info))),
+                State::Fn(Box::new(|t, c| html_continue_before(t, c, info))),
                 None,
             )
         }
+        _ => unreachable!("expected eol"),
+    }
+}
+
+fn html_continue_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
+    match code {
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            html_continue_start(tokenizer, code, info)
+        }
         _ => {
             tokenizer.enter(Token::HtmlFlowData);
             continuation(tokenizer, code, info)
@@ -976,12 +1005,7 @@ fn continuation_close(tokenizer: &mut Tokenizer, code: Code, info: Info) -> Stat
     match code {
         Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
             tokenizer.exit(Token::HtmlFlowData);
-            tokenizer.exit(Token::HtmlFlow);
-            // Feel free to interrupt.
-            tokenizer.interrupt = false;
-            // Restore previous `concrete`.
-            tokenizer.concrete = info.concrete;
-            (State::Ok, Some(vec![code]))
+            html_continue_after(tokenizer, code, info)
         }
         _ => {
             tokenizer.consume(code);
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index ac830ef..06ff4e9 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -84,6 +84,7 @@ pub mod paragraph;
 pub mod partial_data;
 pub mod partial_destination;
 pub mod partial_label;
+pub mod partial_non_lazy_continuation;
 pub mod partial_space_or_tab;
 pub mod partial_title;
 pub mod partial_whitespace;
diff --git a/src/construct/partial_non_lazy_continuation.rs b/src/construct/partial_non_lazy_continuation.rs
new file mode 100644
index 0000000..7964de3
--- /dev/null
+++ b/src/construct/partial_non_lazy_continuation.rs
@@ -0,0 +1,26 @@
+//! Check that a line ending is followed by a non-lazy line (shared partial).
+
+use crate::token::Token;
+use crate::tokenizer::{Code, State, StateFnResult, Tokenizer};
+
+/// At an eol: consume it, then check whether the next line is lazy.
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            tokenizer.enter(Token::LineEnding);
+            tokenizer.consume(code);
+            tokenizer.exit(Token::LineEnding);
+            (State::Fn(Box::new(non_lazy_after)), None)
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// After the eol: `Ok` if the new line is not lazy, `Nok` if it is.
+fn non_lazy_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    if tokenizer.lazy {
+        (State::Nok, None)
+    } else {
+        (State::Ok, Some(vec![code]))
+    }
+}
-- 
cgit