diff options
Diffstat (limited to '')
-rw-r--r--	src/compiler.rs           |  55
-rw-r--r--	src/construct/html_flow.rs |   7
-rw-r--r--	src/construct/list.rs      |   4
-rw-r--r--	src/content/document.rs    | 332
4 files changed, 221 insertions(+), 177 deletions(-)
diff --git a/src/compiler.rs b/src/compiler.rs index 4b38c8d..37229a4 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -8,6 +8,7 @@ use crate::util::{ decode_character_reference::{decode_named, decode_numeric}, encode::encode, sanitize_uri::sanitize_uri, + skip, span::{codes as codes_from_span, from_exit_event, serialize}, }; use std::collections::HashMap; @@ -241,7 +242,6 @@ struct CompileContext<'a> { pub tight_stack: Vec<bool>, /// Fields used to influance the current compilation. pub slurp_one_line_ending: bool, - pub slurp_all_line_endings: bool, pub tags: bool, pub ignore_encode: bool, pub last_was_tag: bool, @@ -276,7 +276,6 @@ impl<'a> CompileContext<'a> { definitions: HashMap::new(), tight_stack: vec![], slurp_one_line_ending: false, - slurp_all_line_endings: false, tags: true, ignore_encode: false, last_was_tag: false, @@ -718,8 +717,6 @@ fn on_enter_paragraph(context: &mut CompileContext) { context.line_ending_if_needed(); context.tag("<p>".to_string()); } - - context.slurp_all_line_endings = false; } /// Handle [`Enter`][EventType::Enter]:[`Resource`][Token::Resource]. @@ -785,7 +782,6 @@ fn on_exit_block_quote(context: &mut CompileContext) { context.tight_stack.pop(); context.line_ending_if_needed(); context.tag("</blockquote>".to_string()); - context.slurp_all_line_endings = false; } /// Handle [`Exit`][EventType::Exit]:[`CharacterReferenceMarker`][Token::CharacterReferenceMarker]. @@ -1075,9 +1071,7 @@ fn on_exit_label_text(context: &mut CompileContext) { /// Handle [`Exit`][EventType::Exit]:[`LineEnding`][Token::LineEnding]. fn on_exit_line_ending(context: &mut CompileContext) { - if context.slurp_all_line_endings { - // Empty. 
- } else if context.slurp_one_line_ending { + if context.slurp_one_line_ending { context.slurp_one_line_ending = false; } else { context.push(context.encode_opt(&serialize( @@ -1156,9 +1150,7 @@ fn on_exit_media(context: &mut CompileContext) { fn on_exit_paragraph(context: &mut CompileContext) { let tight = context.tight_stack.last().unwrap_or(&false); - if *tight { - context.slurp_all_line_endings = true; - } else { + if !tight { context.tag("</p>".to_string()); } } @@ -1218,10 +1210,29 @@ fn on_enter_list(context: &mut CompileContext) { } else { balance -= 1; - // Blank line directly in list or directly in list item. - if balance < 3 && event.token_type == Token::BlankLineEnding { - loose = true; - break; + // Blank line directly in list or directly in list item, + // but not a blank line after an empty list item. + // To do: does this check if the item is empty? + if balance < 3 && event.token_type == Token::BlankLineEnding + // && !(balance == 1 && events[index - 2].token_type == Token::ListItem) + { + let at_list_item = balance == 1 && events[index - 2].token_type == Token::ListItem; + let at_empty_list_item = if at_list_item { + let before_item = skip::opt_back(events, index - 2, &[Token::ListItem]); + let before_prefix = skip::opt_back( + events, + index - 3, + &[Token::ListItemPrefix, Token::SpaceOrTab], + ); + before_item + 1 == before_prefix + } else { + false + }; + + if !at_list_item || !at_empty_list_item { + loose = true; + break; + } } // Done. @@ -1233,7 +1244,6 @@ fn on_enter_list(context: &mut CompileContext) { index += 1; } - println!("list: {:?} {:?}", token_type, loose); context.tight_stack.push(!loose); context.line_ending_if_needed(); // Note: no `>`. @@ -1283,12 +1293,21 @@ fn on_exit_list_item_value(context: &mut CompileContext) { /// To do. 
fn on_exit_list_item(context: &mut CompileContext) { - if context.last_was_tag && !context.slurp_all_line_endings { + let tight = context.tight_stack.last().unwrap_or(&false); + let before_item = skip::opt_back( + context.events, + context.index - 1, + &[Token::BlankLineEnding, Token::LineEnding, Token::SpaceOrTab], + ); + let previous = &context.events[before_item]; + let tight_paragraph = *tight && previous.token_type == Token::Paragraph; + let empty_item = previous.token_type == Token::ListItemPrefix; + + if !tight_paragraph && !empty_item { context.line_ending_if_needed(); } context.tag("</li>".to_string()); - context.slurp_all_line_endings = false; } /// To do. diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs index a8b1efc..3300d2f 100644 --- a/src/construct/html_flow.rs +++ b/src/construct/html_flow.rs @@ -207,7 +207,6 @@ struct Info { /// pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.enter(Token::HtmlFlow); - tokenizer.enter(Token::HtmlFlowData); // To do: allow arbitrary when code (indented) is turned off. 
tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), before)(tokenizer, code) } @@ -219,6 +218,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// ``` fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { if Code::Char('<') == code { + tokenizer.enter(Token::HtmlFlowData); tokenizer.consume(code); (State::Fn(Box::new(open)), None) } else { @@ -771,11 +771,12 @@ fn continuation(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnRes Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') if info.kind == Kind::Basic || info.kind == Kind::Complete => { + tokenizer.exit(Token::HtmlFlowData); tokenizer.check(blank_line_before, |ok| { let func = if ok { - continuation_close + html_continue_after } else { - continuation_at_line_ending + html_continue_start // continuation_at_line_ending }; Box::new(move |t, c| func(t, c, info)) })(tokenizer, code) diff --git a/src/construct/list.rs b/src/construct/list.rs index 960c0eb..d06eaf0 100644 --- a/src/construct/list.rs +++ b/src/construct/list.rs @@ -267,7 +267,7 @@ pub fn not_blank_cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { index > 0 && tokenizer.events[index - 1].token_type == Token::BlankLineEnding; let mut further_blank = false; - if currently_blank && index > 3 { + if currently_blank && index > 5 { let before = skip::opt_back(&tokenizer.events, index - 3, &[Token::SpaceOrTab]); further_blank = tokenizer.events[before].token_type == Token::BlankLineEnding; } @@ -338,7 +338,7 @@ pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> { && skip::opt( &tokenizer.events, previous.3 + 1, - &[Token::LineEnding, Token::BlankLineEnding], + &[Token::SpaceOrTab, Token::LineEnding, Token::BlankLineEnding], ) == current.2 { println!("prev:match {:?} {:?}", previous, current); diff --git a/src/content/document.rs b/src/content/document.rs index b29e4b9..f6b8f55 100644 --- a/src/content/document.rs +++ b/src/content/document.rs @@ -10,7 +10,7 @@ use crate::construct::{ 
block_quote::{cont as block_quote_cont, end as block_quote_end, start as block_quote}, - list::{cont as list_const, end as list_end, start as list}, + list::{cont as list_item_const, end as list_item_end, start as list_item}, }; use crate::content::flow::start as flow; use crate::parser::ParseState; @@ -25,12 +25,19 @@ use crate::util::{ }; use std::collections::HashSet; +#[derive(Debug, PartialEq)] +enum Container { + BlockQuote, + ListItem, +} + struct DocumentInfo { continued: usize, - containers_begin_index: usize, + index: usize, paragraph_before: bool, inject: Vec<(Vec<Event>, Vec<Event>)>, - stack: Vec<String>, + stack: Vec<Container>, + stack_close: Vec<Container>, next: Box<StateFn>, } @@ -73,18 +80,34 @@ pub fn document(parse_state: &mut ParseState, point: Point, index: usize) -> Vec fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { let info = DocumentInfo { + index: 0, continued: 0, - paragraph_before: false, inject: vec![], - containers_begin_index: 0, - stack: vec![], next: Box::new(flow), + paragraph_before: false, + stack: vec![], + stack_close: vec![], }; - before(tokenizer, code, info) + line_start(tokenizer, code, info) +} + +/// Start of a new line. +fn line_start(tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo) -> StateFnResult { + println!("line_start"); + info.index = tokenizer.events.len(); + info.inject.push((vec![], vec![])); + info.continued = 0; + container_existing_before(tokenizer, code, info) } -fn before(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnResult { - println!("before"); +/// Before existing containers. +fn container_existing_before( + tokenizer: &mut Tokenizer, + code: Code, + info: DocumentInfo, +) -> StateFnResult { + println!("container_existing_before"); + // First we iterate through the open blocks, starting with the root // document, and descending through last children down to the last open // block. 
@@ -96,45 +119,42 @@ fn before(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnR // But we cannot close unmatched blocks yet, because we may have a lazy // continuation line. if info.continued < info.stack.len() { - let name = &info.stack[info.continued]; - let cont = if name == "blockquote" { - block_quote_cont - } else if name == "list" { - list_const - } else { - unreachable!("todo: cont construct {:?}", name) + let kind = &info.stack[info.continued]; + let cont = match kind { + Container::BlockQuote => block_quote_cont, + Container::ListItem => list_item_const, }; // To do: state? tokenizer.attempt(cont, move |ok| { if ok { - Box::new(|t, c| document_continue(t, c, info)) + Box::new(|t, c| container_existing_after(t, c, info)) } else { - Box::new(|t, c| check_new_containers(t, c, info)) + Box::new(|t, c| container_new_before(t, c, info)) } })(tokenizer, code) } else { // Done. - check_new_containers(tokenizer, code, info) + container_new_before(tokenizer, code, info) } } -fn document_continue( +fn container_existing_after( tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo, ) -> StateFnResult { - println!("document_continue"); + println!("container_existing_after"); info.continued += 1; - before(tokenizer, code, info) + container_existing_before(tokenizer, code, info) } -fn check_new_containers( +fn container_new_before( tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo, ) -> StateFnResult { - println!("check_new_containers"); + println!("container_new_before"); // Next, after consuming the continuation markers for existing blocks, we // look for new block starts (e.g. `>` for a block quote). // If we encounter a new block start, we close any blocks unmatched in @@ -146,10 +166,13 @@ fn check_new_containers( // start. 
if tokenizer.concrete { println!(" concrete"); - return there_is_no_new_container(tokenizer, code, info); + return containers_after(tokenizer, code, info); } - println!(" to do: interrupt ({:?})?", tokenizer.interrupt); + println!( + " to do: set interrupt? (before: {:?})", + tokenizer.interrupt + ); // // If we do have flow, it could still be a blank line, // // but we’d be interrupting it w/ a new container if there’s a current // // construct. @@ -157,20 +180,21 @@ fn check_new_containers( // childFlow.currentConstruct && !childFlow._gfmTableDynamicInterruptHack // ) } else { - tokenizer.interrupt = false; + // println!(" set interrupt to `false`! (before: {:?})", tokenizer.interrupt); + // tokenizer.interrupt = false; } // Check if there is a new container. tokenizer.attempt(block_quote, move |ok| { if ok { - Box::new(|t, c| there_is_a_new_container(t, c, info, "blockquote".to_string())) + Box::new(|t, c| container_new_after(t, c, info, Container::BlockQuote)) } else { Box::new(|tokenizer, code| { - tokenizer.attempt(list, move |ok| { + tokenizer.attempt(list_item, move |ok| { if ok { - Box::new(|t, c| there_is_a_new_container(t, c, info, "list".to_string())) + Box::new(|t, c| container_new_after(t, c, info, Container::ListItem)) } else { - Box::new(|t, c| there_is_no_new_container(t, c, info)) + Box::new(|t, c| containers_after(t, c, info)) } })(tokenizer, code) }) @@ -178,25 +202,17 @@ fn check_new_containers( })(tokenizer, code) } -fn there_is_a_new_container( +fn container_new_after( tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo, - name: String, + kind: Container, ) -> StateFnResult { - let size = info.continued; - println!("exit:0: {:?}", false); - info = exit_containers(tokenizer, info, size, false); - tokenizer.expect(code, true); - // Remove from the event stack. // We’ll properly add exits at different points manually. 
- let end = if name == "blockquote" { - block_quote_end - } else if name == "list" { - list_end - } else { - unreachable!("todo: end {:?}", name) + let end = match kind { + Container::BlockQuote => block_quote_end, + Container::ListItem => list_item_end, }; let token_types = end(); @@ -221,118 +237,42 @@ fn there_is_a_new_container( index += 1; } - info.stack.push(name); - document_continue(tokenizer, code, info) -} - -/// Exit open containers. -fn exit_containers( - tokenizer: &mut Tokenizer, - mut info: DocumentInfo, - size: usize, - before: bool, -) -> DocumentInfo { - let mut exits: Vec<Event> = vec![]; - - if info.stack.len() > size { - println!("closing flow"); - let index = tokenizer.events.len(); - let result = tokenizer.flush(info.next); - info.next = Box::new(flow); // This is weird but Rust needs a function there. - assert!(matches!(result.0, State::Ok)); - assert!(result.1.is_none()); - - let mut end = tokenizer.events.len(); - while end > 0 && end > index { - if tokenizer.events[end - 1].token_type != Token::LineEnding { - break; - } - - end -= 1; - } - - let mut add = tokenizer.events.drain(index..end).collect::<Vec<_>>(); - - exits.append(&mut add); - - println!(" setting `interrupt: false`"); - tokenizer.interrupt = false; - } - - while info.stack.len() > size { - let name = info.stack.pop().unwrap(); - - let end = if name == "blockquote" { - block_quote_end - } else if name == "list" { - list_end - } else { - unreachable!("todo: end {:?}", name) - }; - - let token_types = end(); - - let mut index = 0; - while index < token_types.len() { - let token_type = &token_types[index]; - - exits.push(Event { - event_type: EventType::Exit, - token_type: token_type.clone(), - // Note: positions are fixed later. 
- point: tokenizer.point.clone(), - index: tokenizer.index, - previous: None, - next: None, - content_type: None, - }); - - index += 1; - } - } - - if !exits.is_empty() { - let before = if before { 1 } else { 0 }; - let mut index = info.inject.len() - 1; - if before > index { - // To do: maybe, if this branch happens, it’s a bug? - println!("inject:0: {:?}", index); - index = 0; - } else { - index -= before; - println!("inject:set: {:?}", index); - } - info.inject[index].1.append(&mut exits); + if info.continued < info.stack.len() { + info.stack_close + .append(&mut info.stack.drain(info.continued..).collect::<Vec<_>>()); + info = line_end(tokenizer, info, false, true); + tokenizer.expect(code, true); } - info + info.stack.push(kind); + info.continued = info.stack.len(); + container_new_before(tokenizer, code, info) } -fn there_is_no_new_container( +fn containers_after( tokenizer: &mut Tokenizer, code: Code, - info: DocumentInfo, + mut info: DocumentInfo, ) -> StateFnResult { - println!("there_is_no_new_container"); - tokenizer.lazy = info.continued != info.stack.len(); - // lineStartOffset = self.now().offset - flow_start(tokenizer, code, info) -} + println!("containers_after"); -fn flow_start(tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo) -> StateFnResult { - println!("flow_start"); + // Add all container events we parsed. + let mut containers = tokenizer.events.drain(info.index..).collect::<Vec<_>>(); + info.inject.last_mut().unwrap().0.append(&mut containers); - let containers = tokenizer - .events - .drain(info.containers_begin_index..) - .collect::<Vec<_>>(); - - info.inject.push((containers, vec![])); + tokenizer.lazy = info.continued != info.stack.len(); + println!("lazy: {:?} {:?}", info.continued, info.stack.len()); // Define start. 
let point = tokenizer.point.clone(); tokenizer.define_skip(&point); + flow_start(tokenizer, code, info) +} + +fn flow_start(tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo) -> StateFnResult { + println!("flow_start"); + let state = info.next; info.next = Box::new(flow); // This is weird but Rust needs a function there. @@ -352,6 +292,7 @@ fn flow_end( ) -> StateFnResult { println!("flow_end: lazy? {:?}", tokenizer.lazy); + // To do: clean this! let index = tokenizer.events.len(); let index = if index > 0 { skip::opt_back(&tokenizer.events, index - 1, &[Token::LineEnding]) @@ -371,15 +312,14 @@ fn flow_end( false }; - let mut continued = info.continued; - let size = info.stack.len(); + let mut lazy = false; if tokenizer.lazy { println!("this line was lazy."); if info.paragraph_before && paragraph { println!("it was another paragraph, which is allowed."); - continued = size; + lazy = true; } else { println!( "it was something else (prev: {:?}, cur: {:?}), which is not allowed.", @@ -388,27 +328,31 @@ fn flow_end( } } - // Exit containers. 
- println!("exit:1: {:?}", true); - info = exit_containers(tokenizer, info, continued, true); + if !lazy && info.continued < info.stack.len() { + info.stack_close + .append(&mut info.stack.drain(info.continued..).collect::<Vec<_>>()); + } + + info = line_end(tokenizer, info, false, false); tokenizer.expect(code, true); - info.continued = 0; info.paragraph_before = paragraph; - info.containers_begin_index = tokenizer.events.len(); match result { State::Ok => { - println!("exit:3: {:?}", false); - info = exit_containers(tokenizer, info, 0, false); - tokenizer.expect(code, true); + info.stack_close + .append(&mut info.stack.drain(..).collect::<Vec<_>>()); + info = line_end(tokenizer, info, true, false); let mut map = EditMap::new(); let mut line_index = 0; let mut index = 0; + println!("injections: {:#?}", info.inject); + let add = info.inject[line_index].0.clone(); let mut first_line_ending_in_run: Option<usize> = None; + println!("inject:enters:0: {:?}", add.len()); map.add(0, 0, add); while index < tokenizer.events.len() { @@ -427,6 +371,11 @@ fn flow_end( index += 1; } if !add.is_empty() { + println!( + "inject:exits:at-{:?}: {:?}", + first_line_ending_in_run, + add.len() + ); map.add(first_line_ending_in_run.unwrap(), 0, add); } } else { @@ -435,6 +384,7 @@ fn flow_end( if !add.is_empty() { // No longer empty. 
first_line_ending_in_run = None; + println!("inject:enters:at-{:?}: {:?}", index + 1, add.len()); map.add(index + 1, 0, add); } } @@ -448,6 +398,7 @@ fn flow_end( } let mut add = info.inject[line_index].1.clone(); + println!("inject:exits:tail-{:?}: {:?}", index, add.len()); let mut deep_index = 0; while deep_index < add.len() { add[deep_index].point = tokenizer.point.clone(); @@ -479,11 +430,84 @@ fn flow_end( State::Nok => unreachable!("handle nok in `flow`?"), State::Fn(func) => { info.next = func; - before(tokenizer, code, info) + line_start(tokenizer, code, info) } } } +fn line_end( + tokenizer: &mut Tokenizer, + mut info: DocumentInfo, + eof: bool, + containers_before: bool, +) -> DocumentInfo { + let mut stack_close = info.stack_close.drain(..).collect::<Vec<_>>(); + println!("line_end: {:?}", stack_close); + + if stack_close.is_empty() { + return info; + } + + // So, we’re at the end of a line, but we need to close the *previous* line. + if !eof { + println!("closing previous flow"); + tokenizer.define_skip(&tokenizer.point.clone()); + let mut current_events = tokenizer.events.drain(info.index..).collect::<Vec<_>>(); + let next = info.next; + info.next = Box::new(flow); // This is weird but Rust needs a function there. + let result = tokenizer.flush(next); + assert!(matches!(result.0, State::Ok)); + assert!(result.1.is_none()); + + if containers_before { + info.index = tokenizer.events.len(); + } + + tokenizer.events.append(&mut current_events); + } + + let mut exits: Vec<Event> = vec![]; + + while !stack_close.is_empty() { + let kind = stack_close.pop().unwrap(); + let end = match kind { + Container::BlockQuote => block_quote_end, + Container::ListItem => list_item_end, + }; + + let token_types = end(); + + let mut index = 0; + while index < token_types.len() { + let token_type = &token_types[index]; + + exits.push(Event { + event_type: EventType::Exit, + token_type: token_type.clone(), + // Note: positions are fixed later. 
+ point: tokenizer.point.clone(), + index: tokenizer.index, + previous: None, + next: None, + content_type: None, + }); + + index += 1; + } + } + + let index = info.inject.len() - (if eof { 1 } else { 2 }); + info.inject[index].1.append(&mut exits); + + println!( + " setting `interrupt: false` (before: {:?}", + tokenizer.interrupt + ); + tokenizer.interrupt = false; + + info +} + fn eof_eol(code: Code) -> bool { matches!( code, |