diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-07-12 17:47:08 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-07-12 17:47:08 +0200 |
commit | 2e3b7abaa9877b658fa4f8f2612acc617dff60bb (patch) | |
tree | e823d041521a4af33a7e552ba58f1d4b63335be3 /src | |
parent | 75522b867b15b9a400275cfec9a2ead4ff535473 (diff) | |
download | markdown-rs-2e3b7abaa9877b658fa4f8f2612acc617dff60bb.tar.gz markdown-rs-2e3b7abaa9877b658fa4f8f2612acc617dff60bb.tar.bz2 markdown-rs-2e3b7abaa9877b658fa4f8f2612acc617dff60bb.zip |
Fix a lot of list things
* Add `ListItem`, `ListOrdered`, and `ListUnordered` tokens
* Add support for multiline list items
* Add support for tight lists
* Fix bug where 10-digit-long list item values were accepted (one more digit than the maximum should allow)
* Fix skip bug when skipping over nested events
Diffstat (limited to '')
-rw-r--r-- | src/compiler.rs | 91 | ||||
-rw-r--r-- | src/construct/list.rs | 210 | ||||
-rw-r--r-- | src/content/document.rs | 23 | ||||
-rw-r--r-- | src/token.rs | 4 | ||||
-rw-r--r-- | src/util/skip.rs | 41 |
5 files changed, 313 insertions, 56 deletions
diff --git a/src/compiler.rs b/src/compiler.rs index 753d85f..2d42011 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -447,7 +447,8 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String { // To do: sort. enter_map.insert(Token::ListItemMarker, on_enter_list_item_marker); - enter_map.insert(Token::List, on_enter_list); + enter_map.insert(Token::ListOrdered, on_enter_list); + enter_map.insert(Token::ListUnordered, on_enter_list); let mut exit_map: Map = HashMap::new(); exit_map.insert(Token::AutolinkEmail, on_exit_autolink_email); @@ -525,8 +526,10 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String { exit_map.insert(Token::ThematicBreak, on_exit_thematic_break); // To do: sort. - exit_map.insert(Token::List, on_exit_list); exit_map.insert(Token::ListItemValue, on_exit_list_item_value); + exit_map.insert(Token::ListItem, on_exit_list_item); + exit_map.insert(Token::ListOrdered, on_exit_list); + exit_map.insert(Token::ListUnordered, on_exit_list); // Handle one event. let handle = |context: &mut CompileContext, index: usize| { @@ -708,8 +711,14 @@ fn on_enter_link(context: &mut CompileContext) { /// Handle [`Enter`][EventType::Enter]:[`Paragraph`][Token::Paragraph]. fn on_enter_paragraph(context: &mut CompileContext) { - context.line_ending_if_needed(); - context.tag("<p>".to_string()); + let tight = context.tight_stack.last().unwrap_or(&false); + + if !tight { + context.line_ending_if_needed(); + context.tag("<p>".to_string()); + } + + // context.slurp_all_line_endings = false; } /// Handle [`Enter`][EventType::Enter]:[`Resource`][Token::Resource]. @@ -1145,7 +1154,11 @@ fn on_exit_media(context: &mut CompileContext) { /// Handle [`Exit`][EventType::Exit]:[`Paragraph`][Token::Paragraph]. 
fn on_exit_paragraph(context: &mut CompileContext) { - context.tag("</p>".to_string()); + let tight = context.tight_stack.last().unwrap_or(&false); + + if !tight { + context.tag("</p>".to_string()); + } } /// Handle [`Exit`][EventType::Exit]:[`ReferenceString`][Token::ReferenceString]. @@ -1187,14 +1200,58 @@ fn on_exit_thematic_break(context: &mut CompileContext) { } // To do: sort. +/// To do (onenterlist{un,}ordered) +fn on_enter_list(context: &mut CompileContext) { + let events = &context.events; + let mut index = context.index; + let mut balance = 0; + let mut loose = false; + let token_type = &events[index].token_type; + + while index < events.len() { + let event = &events[index]; + + if event.event_type == EventType::Enter { + balance += 1; + } else { + balance -= 1; + + // Blank line directly in list or directly in list item. + if balance < 3 && event.token_type == Token::BlankLineEnding { + loose = true; + break; + } + + // Done. + if balance == 0 && event.token_type == *token_type { + break; + } + } + + index += 1; + } + + println!("list: {:?} {:?}", token_type, loose); + context.tight_stack.push(!loose); + context.line_ending_if_needed(); + // Note: no `>`. + context.tag(format!( + "<{}", + if *token_type == Token::ListOrdered { + "ol" + } else { + "ul" + } + )); + context.expect_first_item = Some(true); +} + /// To do fn on_enter_list_item_marker(context: &mut CompileContext) { let expect_first_item = context.expect_first_item.take().unwrap(); if expect_first_item { context.tag(">".to_string()); - } else { - on_exit_list_item(context); } context.line_ending_if_needed(); @@ -1204,15 +1261,6 @@ fn on_enter_list_item_marker(context: &mut CompileContext) { context.last_was_tag = false; } -/// To do (onenterlist{un,}ordered) -fn on_enter_list(context: &mut CompileContext) { - // To do: !token._loose - context.tight_stack.push(false); - context.line_ending_if_needed(); - context.tag("<ol".to_string()); // To do: `ol` / `ul`. 
- context.expect_first_item = Some(true); -} - /// To do fn on_exit_list_item_value(context: &mut CompileContext) { let expect_first_item = context.expect_first_item.unwrap(); @@ -1232,21 +1280,24 @@ fn on_exit_list_item_value(context: &mut CompileContext) { } /// To do. -/// Note: there is no actual `Token::ListItem`. fn on_exit_list_item(context: &mut CompileContext) { // && !context.slurp_all_line_endings if context.last_was_tag { context.line_ending_if_needed(); } - context.tag("</li>".to_string()); // To do: `ol` / `ul`. + context.tag("</li>".to_string()); // context.slurp_all_line_endings = false; } /// To do. fn on_exit_list(context: &mut CompileContext) { - on_exit_list_item(context); + let tag_name = if context.events[context.index].token_type == Token::ListOrdered { + "ol" + } else { + "ul" + }; context.tight_stack.pop(); context.line_ending(); - context.tag("</ol>".to_string()); // To do: `ol` / `ul`. + context.tag(format!("</{}>", tag_name)); } diff --git a/src/construct/list.rs b/src/construct/list.rs index 96b2496..b4ae9b1 100644 --- a/src/construct/list.rs +++ b/src/construct/list.rs @@ -1,14 +1,21 @@ //! To do. use crate::constant::{LIST_ITEM_VALUE_SIZE_MAX, TAB_SIZE}; -use crate::construct::partial_space_or_tab::space_or_tab_min_max; +use crate::construct::{ + blank_line::start as blank_line, partial_space_or_tab::space_or_tab_min_max, +}; use crate::token::Token; -use crate::tokenizer::{Code, State, StateFnResult, Tokenizer}; +use crate::tokenizer::{Code, Event, EventType, State, StateFnResult, Tokenizer}; +use crate::util::{ + edit_map::EditMap, + skip, + span::{codes as codes_from_span, from_exit_event}, +}; -/// Type of title. +/// Type of list. #[derive(Debug, PartialEq)] enum Kind { - /// In a dot (`.`) list. + /// In a dot (`.`) list item. /// /// ## Example /// @@ -16,7 +23,7 @@ enum Kind { /// 1. a /// ``` Dot, - /// In a paren (`)`) list. + /// In a paren (`)`) list item. 
/// /// ## Example /// @@ -24,7 +31,7 @@ enum Kind { /// 1) a /// ``` Paren, - /// In an asterisk (`*`) list. + /// In an asterisk (`*`) list item. /// /// ## Example /// @@ -32,7 +39,7 @@ enum Kind { /// * a /// ``` Asterisk, - /// In a plus (`+`) list. + /// In a plus (`+`) list item. /// /// ## Example /// @@ -40,7 +47,7 @@ enum Kind { /// + a /// ``` Plus, - /// In a dash (`-`) list. + /// In a dash (`-`) list item. /// /// ## Example /// @@ -51,16 +58,16 @@ enum Kind { } impl Kind { - /// Turn the kind into a [char]. - fn as_char(&self) -> char { - match self { - Kind::Dot => '.', - Kind::Paren => ')', - Kind::Asterisk => '*', - Kind::Plus => '+', - Kind::Dash => '-', - } - } + // /// Turn the kind into a [char]. + // fn as_char(&self) -> char { + // match self { + // Kind::Dot => '.', + // Kind::Paren => ')', + // Kind::Asterisk => '*', + // Kind::Plus => '+', + // Kind::Dash => '-', + // } + // } /// Turn a [char] into a kind. /// /// ## Panics @@ -101,13 +108,13 @@ fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { // Unordered. Code::Char('*' | '+' | '-') => { // To do: check if this is a thematic break? - tokenizer.enter(Token::List); + tokenizer.enter(Token::ListItem); tokenizer.enter(Token::ListItemPrefix); marker(tokenizer, code) } // Ordered. Code::Char(char) if char.is_ascii_digit() => { - tokenizer.enter(Token::List); + tokenizer.enter(Token::ListItem); tokenizer.enter(Token::ListItemPrefix); tokenizer.enter(Token::ListItemValue); // To do: `interrupt || !1`? @@ -119,10 +126,10 @@ fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// To do. 
fn inside(tokenizer: &mut Tokenizer, code: Code, mut size: usize) -> StateFnResult { + size += 1; match code { Code::Char(char) if char.is_ascii_digit() && size < LIST_ITEM_VALUE_SIZE_MAX => { tokenizer.consume(code); - size += 1; (State::Fn(Box::new(move |t, c| inside(t, c, size))), None) } // To do: `(!self.interrupt || size < 2)` @@ -172,6 +179,7 @@ fn prefix_other(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { fn prefix_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { // To do: calculate size. tokenizer.exit(Token::ListItemPrefix); + tokenizer.register_resolver_before("list_item".to_string(), Box::new(resolve)); (State::Ok, Some(vec![code])) } @@ -189,7 +197,163 @@ fn list_item_prefix_whitespace_after(_tokenizer: &mut Tokenizer, code: Code) -> (State::Ok, Some(vec![code])) } -/// End of a block quote. +/// To do. +pub fn cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + tokenizer.check(blank_line, |ok| { + let func = if ok { blank_cont } else { not_blank_cont }; + Box::new(func) + })(tokenizer, code) +} + +pub fn blank_cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + println!("cont: blank"); + // self.containerState.furtherBlankLines = + // self.containerState.furtherBlankLines || + // self.containerState.initialBlankLine + + // We have a blank line. + // Still, try to consume at most the items size. + // To do: eat at most `size` whitespace. + tokenizer.go(space_or_tab_min_max(0, TAB_SIZE), blank_cont_after)(tokenizer, code) +} + +pub fn blank_cont_after(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + println!("cont: blank: after"); + (State::Ok, Some(vec![code])) +} + +pub fn not_blank_cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + println!("cont: not blank"); + // if (self.containerState.furtherBlankLines || !markdownSpace(code)) nok + // To do: eat exactly `size` whitespace. 
+ tokenizer.go(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), blank_cont_after)(tokenizer, code) +} + +/// To do. pub fn end() -> Vec<Token> { - vec![Token::List] + vec![Token::ListItem] +} + +/// To do. +pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> { + let mut edit_map = EditMap::new(); + + let mut index = 0; + println!("list item:before: {:?}", tokenizer.events.len()); + while index < tokenizer.events.len() { + let event = &tokenizer.events[index]; + println!( + "ev: {:?} {:?} {:?} {:?} {:?} {:?}", + index, + event.event_type, + event.token_type, + event.content_type, + event.previous, + event.next + ); + index += 1; + } + + let mut index = 0; + let mut balance = 0; + let mut list_items: Vec<(Kind, usize, usize, usize)> = vec![]; + // To do: track balance? Or, check what’s between them? + + while index < tokenizer.events.len() { + let event = &tokenizer.events[index]; + + if event.token_type == Token::ListItem { + if event.event_type == EventType::Enter { + let end = skip::opt(&tokenizer.events, index, &[Token::ListItem]) - 1; + let marker = skip::to(&tokenizer.events, index, &[Token::ListItemMarker]) + 1; + let codes = codes_from_span( + &tokenizer.parse_state.codes, + &from_exit_event(&tokenizer.events, marker), + ); + let kind = Kind::from_code(codes[0]); + let current = (kind, balance, index, end); + + let previous = list_items.last(); + let mut matched = false; + + // There’s a previous list item. + if let Some(previous) = previous { + // …with the same marker and depth, and with only (blank) line endings between them. 
+ if previous.0 == current.0 + && previous.1 == current.1 + && skip::opt( + &tokenizer.events, + previous.3 + 1, + &[Token::LineEnding, Token::BlankLineEnding], + ) == current.2 + { + matched = true; + } + } + + if matched { + let previous = list_items.last_mut().unwrap(); + previous.3 = current.3; + } else { + // let previous = list_items.pop(); + // if let Some(previous) = previous { + // lists.push(previous); + // } + + println!("prev:!match {:?} {:?}", previous, current); + list_items.push(current); + } + + println!("enter: {:?}", event.token_type); + balance += 1; + } else { + println!("exit: {:?}", event.token_type); + balance -= 1; + } + } + + index += 1; + } + + let mut index = 0; + while index < list_items.len() { + let list_item = &list_items[index]; + let mut list_start = tokenizer.events[list_item.2].clone(); + let token_type = if matches!(list_item.0, Kind::Paren | Kind::Dot) { + Token::ListOrdered + } else { + Token::ListUnordered + }; + list_start.token_type = token_type.clone(); + let mut list_end = tokenizer.events[list_item.3].clone(); + list_end.token_type = token_type; + println!("inject: {:?} {:?}", list_start, list_end); + + edit_map.add(list_item.2, 0, vec![list_start]); + edit_map.add(list_item.3 + 1, 0, vec![list_end]); + + index += 1; + } + + println!("list items: {:#?}", list_items); + + let events = edit_map.consume(&mut tokenizer.events); + + let mut index = 0; + println!("list item:after: {:?}", events.len()); + while index < events.len() { + let event = &events[index]; + println!( + "ev: {:?} {:?} {:?} {:?} {:?} {:?}", + index, + event.event_type, + event.token_type, + event.content_type, + event.previous, + event.next + ); + index += 1; + } + + events } diff --git a/src/content/document.rs b/src/content/document.rs index e32534e..c5bf5c8 100644 --- a/src/content/document.rs +++ b/src/content/document.rs @@ -10,7 +10,7 @@ use crate::construct::{ block_quote::{cont as block_quote_cont, end as block_quote_end, start as block_quote}, - 
list::{end as list_end, start as list}, + list::{cont as list_const, end as list_end, start as list}, }; use crate::content::flow::start as flow; use crate::parser::ParseState; @@ -100,7 +100,7 @@ fn before(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnR let cont = if name == "blockquote" { block_quote_cont } else if name == "list" { - unreachable!("todo: list cont {:?}", name) + list_const } else { unreachable!("todo: cont construct {:?}", name) }; @@ -183,7 +183,8 @@ fn there_is_a_new_container( name: String, ) -> StateFnResult { let size = info.continued; - info = exit_containers(tokenizer, info, size, true); + println!("exit:0: {:?}", false); + info = exit_containers(tokenizer, info, size, false); tokenizer.expect(code, true); // Remove from the event stack. @@ -272,6 +273,7 @@ fn exit_containers( let mut index = 0; while index < token_types.len() { let token_type = &token_types[index]; + println!("creating exit: {:?}", token_type); exits.push(Event { event_type: EventType::Exit, @@ -289,7 +291,16 @@ fn exit_containers( } if !exits.is_empty() { - let index = info.inject.len() - 1 - (if before { 1 } else { 0 }); + let before = if before { 1 } else { 0 }; + let mut index = info.inject.len() - 1; + println!("inject: {:?} {:?}", info.inject.len() - 1, before); + if before >= index { + // To do: maybe, if this branch happens, it’s a bug? + println!("inject:0: {:?}", index); + index = 0; + } else { + println!("set: {:?}", index); + } info.inject[index].1.append(&mut exits); } @@ -377,6 +388,7 @@ fn flow_end( } // Exit containers. 
+ println!("exit:1: {:?}", true); info = exit_containers(tokenizer, info, continued, true); tokenizer.expect(code, true); @@ -386,6 +398,7 @@ fn flow_end( match result { State::Ok => { + println!("exit:3: {:?}", false); info = exit_containers(tokenizer, info, 0, false); tokenizer.expect(code, true); @@ -433,7 +446,7 @@ fn flow_end( tokenizer.events = map.consume(&mut tokenizer.events); let mut index = 0; - println!("after: {:?}", tokenizer.events.len()); + println!("document:after: {:?}", tokenizer.events.len()); while index < tokenizer.events.len() { let event = &tokenizer.events[index]; println!( diff --git a/src/token.rs b/src/token.rs index f60f9cd..889c3ba 100644 --- a/src/token.rs +++ b/src/token.rs @@ -1767,7 +1767,9 @@ pub enum Token { ThematicBreakSequence, // To do: sort. - List, + ListOrdered, + ListUnordered, + ListItem, ListItemPrefix, ListItemValue, ListItemMarker, diff --git a/src/util/skip.rs b/src/util/skip.rs index 971beb6..3307734 100644 --- a/src/util/skip.rs +++ b/src/util/skip.rs @@ -1,7 +1,7 @@ //! Utilities to deal with lists of events. use crate::token::Token; -use crate::tokenizer::Event; +use crate::tokenizer::{Event, EventType}; /// Skip from `index`, optionally past `token_types`. pub fn opt(events: &[Event], index: usize, token_types: &[Token]) -> usize { @@ -13,33 +13,60 @@ pub fn opt_back(events: &[Event], index: usize, token_types: &[Token]) -> usize skip_opt_with_direction(events, index, token_types, false) } +pub fn to(events: &[Event], mut index: usize, token_types: &[Token]) -> usize { + while index < events.len() { + let current = &events[index].token_type; + + if token_types.contains(current) { + break; + } + + index += 1; + } + + index +} + /// Skip internals. 
fn skip_opt_with_direction( events: &[Event], - index: usize, + mut index: usize, token_types: &[Token], forward: bool, ) -> usize { - let mut index = index; + let mut balance = 0; + let open = if forward { + EventType::Enter + } else { + EventType::Exit + }; while index < events.len() { let current = &events[index].token_type; - if !token_types.contains(current) { + if !token_types.contains(current) || events[index].event_type != open { break; } - // assert_eq!(events[index].event_type, EventType::Enter); index = if forward { index + 1 } else { index - 1 }; + balance += 1; loop { - if events[index].token_type == *current { - // assert_eq!(events[index].event_type, EventType::Exit); + balance = if events[index].event_type == open { + balance + 1 + } else { + balance - 1 + }; + + if events[index].token_type == *current && balance == 0 { + println!("close:it! {:?} {:?}", events[index].token_type, balance); index = if forward { index + 1 } else { index - 1 }; + println!("index:break: {:?}", index); break; } index = if forward { index + 1 } else { index - 1 }; + println!("index:loop: {:?}", index); } } |