about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author    Titus Wormer <tituswormer@gmail.com>  2022-07-12 13:00:53 +0200
committer Titus Wormer <tituswormer@gmail.com>  2022-07-12 13:00:53 +0200
commit  75522b867b15b9a400275cfec9a2ead4ff535473 (patch)
tree    4a9511f6b8899e1ead2ca02686ffd571b1bd4e1f /src
parent  2ce19d9fd8f75ee1e3d62762e91f5d18303d4d6b (diff)
download: markdown-rs-75522b867b15b9a400275cfec9a2ead4ff535473.tar.gz
download: markdown-rs-75522b867b15b9a400275cfec9a2ead4ff535473.tar.bz2
download: markdown-rs-75522b867b15b9a400275cfec9a2ead4ff535473.zip
Add initial support for lists
Diffstat (limited to '')
-rw-r--r--  src/compiler.rs         |  82
-rw-r--r--  src/constant.rs         |   4
-rw-r--r--  src/construct/list.rs   | 195
-rw-r--r--  src/construct/mod.rs    |   4
-rw-r--r--  src/content/document.rs |  31
-rw-r--r--  src/token.rs            |   7
6 files changed, 308 insertions(+), 15 deletions(-)
diff --git a/src/compiler.rs b/src/compiler.rs
index 8a28654..753d85f 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -235,6 +235,7 @@ struct CompileContext<'a> {
pub code_flow_seen_data: Option<bool>,
pub code_fenced_fences_count: Option<usize>,
pub character_reference_kind: Option<CharacterReferenceKind>,
+ pub expect_first_item: Option<bool>,
pub media_stack: Vec<Media>,
pub definitions: HashMap<String, Definition>,
pub tight_stack: Vec<bool>,
@@ -269,6 +270,7 @@ impl<'a> CompileContext<'a> {
code_flow_seen_data: None,
code_fenced_fences_count: None,
character_reference_kind: None,
+ expect_first_item: None,
media_stack: vec![],
definitions: HashMap::new(),
tight_stack: vec![],
@@ -379,7 +381,7 @@ impl<'a> CompileContext<'a> {
/// Turn events and codes into a string of HTML.
#[allow(clippy::too_many_lines)]
pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
- // let mut slurp_all_line_endings = false;
+ // let slurp_all_line_endings = false;
let mut index = 0;
let mut line_ending_inferred: Option<LineEnding> = None;
@@ -443,6 +445,10 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
enter_map.insert(Token::ResourceTitleString, on_enter_buffer);
enter_map.insert(Token::Strong, on_enter_strong);
+ // To do: sort.
+ enter_map.insert(Token::ListItemMarker, on_enter_list_item_marker);
+ enter_map.insert(Token::List, on_enter_list);
+
let mut exit_map: Map = HashMap::new();
exit_map.insert(Token::AutolinkEmail, on_exit_autolink_email);
exit_map.insert(Token::AutolinkProtocol, on_exit_autolink_protocol);
@@ -488,7 +494,6 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
on_exit_definition_title_string,
);
exit_map.insert(Token::Emphasis, on_exit_emphasis);
-
exit_map.insert(Token::HardBreakEscape, on_exit_break);
exit_map.insert(Token::HardBreakTrailing, on_exit_break);
exit_map.insert(Token::HeadingAtx, on_exit_heading_atx);
@@ -519,6 +524,10 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
exit_map.insert(Token::Strong, on_exit_strong);
exit_map.insert(Token::ThematicBreak, on_exit_thematic_break);
+ // To do: sort.
+ exit_map.insert(Token::List, on_exit_list);
+ exit_map.insert(Token::ListItemValue, on_exit_list_item_value);
+
// Handle one event.
let handle = |context: &mut CompileContext, index: usize| {
let event = &events[index];
@@ -766,7 +775,7 @@ fn on_exit_block_quote(context: &mut CompileContext) {
context.tight_stack.pop();
context.line_ending_if_needed();
context.tag("</blockquote>".to_string());
- // let mut slurp_all_line_endings = false;
+ // context.slurp_all_line_endings = false;
}
/// Handle [`Exit`][EventType::Exit]:[`CharacterReferenceMarker`][Token::CharacterReferenceMarker].
@@ -1056,7 +1065,7 @@ fn on_exit_label_text(context: &mut CompileContext) {
/// Handle [`Exit`][EventType::Exit]:[`LineEnding`][Token::LineEnding].
fn on_exit_line_ending(context: &mut CompileContext) {
- // if slurp_all_line_endings {
+ // if context.slurp_all_line_endings {
// // Empty.
// } else
if context.slurp_one_line_ending {
@@ -1176,3 +1185,68 @@ fn on_exit_thematic_break(context: &mut CompileContext) {
context.line_ending_if_needed();
context.tag("<hr />".to_string());
}
+
+// To do: sort.
+/// To do
+fn on_enter_list_item_marker(context: &mut CompileContext) {
+ let expect_first_item = context.expect_first_item.take().unwrap();
+
+ if expect_first_item {
+ context.tag(">".to_string());
+ } else {
+ on_exit_list_item(context);
+ }
+
+ context.line_ending_if_needed();
+ context.tag("<li>".to_string());
+ context.expect_first_item = Some(false);
+ // “Hack” to prevent a line ending from showing up if the item is empty.
+ context.last_was_tag = false;
+}
+
+/// To do (onenterlist{un,}ordered)
+fn on_enter_list(context: &mut CompileContext) {
+ // To do: !token._loose
+ context.tight_stack.push(false);
+ context.line_ending_if_needed();
+ context.tag("<ol".to_string()); // To do: `ol` / `ul`.
+ context.expect_first_item = Some(true);
+}
+
+/// To do
+fn on_exit_list_item_value(context: &mut CompileContext) {
+ let expect_first_item = context.expect_first_item.unwrap();
+
+ if expect_first_item {
+ let slice = serialize(
+ context.codes,
+ &from_exit_event(context.events, context.index),
+ false,
+ );
+ let value = slice.parse::<u32>().ok().unwrap();
+
+ if value != 1 {
+ context.tag(format!(" start=\"{}\"", encode(&value.to_string())));
+ }
+ }
+}
+
+/// To do.
+/// Note: there is no actual `Token::ListItem`.
+fn on_exit_list_item(context: &mut CompileContext) {
+ // && !context.slurp_all_line_endings
+ if context.last_was_tag {
+ context.line_ending_if_needed();
+ }
+
+ context.tag("</li>".to_string()); // To do: `ol` / `ul`.
+ // context.slurp_all_line_endings = false;
+}
+
+/// To do.
+fn on_exit_list(context: &mut CompileContext) {
+ on_exit_list_item(context);
+ context.tight_stack.pop();
+ context.line_ending();
+ context.tag("</ol>".to_string()); // To do: `ol` / `ul`.
+}
diff --git a/src/constant.rs b/src/constant.rs
index fc74316..5dd2435 100644
--- a/src/constant.rs
+++ b/src/constant.rs
@@ -193,6 +193,10 @@ pub const HTML_RAW_SIZE_MAX: usize = 8;
/// To safeguard performance, labels are capped at a large number: `999`.
pub const LINK_REFERENCE_SIZE_MAX: usize = 999;
+/// To do.
+/// See: <https://spec.commonmark.org/0.30/#ordered-list-marker>
+pub const LIST_ITEM_VALUE_SIZE_MAX: usize = 10;
+
/// Maximum allowed unbalanced parens in destination.
///
/// There can be many balanced parens, but if there are 33 opens that were not
diff --git a/src/construct/list.rs b/src/construct/list.rs
new file mode 100644
index 0000000..96b2496
--- /dev/null
+++ b/src/construct/list.rs
@@ -0,0 +1,195 @@
+//! To do.
+
+use crate::constant::{LIST_ITEM_VALUE_SIZE_MAX, TAB_SIZE};
+use crate::construct::partial_space_or_tab::space_or_tab_min_max;
+use crate::token::Token;
+use crate::tokenizer::{Code, State, StateFnResult, Tokenizer};
+
+/// Type of title.
+#[derive(Debug, PartialEq)]
+enum Kind {
+ /// In a dot (`.`) list.
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// 1. a
+ /// ```
+ Dot,
+ /// In a paren (`)`) list.
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// 1) a
+ /// ```
+ Paren,
+ /// In an asterisk (`*`) list.
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// * a
+ /// ```
+ Asterisk,
+ /// In a plus (`+`) list.
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// + a
+ /// ```
+ Plus,
+ /// In a dash (`-`) list.
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// - a
+ /// ```
+ Dash,
+}
+
+impl Kind {
+ /// Turn the kind into a [char].
+ fn as_char(&self) -> char {
+ match self {
+ Kind::Dot => '.',
+ Kind::Paren => ')',
+ Kind::Asterisk => '*',
+ Kind::Plus => '+',
+ Kind::Dash => '-',
+ }
+ }
+ /// Turn a [char] into a kind.
+ ///
+ /// ## Panics
+ ///
+ /// Panics if `char` is not `.`, `)`, `*`, `+`, or `-`.
+ fn from_char(char: char) -> Kind {
+ match char {
+ '.' => Kind::Dot,
+ ')' => Kind::Paren,
+ '*' => Kind::Asterisk,
+ '+' => Kind::Plus,
+ '-' => Kind::Dash,
+ _ => unreachable!("invalid char"),
+ }
+ }
+ /// Turn [Code] into a kind.
+ ///
+ /// ## Panics
+ ///
+ /// Panics if `code` is not `Code::Char('.' | ')' | '*' | '+' | '-')`.
+ fn from_code(code: Code) -> Kind {
+ match code {
+ Code::Char(char) => Kind::from_char(char),
+ _ => unreachable!("invalid code"),
+ }
+ }
+}
+
+/// To do.
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ // To do: allow arbitrary when code (indented) is turned off.
+ tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), before)(tokenizer, code)
+}
+
+/// To do.
+fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ // Unordered.
+ Code::Char('*' | '+' | '-') => {
+ // To do: check if this is a thematic break?
+ tokenizer.enter(Token::List);
+ tokenizer.enter(Token::ListItemPrefix);
+ marker(tokenizer, code)
+ }
+ // Ordered.
+ Code::Char(char) if char.is_ascii_digit() => {
+ tokenizer.enter(Token::List);
+ tokenizer.enter(Token::ListItemPrefix);
+ tokenizer.enter(Token::ListItemValue);
+ // To do: `interrupt || !1`?
+ inside(tokenizer, code, 0)
+ }
+ _ => (State::Nok, None),
+ }
+}
+
+/// To do.
+fn inside(tokenizer: &mut Tokenizer, code: Code, mut size: usize) -> StateFnResult {
+ match code {
+ Code::Char(char) if char.is_ascii_digit() && size < LIST_ITEM_VALUE_SIZE_MAX => {
+ tokenizer.consume(code);
+ size += 1;
+ (State::Fn(Box::new(move |t, c| inside(t, c, size))), None)
+ }
+ // To do: `(!self.interrupt || size < 2)`
+ Code::Char('.' | ')') => {
+ tokenizer.exit(Token::ListItemValue);
+ marker(tokenizer, code)
+ }
+ _ => (State::Nok, None),
+ }
+}
+
+/// To do.
+fn marker(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ let kind = Kind::from_code(code);
+ println!("list item kind: {:?}", kind);
+ tokenizer.enter(Token::ListItemMarker);
+ tokenizer.consume(code);
+ tokenizer.exit(Token::ListItemMarker);
+ // To do: check blank line, if true `State::Nok` else `on_blank`.
+ (State::Fn(Box::new(marker_after)), None)
+}
+
+/// To do.
+fn marker_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ tokenizer.attempt(list_item_prefix_whitespace, |ok| {
+ let func = if ok { prefix_end } else { prefix_other };
+ Box::new(func)
+ })(tokenizer, code)
+}
+
+// To do: `on_blank`.
+
+/// To do.
+fn prefix_other(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::VirtualSpace | Code::Char('\t' | ' ') => {
+ tokenizer.enter(Token::SpaceOrTab);
+ tokenizer.consume(code);
+ tokenizer.exit(Token::SpaceOrTab);
+ (State::Fn(Box::new(prefix_end)), None)
+ }
+ _ => (State::Nok, None),
+ }
+}
+
+/// To do.
+fn prefix_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ // To do: calculate size.
+ tokenizer.exit(Token::ListItemPrefix);
+ (State::Ok, Some(vec![code]))
+}
+
+/// To do.
+fn list_item_prefix_whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ // To do: check how big this should be?
+ tokenizer.go(
+ space_or_tab_min_max(1, TAB_SIZE - 1),
+ list_item_prefix_whitespace_after,
+ )(tokenizer, code)
+}
+
+fn list_item_prefix_whitespace_after(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ // To do: check some stuff?
+ (State::Ok, Some(vec![code]))
+}
+
+/// End of a block quote.
+pub fn end() -> Vec<Token> {
+ vec![Token::List]
+}
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index 06ff4e9..be9dfe3 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -33,7 +33,7 @@
//! * [label end][label_end]
//! * [label start (image)][label_start_image]
//! * [label start (link)][label_start_link]
-//! * list
+//! * [list][]
//! * [paragraph][]
//! * [thematic break][thematic_break]
//!
@@ -42,6 +42,7 @@
//! * [data][partial_data]
//! * [destination][partial_destination]
//! * [label][partial_label]
+//! * [non lazy continuation][partial_non_lazy_continuation]
//! * [space or tab][partial_space_or_tab]
//! * [title][partial_title]
//! * [whitespace][partial_whitespace]
@@ -80,6 +81,7 @@ pub mod html_text;
pub mod label_end;
pub mod label_start_image;
pub mod label_start_link;
+pub mod list;
pub mod paragraph;
pub mod partial_data;
pub mod partial_destination;
diff --git a/src/content/document.rs b/src/content/document.rs
index a8ff775..e32534e 100644
--- a/src/content/document.rs
+++ b/src/content/document.rs
@@ -8,8 +8,9 @@
//! * [Block quote][crate::construct::block_quote]
//! * List
-use crate::construct::block_quote::{
- cont as block_quote_cont, end as block_quote_end, start as block_quote,
+use crate::construct::{
+ block_quote::{cont as block_quote_cont, end as block_quote_end, start as block_quote},
+ list::{end as list_end, start as list},
};
use crate::content::flow::start as flow;
use crate::parser::ParseState;
@@ -96,9 +97,10 @@ fn before(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnR
// continuation line.
if info.continued < info.stack.len() {
let name = &info.stack[info.continued];
- // To do: list.
let cont = if name == "blockquote" {
block_quote_cont
+ } else if name == "list" {
+ unreachable!("todo: list cont {:?}", name)
} else {
unreachable!("todo: cont construct {:?}", name)
};
@@ -157,12 +159,19 @@ fn check_new_containers(
}
// Check if there is a new container.
- // To do: list.
tokenizer.attempt(block_quote, move |ok| {
if ok {
Box::new(|t, c| there_is_a_new_container(t, c, info, "blockquote".to_string()))
} else {
- Box::new(|t, c| there_is_no_new_container(t, c, info))
+ Box::new(|tokenizer, code| {
+ tokenizer.attempt(list, move |ok| {
+ if ok {
+ Box::new(|t, c| there_is_a_new_container(t, c, info, "list".to_string()))
+ } else {
+ Box::new(|t, c| there_is_no_new_container(t, c, info))
+ }
+ })(tokenizer, code)
+ })
}
})(tokenizer, code)
}
@@ -179,11 +188,12 @@ fn there_is_a_new_container(
// Remove from the event stack.
// We’ll properly add exits at different points manually.
- // To do: list.
let end = if name == "blockquote" {
block_quote_end
+ } else if name == "list" {
+ list_end
} else {
- unreachable!("todo: cont {:?}", name)
+ unreachable!("todo: end {:?}", name)
};
let token_types = end();
@@ -249,11 +259,12 @@ fn exit_containers(
while info.stack.len() > size {
let name = info.stack.pop().unwrap();
- // To do: list.
let end = if name == "blockquote" {
block_quote_end
+ } else if name == "list" {
+ list_end
} else {
- unreachable!("todo: cont {:?}", name)
+ unreachable!("todo: end {:?}", name)
};
let token_types = end();
@@ -265,7 +276,7 @@ fn exit_containers(
exits.push(Event {
event_type: EventType::Exit,
token_type: token_type.clone(),
- // To do: fix position later.
+ // Note: positions are fixed later.
point: tokenizer.point.clone(),
index: tokenizer.index,
previous: None,
diff --git a/src/token.rs b/src/token.rs
index 9b59719..f60f9cd 100644
--- a/src/token.rs
+++ b/src/token.rs
@@ -1765,4 +1765,11 @@ pub enum Token {
/// ^ ^ ^
/// ```
ThematicBreakSequence,
+
+ // To do: sort.
+ List,
+ ListItemPrefix,
+ ListItemValue,
+ ListItemMarker,
+ // ListItemPrefixSpaceOrTab,
}