From 6e20c3e79d4270fafb13a63af51eaffaa45c11e1 Mon Sep 17 00:00:00 2001
From: Titus Wormer <tituswormer@gmail.com>
Date: Mon, 15 Aug 2022 11:24:06 +0200
Subject: Refactor to rename list construct to list item

---
 src/constant.rs                 |   5 +-
 src/construct/blank_line.rs     |   4 +-
 src/construct/list.rs           | 460 ----------------------------------------
 src/construct/list_item.rs      | 460 ++++++++++++++++++++++++++++++++++++++++
 src/construct/mod.rs            |   4 +-
 src/construct/thematic_break.rs |   4 +-
 src/content/document.rs         |   6 +-
 src/event.rs                    |  12 +-
 src/lib.rs                      |   6 +-
 src/resolve.rs                  |   2 +-
 src/state.rs                    |  60 +++---
 11 files changed, 512 insertions(+), 511 deletions(-)
 delete mode 100644 src/construct/list.rs
 create mode 100644 src/construct/list_item.rs

(limited to 'src')
diff --git a/src/constant.rs b/src/constant.rs
index 6ef851c..47cb50c 100644
--- a/src/constant.rs
+++ b/src/constant.rs
@@ -202,7 +202,8 @@ pub const HTML_RAW_SIZE_MAX: usize = 8;
 /// To safeguard performance, labels are capped at a large number: `999`.
 pub const LINK_REFERENCE_SIZE_MAX: usize = 999;
 
-/// The max number of decimals allowed to form an (ordered) [list][] item.
+/// The max number of decimals allowed to form an (ordered)
+/// [list item][list-item].
 ///
 /// `CommonMark` caps this at 10 digits (9 is fine, 10 not).
 /// This limit is imposed because bigger numbers result in integer overflows
@@ -212,7 +213,7 @@ pub const LINK_REFERENCE_SIZE_MAX: usize = 999;
 ///
 /// *   [*§ 5.2 List items* in `CommonMark`](https://spec.commonmark.org/0.30/#ordered-list-marker)
 ///
-/// [list]: crate::construct::list
+/// [list-item]: crate::construct::list_item
 pub const LIST_ITEM_VALUE_SIZE_MAX: usize = 10;
 
 /// Maximum allowed unbalanced parens in destination.
diff --git a/src/construct/blank_line.rs b/src/construct/blank_line.rs
index 7f1d023..87d257d 100644
--- a/src/construct/blank_line.rs
+++ b/src/construct/blank_line.rs
@@ -12,7 +12,7 @@
 //! such as between two [heading (atx)][heading-atx]s.
 //! Sometimes, whether blank lines are present, changes the behavior of how
 //! HTML is rendered, such as whether blank lines are present between list
-//! items in a [list][].
+//! items in a [list][list-item].
 //! More than one blank line is never needed in `CommonMark`.
 //!
 //! Because blank lines can be empty (line endings are not considered part of
@@ -28,7 +28,7 @@
 //! *   [*§ 4.9 Blank lines* in `CommonMark`](https://spec.commonmark.org/0.30/#blank-lines)
 //!
 //! [heading-atx]: crate::construct::heading_atx
-//! [list]: crate::construct::list
+//! [list-item]: crate::construct::list_item
 //! [paragraph]: crate::construct::paragraph
 //! [flow]: crate::content::flow
 
diff --git a/src/construct/list.rs b/src/construct/list.rs
deleted file mode 100644
index 596330c..0000000
--- a/src/construct/list.rs
+++ /dev/null
@@ -1,460 +0,0 @@
-//! List is a construct that occurs in the [document][] content type.
-//!
-//! It forms with, roughly, the following BNF:
-//!
-//! ```bnf
-//! ; Restriction: there must be `eol | space_or_tab` after the start.
-//! ; Restriction: if the first line after the marker is not blank and starts with `5( space_or_tab )`,
-//! ; only the first `space_or_tab` is part of the start.
-//! list_item_start ::= '*' | '+' | '-' | 1*9( ascii_decimal ) ( '.' | ')' ) [ 1*4 space_or_tab ]
-//! ; Restriction: blank line allowed, except when this is the first continuation after a blank start.
-//! ; Restriction: if not blank, the line must be indented, exactly `n` times.
-//! list_item_cont ::= [ n( space_or_tab ) ]
-//! ```
-//!
-//! Further lines that are not prefixed with `list_item_cont` cause the item
-//! to be exited, except when those lines are lazy continuation.
-//! Like so many things in markdown, list (items) too, are very complex.
-//! See [*§ Phase 1: block structure*][commonmark-block] for more on parsing
-//! details.
-//!
-//! Lists relates to the `<li>`, `<ol>`, and `<ul>` elements in HTML.
-//! See [*§ 4.4.8 The `li` element*][html-li],
-//! [*§ 4.4.5 The `ol` element*][html-ol], and
-//! [*§ 4.4.7 The `ul` element*][html-ul] in the HTML spec for more info.
-//!
-//! ## Tokens
-//!
-//! *   [`ListItem`][Name::ListItem]
-//! *   [`ListItemMarker`][Name::ListItemMarker]
-//! *   [`ListItemPrefix`][Name::ListItemPrefix]
-//! *   [`ListItemValue`][Name::ListItemValue]
-//! *   [`ListOrdered`][Name::ListOrdered]
-//! *   [`ListUnordered`][Name::ListUnordered]
-//!
-//! ## References
-//!
-//! *   [`list.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/list.js)
-//! *   [*§ 5.2 List items* in `CommonMark`](https://spec.commonmark.org/0.30/#list-items)
-//! *   [*§ 5.3 Lists* in `CommonMark`](https://spec.commonmark.org/0.30/#lists)
-//!
-//! [document]: crate::content::document
-//! [html-li]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-li-element
-//! [html-ol]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-ol-element
-//! [html-ul]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-ul-element
-//! [commonmark-block]: https://spec.commonmark.org/0.30/#phase-1-block-structure
-
-use crate::constant::{LIST_ITEM_VALUE_SIZE_MAX, TAB_SIZE};
-use crate::construct::partial_space_or_tab::space_or_tab_min_max;
-use crate::event::{Kind, Name};
-use crate::resolve::Name as ResolveName;
-use crate::state::{Name as StateName, State};
-use crate::tokenizer::Tokenizer;
-use crate::util::{
-    skip,
-    slice::{Position, Slice},
-};
-
-/// Start of list item.
-///
-/// ```markdown
-/// > | * a
-///     ^
-/// ```
-pub fn start(tokenizer: &mut Tokenizer) -> State {
-    if tokenizer.parse_state.constructs.list {
-        tokenizer.enter(Name::ListItem);
-
-        if matches!(tokenizer.current, Some(b'\t' | b' ')) {
-            tokenizer.attempt(State::Next(StateName::ListBefore), State::Nok);
-            State::Retry(space_or_tab_min_max(
-                tokenizer,
-                0,
-                if tokenizer.parse_state.constructs.code_indented {
-                    TAB_SIZE - 1
-                } else {
-                    usize::MAX
-                },
-            ))
-        } else {
-            State::Retry(StateName::ListBefore)
-        }
-    } else {
-        State::Nok
-    }
-}
-
-/// After optional whitespace, at list item prefix.
-///
-/// ```markdown
-/// > | * a
-///     ^
-/// ```
-pub fn before(tokenizer: &mut Tokenizer) -> State {
-    // Unordered.
-    if matches!(tokenizer.current, Some(b'*' | b'-')) {
-        tokenizer.check(State::Nok, State::Next(StateName::ListBeforeUnordered));
-        State::Retry(StateName::ThematicBreakStart)
-    } else if tokenizer.current == Some(b'+') {
-        State::Retry(StateName::ListBeforeUnordered)
-    }
-    // Ordered.
-    else if tokenizer.current == Some(b'1')
-        || (matches!(tokenizer.current, Some(b'0'..=b'9')) && !tokenizer.interrupt)
-    {
-        State::Retry(StateName::ListBeforeOrdered)
-    } else {
-        State::Nok
-    }
-}
-
-/// At unordered list item marker.
-///
-/// The line is not a thematic break.
-///
-/// ```markdown
-/// > | * a
-///     ^
-/// ```
-pub fn before_unordered(tokenizer: &mut Tokenizer) -> State {
-    tokenizer.enter(Name::ListItemPrefix);
-    State::Retry(StateName::ListMarker)
-}
-
-/// At ordered list item value.
-///
-/// ```markdown
-/// > | * a
-///     ^
-/// ```
-pub fn before_ordered(tokenizer: &mut Tokenizer) -> State {
-    tokenizer.enter(Name::ListItemPrefix);
-    tokenizer.enter(Name::ListItemValue);
-    State::Retry(StateName::ListValue)
-}
-
-/// In ordered list item value.
-///
-/// ```markdown
-/// > | 1. a
-///     ^
-/// ```
-pub fn value(tokenizer: &mut Tokenizer) -> State {
-    if matches!(tokenizer.current, Some(b'.' | b')'))
-        && (!tokenizer.interrupt || tokenizer.tokenize_state.size < 2)
-    {
-        tokenizer.exit(Name::ListItemValue);
-        State::Retry(StateName::ListMarker)
-    } else if matches!(tokenizer.current, Some(b'0'..=b'9'))
-        && tokenizer.tokenize_state.size + 1 < LIST_ITEM_VALUE_SIZE_MAX
-    {
-        tokenizer.tokenize_state.size += 1;
-        tokenizer.consume();
-        State::Next(StateName::ListValue)
-    } else {
-        tokenizer.tokenize_state.size = 0;
-        State::Nok
-    }
-}
-
-/// At list item marker.
-///
-/// ```markdown
-/// > | * a
-///     ^
-/// > | 1. b
-///      ^
-/// ```
-pub fn marker(tokenizer: &mut Tokenizer) -> State {
-    tokenizer.enter(Name::ListItemMarker);
-    tokenizer.consume();
-    tokenizer.exit(Name::ListItemMarker);
-    State::Next(StateName::ListMarkerAfter)
-}
-
-/// After list item marker.
-///
-/// ```markdown
-/// > | * a
-///      ^
-/// > | 1. b
-///       ^
-/// ```
-pub fn marker_after(tokenizer: &mut Tokenizer) -> State {
-    tokenizer.tokenize_state.size = 1;
-    tokenizer.check(
-        State::Next(StateName::ListAfter),
-        State::Next(StateName::ListMarkerAfterFilled),
-    );
-    State::Retry(StateName::BlankLineStart)
-}
-
-/// After list item marker.
-///
-/// The marker is not followed by a blank line.
-///
-/// ```markdown
-/// > | * a
-///      ^
-/// ```
-pub fn marker_after_filled(tokenizer: &mut Tokenizer) -> State {
-    tokenizer.tokenize_state.size = 0;
-
-    // Attempt to parse up to the largest allowed indent, `nok` if there is more whitespace.
-    tokenizer.attempt(
-        State::Next(StateName::ListAfter),
-        State::Next(StateName::ListPrefixOther),
-    );
-    State::Retry(StateName::ListWhitespace)
-}
-
-/// After marker, at whitespace.
-///
-/// ```markdown
-/// > | * a
-///      ^
-/// ```
-pub fn whitespace(tokenizer: &mut Tokenizer) -> State {
-    tokenizer.attempt(State::Next(StateName::ListWhitespaceAfter), State::Nok);
-    State::Retry(space_or_tab_min_max(tokenizer, 1, TAB_SIZE))
-}
-
-/// After acceptable whitespace.
-///
-/// ```markdown
-/// > | * a
-///      ^
-/// ```
-pub fn whitespace_after(tokenizer: &mut Tokenizer) -> State {
-    if let Some(b'\t' | b' ') = tokenizer.current {
-        State::Nok
-    } else {
-        State::Ok
-    }
-}
-
-/// After marker, followed by no indent or more indent that needed.
-///
-/// ```markdown
-/// > | * a
-///      ^
-/// ```
-pub fn prefix_other(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        Some(b'\t' | b' ') => {
-            tokenizer.enter(Name::SpaceOrTab);
-            tokenizer.consume();
-            tokenizer.exit(Name::SpaceOrTab);
-            State::Next(StateName::ListAfter)
-        }
-        _ => State::Nok,
-    }
-}
-
-/// After list item prefix.
-///
-/// ```markdown
-/// > | * a
-///       ^
-/// ```
-pub fn after(tokenizer: &mut Tokenizer) -> State {
-    let blank = tokenizer.tokenize_state.size == 1;
-    tokenizer.tokenize_state.size = 0;
-
-    if blank && tokenizer.interrupt {
-        State::Nok
-    } else {
-        let start = skip::to_back(
-            &tokenizer.events,
-            tokenizer.events.len() - 1,
-            &[Name::ListItem],
-        );
-        let mut prefix = Slice::from_position(
-            tokenizer.parse_state.bytes,
-            &Position {
-                start: &tokenizer.events[start].point,
-                end: &tokenizer.point,
-            },
-        )
-        .len();
-
-        if blank {
-            prefix += 1;
-        }
-
-        let container = &mut tokenizer.tokenize_state.document_container_stack
-            [tokenizer.tokenize_state.document_continued];
-
-        container.blank_initial = blank;
-        container.size = prefix;
-
-        tokenizer.exit(Name::ListItemPrefix);
-        tokenizer.register_resolver_before(ResolveName::List);
-        State::Ok
-    }
-}
-
-/// Start of list item continuation.
-///
-/// ```markdown
-///   | * a
-/// > |   b
-///     ^
-/// ```
-pub fn cont_start(tokenizer: &mut Tokenizer) -> State {
-    tokenizer.check(
-        State::Next(StateName::ListContBlank),
-        State::Next(StateName::ListContFilled),
-    );
-    State::Retry(StateName::BlankLineStart)
-}
-
-/// Start of blank list item continuation.
-///
-/// ```markdown
-///   | * a
-/// > |
-///     ^
-///   |   b
-/// ```
-pub fn cont_blank(tokenizer: &mut Tokenizer) -> State {
-    let container = &mut tokenizer.tokenize_state.document_container_stack
-        [tokenizer.tokenize_state.document_continued];
-    let size = container.size;
-
-    if container.blank_initial {
-        State::Nok
-    } else if matches!(tokenizer.current, Some(b'\t' | b' ')) {
-        // Consume, optionally, at most `size`.
-        State::Retry(space_or_tab_min_max(tokenizer, 0, size))
-    } else {
-        State::Ok
-    }
-}
-
-/// Start of non-blank list item continuation.
-///
-/// ```markdown
-///   | * a
-/// > |   b
-///     ^
-/// ```
-pub fn cont_filled(tokenizer: &mut Tokenizer) -> State {
-    let container = &mut tokenizer.tokenize_state.document_container_stack
-        [tokenizer.tokenize_state.document_continued];
-    let size = container.size;
-
-    container.blank_initial = false;
-
-    if matches!(tokenizer.current, Some(b'\t' | b' ')) {
-        // Consume exactly `size`.
-        State::Retry(space_or_tab_min_max(tokenizer, size, size))
-    } else {
-        State::Nok
-    }
-}
-
-/// Find adjacent list items with the same marker.
-pub fn resolve(tokenizer: &mut Tokenizer) {
-    let mut lists_wip: Vec<(u8, usize, usize, usize)> = vec![];
-    let mut lists: Vec<(u8, usize, usize, usize)> = vec![];
-    let mut index = 0;
-    let mut balance = 0;
-
-    // Merge list items.
-    while index < tokenizer.events.len() {
-        let event = &tokenizer.events[index];
-
-        if event.name == Name::ListItem {
-            if event.kind == Kind::Enter {
-                let end = skip::opt(&tokenizer.events, index, &[Name::ListItem]) - 1;
-                let marker = skip::to(&tokenizer.events, index, &[Name::ListItemMarker]);
-                // Guaranteed to be a valid ASCII byte.
-                let marker = Slice::from_index(
-                    tokenizer.parse_state.bytes,
-                    tokenizer.events[marker].point.index,
-                )
-                .head()
-                .unwrap();
-                let current = (marker, balance, index, end);
-
-                let mut list_index = lists_wip.len();
-                let mut matched = false;
-
-                while list_index > 0 {
-                    list_index -= 1;
-                    let previous = &lists_wip[list_index];
-                    let before = skip::opt(
-                        &tokenizer.events,
-                        previous.3 + 1,
-                        &[
-                            Name::SpaceOrTab,
-                            Name::LineEnding,
-                            Name::BlankLineEnding,
-                            Name::BlockQuotePrefix,
-                        ],
-                    );
-
-                    if previous.0 == current.0 && previous.1 == current.1 && before == current.2 {
-                        let previous_mut = &mut lists_wip[list_index];
-                        previous_mut.3 = current.3;
-                        lists.append(&mut lists_wip.split_off(list_index + 1));
-                        matched = true;
-                        break;
-                    }
-                }
-
-                if !matched {
-                    let mut index = lists_wip.len();
-                    let mut exit = None;
-
-                    while index > 0 {
-                        index -= 1;
-
-                        // If the current (new) item starts after where this
-                        // item on the stack ends, we can remove it from the
-                        // stack.
-                        if current.2 > lists_wip[index].3 {
-                            exit = Some(index);
-                        } else {
-                            break;
-                        }
-                    }
-
-                    if let Some(exit) = exit {
-                        lists.append(&mut lists_wip.split_off(exit));
-                    }
-
-                    lists_wip.push(current);
-                }
-
-                balance += 1;
-            } else {
-                balance -= 1;
-            }
-        }
-
-        index += 1;
-    }
-
-    lists.append(&mut lists_wip);
-
-    // Inject events.
-    let mut index = 0;
-    while index < lists.len() {
-        let list_item = &lists[index];
-        let mut list_start = tokenizer.events[list_item.2].clone();
-        let mut list_end = tokenizer.events[list_item.3].clone();
-        let name = match list_item.0 {
-            b'.' | b')' => Name::ListOrdered,
-            _ => Name::ListUnordered,
-        };
-        list_start.name = name.clone();
-        list_end.name = name;
-
-        tokenizer.map.add(list_item.2, 0, vec![list_start]);
-        tokenizer.map.add(list_item.3 + 1, 0, vec![list_end]);
-
-        index += 1;
-    }
-}
diff --git a/src/construct/list_item.rs b/src/construct/list_item.rs
new file mode 100644
index 0000000..5161254
--- /dev/null
+++ b/src/construct/list_item.rs
@@ -0,0 +1,460 @@
+//! List item is a construct that occurs in the [document][] content type.
+//!
+//! It forms with, roughly, the following BNF:
+//!
+//! ```bnf
+//! ; Restriction: there must be `eol | space_or_tab` after the start.
+//! ; Restriction: if the first line after the marker is not blank and starts with `5( space_or_tab )`,
+//! ; only the first `space_or_tab` is part of the start.
+//! list_item_start ::= '*' | '+' | '-' | 1*9( ascii_decimal ) ( '.' | ')' ) [ 1*4 space_or_tab ]
+//! ; Restriction: blank line allowed, except when this is the first continuation after a blank start.
+//! ; Restriction: if not blank, the line must be indented, exactly `n` times.
+//! list_item_cont ::= [ n( space_or_tab ) ]
+//! ```
+//!
+//! Further lines that are not prefixed with `list_item_cont` cause the item
+//! to be exited, except when those lines are lazy continuation.
+//! Like so many things in markdown, list (items) too, are very complex.
+//! See [*§ Phase 1: block structure*][commonmark-block] for more on parsing
+//! details.
+//!
+//! Lists relate to the `<li>`, `<ol>`, and `<ul>` elements in HTML.
+//! See [*§ 4.4.8 The `li` element*][html-li],
+//! [*§ 4.4.5 The `ol` element*][html-ol], and
+//! [*§ 4.4.7 The `ul` element*][html-ul] in the HTML spec for more info.
+//!
+//! ## Tokens
+//!
+//! *   [`ListItem`][Name::ListItem]
+//! *   [`ListItemMarker`][Name::ListItemMarker]
+//! *   [`ListItemPrefix`][Name::ListItemPrefix]
+//! *   [`ListItemValue`][Name::ListItemValue]
+//! *   [`ListOrdered`][Name::ListOrdered]
+//! *   [`ListUnordered`][Name::ListUnordered]
+//!
+//! ## References
+//!
+//! *   [`list.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/list.js)
+//! *   [*§ 5.2 List items* in `CommonMark`](https://spec.commonmark.org/0.30/#list-items)
+//! *   [*§ 5.3 Lists* in `CommonMark`](https://spec.commonmark.org/0.30/#lists)
+//!
+//! [document]: crate::content::document
+//! [html-li]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-li-element
+//! [html-ol]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-ol-element
+//! [html-ul]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-ul-element
+//! [commonmark-block]: https://spec.commonmark.org/0.30/#phase-1-block-structure
+
+use crate::constant::{LIST_ITEM_VALUE_SIZE_MAX, TAB_SIZE};
+use crate::construct::partial_space_or_tab::space_or_tab_min_max;
+use crate::event::{Kind, Name};
+use crate::resolve::Name as ResolveName;
+use crate::state::{Name as StateName, State};
+use crate::tokenizer::Tokenizer;
+use crate::util::{
+    skip,
+    slice::{Position, Slice},
+};
+
+/// Start of list item.
+///
+/// ```markdown
+/// > | * a
+///     ^
+/// ```
+pub fn start(tokenizer: &mut Tokenizer) -> State {
+    if tokenizer.parse_state.constructs.list_item {
+        tokenizer.enter(Name::ListItem);
+
+        if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+            tokenizer.attempt(State::Next(StateName::ListItemBefore), State::Nok);
+            State::Retry(space_or_tab_min_max(
+                tokenizer,
+                0,
+                if tokenizer.parse_state.constructs.code_indented {
+                    TAB_SIZE - 1
+                } else {
+                    usize::MAX
+                },
+            ))
+        } else {
+            State::Retry(StateName::ListItemBefore)
+        }
+    } else {
+        State::Nok
+    }
+}
+
+/// After optional whitespace, at list item prefix.
+///
+/// ```markdown
+/// > | * a
+///     ^
+/// ```
+pub fn before(tokenizer: &mut Tokenizer) -> State {
+    // Unordered.
+    if matches!(tokenizer.current, Some(b'*' | b'-')) {
+        tokenizer.check(State::Nok, State::Next(StateName::ListItemBeforeUnordered));
+        State::Retry(StateName::ThematicBreakStart)
+    } else if tokenizer.current == Some(b'+') {
+        State::Retry(StateName::ListItemBeforeUnordered)
+    }
+    // Ordered.
+    else if tokenizer.current == Some(b'1')
+        || (matches!(tokenizer.current, Some(b'0'..=b'9')) && !tokenizer.interrupt)
+    {
+        State::Retry(StateName::ListItemBeforeOrdered)
+    } else {
+        State::Nok
+    }
+}
+
+/// At unordered list item marker.
+///
+/// The line is not a thematic break.
+///
+/// ```markdown
+/// > | * a
+///     ^
+/// ```
+pub fn before_unordered(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.enter(Name::ListItemPrefix);
+    State::Retry(StateName::ListItemMarker)
+}
+
+/// At ordered list item value.
+///
+/// ```markdown
+/// > | 1. a
+///     ^
+/// ```
+pub fn before_ordered(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.enter(Name::ListItemPrefix);
+    tokenizer.enter(Name::ListItemValue);
+    State::Retry(StateName::ListItemValue)
+}
+
+/// In ordered list item value.
+///
+/// ```markdown
+/// > | 1. a
+///     ^
+/// ```
+pub fn value(tokenizer: &mut Tokenizer) -> State {
+    if matches!(tokenizer.current, Some(b'.' | b')'))
+        && (!tokenizer.interrupt || tokenizer.tokenize_state.size < 2)
+    {
+        tokenizer.exit(Name::ListItemValue);
+        State::Retry(StateName::ListItemMarker)
+    } else if matches!(tokenizer.current, Some(b'0'..=b'9'))
+        && tokenizer.tokenize_state.size + 1 < LIST_ITEM_VALUE_SIZE_MAX
+    {
+        tokenizer.tokenize_state.size += 1;
+        tokenizer.consume();
+        State::Next(StateName::ListItemValue)
+    } else {
+        tokenizer.tokenize_state.size = 0;
+        State::Nok
+    }
+}
+
+/// At list item marker.
+///
+/// ```markdown
+/// > | * a
+///     ^
+/// > | 1. b
+///      ^
+/// ```
+pub fn marker(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.enter(Name::ListItemMarker);
+    tokenizer.consume();
+    tokenizer.exit(Name::ListItemMarker);
+    State::Next(StateName::ListItemMarkerAfter)
+}
+
+/// After list item marker.
+///
+/// ```markdown
+/// > | * a
+///      ^
+/// > | 1. b
+///       ^
+/// ```
+pub fn marker_after(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.tokenize_state.size = 1;
+    tokenizer.check(
+        State::Next(StateName::ListItemAfter),
+        State::Next(StateName::ListItemMarkerAfterFilled),
+    );
+    State::Retry(StateName::BlankLineStart)
+}
+
+/// After list item marker.
+///
+/// The marker is not followed by a blank line.
+///
+/// ```markdown
+/// > | * a
+///      ^
+/// ```
+pub fn marker_after_filled(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.tokenize_state.size = 0;
+
+    // Attempt to parse up to the largest allowed indent, `nok` if there is more whitespace.
+    tokenizer.attempt(
+        State::Next(StateName::ListItemAfter),
+        State::Next(StateName::ListItemPrefixOther),
+    );
+    State::Retry(StateName::ListItemWhitespace)
+}
+
+/// After marker, at whitespace.
+///
+/// ```markdown
+/// > | * a
+///      ^
+/// ```
+pub fn whitespace(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.attempt(State::Next(StateName::ListItemWhitespaceAfter), State::Nok);
+    State::Retry(space_or_tab_min_max(tokenizer, 1, TAB_SIZE))
+}
+
+/// After acceptable whitespace.
+///
+/// ```markdown
+/// > | * a
+///      ^
+/// ```
+pub fn whitespace_after(tokenizer: &mut Tokenizer) -> State {
+    if let Some(b'\t' | b' ') = tokenizer.current {
+        State::Nok
+    } else {
+        State::Ok
+    }
+}
+
+/// After marker, followed by no indent or more indent than needed.
+///
+/// ```markdown
+/// > | * a
+///      ^
+/// ```
+pub fn prefix_other(tokenizer: &mut Tokenizer) -> State {
+    match tokenizer.current {
+        Some(b'\t' | b' ') => {
+            tokenizer.enter(Name::SpaceOrTab);
+            tokenizer.consume();
+            tokenizer.exit(Name::SpaceOrTab);
+            State::Next(StateName::ListItemAfter)
+        }
+        _ => State::Nok,
+    }
+}
+
+/// After list item prefix.
+///
+/// ```markdown
+/// > | * a
+///       ^
+/// ```
+pub fn after(tokenizer: &mut Tokenizer) -> State {
+    let blank = tokenizer.tokenize_state.size == 1;
+    tokenizer.tokenize_state.size = 0;
+
+    if blank && tokenizer.interrupt {
+        State::Nok
+    } else {
+        let start = skip::to_back(
+            &tokenizer.events,
+            tokenizer.events.len() - 1,
+            &[Name::ListItem],
+        );
+        let mut prefix = Slice::from_position(
+            tokenizer.parse_state.bytes,
+            &Position {
+                start: &tokenizer.events[start].point,
+                end: &tokenizer.point,
+            },
+        )
+        .len();
+
+        if blank {
+            prefix += 1;
+        }
+
+        let container = &mut tokenizer.tokenize_state.document_container_stack
+            [tokenizer.tokenize_state.document_continued];
+
+        container.blank_initial = blank;
+        container.size = prefix;
+
+        tokenizer.exit(Name::ListItemPrefix);
+        tokenizer.register_resolver_before(ResolveName::List);
+        State::Ok
+    }
+}
+
+/// Start of list item continuation.
+///
+/// ```markdown
+///   | * a
+/// > |   b
+///     ^
+/// ```
+pub fn cont_start(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.check(
+        State::Next(StateName::ListItemContBlank),
+        State::Next(StateName::ListItemContFilled),
+    );
+    State::Retry(StateName::BlankLineStart)
+}
+
+/// Start of blank list item continuation.
+///
+/// ```markdown
+///   | * a
+/// > |
+///     ^
+///   |   b
+/// ```
+pub fn cont_blank(tokenizer: &mut Tokenizer) -> State {
+    let container = &mut tokenizer.tokenize_state.document_container_stack
+        [tokenizer.tokenize_state.document_continued];
+    let size = container.size;
+
+    if container.blank_initial {
+        State::Nok
+    } else if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+        // Consume, optionally, at most `size`.
+        State::Retry(space_or_tab_min_max(tokenizer, 0, size))
+    } else {
+        State::Ok
+    }
+}
+
+/// Start of non-blank list item continuation.
+///
+/// ```markdown
+///   | * a
+/// > |   b
+///     ^
+/// ```
+pub fn cont_filled(tokenizer: &mut Tokenizer) -> State {
+    let container = &mut tokenizer.tokenize_state.document_container_stack
+        [tokenizer.tokenize_state.document_continued];
+    let size = container.size;
+
+    container.blank_initial = false;
+
+    if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+        // Consume exactly `size`.
+        State::Retry(space_or_tab_min_max(tokenizer, size, size))
+    } else {
+        State::Nok
+    }
+}
+
+/// Find adjacent list items with the same marker.
+pub fn resolve(tokenizer: &mut Tokenizer) {
+    let mut lists_wip: Vec<(u8, usize, usize, usize)> = vec![];
+    let mut lists: Vec<(u8, usize, usize, usize)> = vec![];
+    let mut index = 0;
+    let mut balance = 0;
+
+    // Merge list items.
+    while index < tokenizer.events.len() {
+        let event = &tokenizer.events[index];
+
+        if event.name == Name::ListItem {
+            if event.kind == Kind::Enter {
+                let end = skip::opt(&tokenizer.events, index, &[Name::ListItem]) - 1;
+                let marker = skip::to(&tokenizer.events, index, &[Name::ListItemMarker]);
+                // Guaranteed to be a valid ASCII byte.
+                let marker = Slice::from_index(
+                    tokenizer.parse_state.bytes,
+                    tokenizer.events[marker].point.index,
+                )
+                .head()
+                .unwrap();
+                let current = (marker, balance, index, end);
+
+                let mut list_index = lists_wip.len();
+                let mut matched = false;
+
+                while list_index > 0 {
+                    list_index -= 1;
+                    let previous = &lists_wip[list_index];
+                    let before = skip::opt(
+                        &tokenizer.events,
+                        previous.3 + 1,
+                        &[
+                            Name::SpaceOrTab,
+                            Name::LineEnding,
+                            Name::BlankLineEnding,
+                            Name::BlockQuotePrefix,
+                        ],
+                    );
+
+                    if previous.0 == current.0 && previous.1 == current.1 && before == current.2 {
+                        let previous_mut = &mut lists_wip[list_index];
+                        previous_mut.3 = current.3;
+                        lists.append(&mut lists_wip.split_off(list_index + 1));
+                        matched = true;
+                        break;
+                    }
+                }
+
+                if !matched {
+                    let mut index = lists_wip.len();
+                    let mut exit = None;
+
+                    while index > 0 {
+                        index -= 1;
+
+                        // If the current (new) item starts after where this
+                        // item on the stack ends, we can remove it from the
+                        // stack.
+                        if current.2 > lists_wip[index].3 {
+                            exit = Some(index);
+                        } else {
+                            break;
+                        }
+                    }
+
+                    if let Some(exit) = exit {
+                        lists.append(&mut lists_wip.split_off(exit));
+                    }
+
+                    lists_wip.push(current);
+                }
+
+                balance += 1;
+            } else {
+                balance -= 1;
+            }
+        }
+
+        index += 1;
+    }
+
+    lists.append(&mut lists_wip);
+
+    // Inject events.
+    let mut index = 0;
+    while index < lists.len() {
+        let list_item = &lists[index];
+        let mut list_start = tokenizer.events[list_item.2].clone();
+        let mut list_end = tokenizer.events[list_item.3].clone();
+        let name = match list_item.0 {
+            b'.' | b')' => Name::ListOrdered,
+            _ => Name::ListUnordered,
+        };
+        list_start.name = name.clone();
+        list_end.name = name;
+
+        tokenizer.map.add(list_item.2, 0, vec![list_start]);
+        tokenizer.map.add(list_item.3 + 1, 0, vec![list_end]);
+
+        index += 1;
+    }
+}
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index 0adf611..566bb30 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -32,7 +32,7 @@
 //! *   [label end][label_end]
 //! *   [label start (image)][label_start_image]
 //! *   [label start (link)][label_start_link]
-//! *   [list][]
+//! *   [list item][list_item]
 //! *   [paragraph][]
 //! *   [thematic break][thematic_break]
 //!
@@ -84,7 +84,7 @@ pub mod html_text;
 pub mod label_end;
 pub mod label_start_image;
 pub mod label_start_link;
-pub mod list;
+pub mod list_item;
 pub mod paragraph;
 pub mod partial_bom;
 pub mod partial_data;
diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs
index f493b96..1b581ea 100644
--- a/src/construct/thematic_break.rs
+++ b/src/construct/thematic_break.rs
@@ -20,7 +20,7 @@
 //! As using more than three markers has no effect other than wasting space,
 //! it is recommended to use exactly three markers.
 //! Thematic breaks formed with asterisks or dashes can interfere with
-//! [list][]s if there is whitespace between them: `* * *` and `- - -`.
+//! [list][list-item]s if there is whitespace between them: `* * *` and `- - -`.
 //! For these reasons, it is recommend to not use spaces or tabs between the
 //! markers.
 //! Thematic breaks formed with dashes (without whitespace) can also form
@@ -45,7 +45,7 @@
 //!
 //! [flow]: crate::content::flow
 //! [heading_setext]: crate::construct::heading_setext
-//! [list]: crate::construct::list
+//! [list-item]: crate::construct::list_item
 //! [html]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-hr-element
 
 use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
diff --git a/src/content/document.rs b/src/content/document.rs
index 41d60e2..f90aea7 100644
--- a/src/content/document.rs
+++ b/src/content/document.rs
@@ -6,7 +6,7 @@
 //! The constructs found in flow are:
 //!
 //! *   [Block quote][crate::construct::block_quote]
-//! *   [List][crate::construct::list]
+//! *   [List item][crate::construct::list_item]
 
 use crate::event::{Content, Event, Kind, Link, Name, Point};
 use crate::parser::ParseState;
@@ -103,7 +103,7 @@ pub fn container_existing_before(tokenizer: &mut Tokenizer) -> State {
 
         let name = match container.kind {
             Container::BlockQuote => StateName::BlockQuoteContStart,
-            Container::ListItem => StateName::ListContStart,
+            Container::ListItem => StateName::ListItemContStart,
         };
 
         tokenizer.attempt(
@@ -201,7 +201,7 @@ pub fn container_new_before_not_block_quote(tokenizer: &mut Tokenizer) -> State
         State::Next(StateName::DocumentContainerNewAfter),
         State::Next(StateName::DocumentContainerNewBeforeNotList),
     );
-    State::Retry(StateName::ListStart)
+    State::Retry(StateName::ListItemStart)
 }
 
 /// At new container, but not a list (or block quote).
diff --git a/src/event.rs b/src/event.rs
index be32b5b..8cdb959 100644
--- a/src/event.rs
+++ b/src/event.rs
@@ -1332,7 +1332,7 @@ pub enum Name {
     ///     [`ListItemPrefix`][Name::ListItemPrefix],
     ///     [flow content][crate::content::flow]
     /// *   **Construct**:
-    ///     [`list`][crate::construct::list]
+    ///     [`list item`][crate::construct::list_item]
     ///
     /// ## Example
     ///
@@ -1352,7 +1352,7 @@ pub enum Name {
     /// *   **Content model**:
     ///     void
     /// *   **Construct**:
-    ///     [`list`][crate::construct::list]
+    ///     [`list item`][crate::construct::list_item]
     ///
     /// ## Example
     ///
@@ -1374,7 +1374,7 @@ pub enum Name {
     ///     [`ListItemValue`][Name::ListItemValue],
     ///     [`SpaceOrTab`][Name::SpaceOrTab]
     /// *   **Construct**:
-    ///     [`list`][crate::construct::list]
+    ///     [`list item`][crate::construct::list_item]
     ///
     /// ## Example
     ///
@@ -1394,7 +1394,7 @@ pub enum Name {
     /// *   **Content model**:
     ///     void
     /// *   **Construct**:
-    ///     [`list`][crate::construct::list]
+    ///     [`list item`][crate::construct::list_item]
     ///
     /// ## Example
     ///
@@ -1416,7 +1416,7 @@ pub enum Name {
     ///     [`LineEnding`][Name::LineEnding],
     ///     [`SpaceOrTab`][Name::SpaceOrTab]
     /// *   **Construct**:
-    ///     [`list`][crate::construct::list]
+    ///     [`list item`][crate::construct::list_item]
     ///
     /// ## Example
     ///
@@ -1440,7 +1440,7 @@ pub enum Name {
     ///     [`LineEnding`][Name::LineEnding],
     ///     [`SpaceOrTab`][Name::SpaceOrTab]
     /// *   **Construct**:
-    ///     [`list`][crate::construct::list]
+    ///     [`list item`][crate::construct::list_item]
     ///
     /// ## Example
     ///
diff --git a/src/lib.rs b/src/lib.rs
index 24a794b..f9f5326 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -218,13 +218,13 @@ pub struct Constructs {
     ///         ^^^^
     /// ```
     pub label_end: bool,
-    /// List.
+    /// List items.
     ///
     /// ```markdown
     /// > | * a
     ///     ^^^
     /// ```
-    pub list: bool,
+    pub list_item: bool,
     /// Thematic break.
     ///
     /// ```markdown
@@ -256,7 +256,7 @@ impl Default for Constructs {
             label_start_image: true,
             label_start_link: true,
             label_end: true,
-            list: true,
+            list_item: true,
             thematic_break: true,
         }
     }
diff --git a/src/resolve.rs b/src/resolve.rs
index e7d63f9..edc92b2 100644
--- a/src/resolve.rs
+++ b/src/resolve.rs
@@ -23,7 +23,7 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) {
         Name::Attention => construct::attention::resolve,
         Name::HeadingAtx => construct::heading_atx::resolve,
         Name::HeadingSetext => construct::heading_setext::resolve,
-        Name::List => construct::list::resolve,
+        Name::List => construct::list_item::resolve,
         Name::Paragraph => construct::paragraph::resolve,
         Name::Data => construct::partial_data::resolve,
         Name::String => content::string::resolve,
diff --git a/src/state.rs b/src/state.rs
index 006ffe1..344a31e 100644
--- a/src/state.rs
+++ b/src/state.rs
@@ -240,21 +240,21 @@ pub enum Name {
 
     LabelStartLinkStart,
 
-    ListStart,
-    ListBefore,
-    ListBeforeOrdered,
-    ListBeforeUnordered,
-    ListValue,
-    ListMarker,
-    ListMarkerAfter,
-    ListAfter,
-    ListMarkerAfterFilled,
-    ListWhitespace,
-    ListPrefixOther,
-    ListWhitespaceAfter,
-    ListContStart,
-    ListContBlank,
-    ListContFilled,
+    ListItemStart,
+    ListItemBefore,
+    ListItemBeforeOrdered,
+    ListItemBeforeUnordered,
+    ListItemValue,
+    ListItemMarker,
+    ListItemMarkerAfter,
+    ListItemAfter,
+    ListItemMarkerAfterFilled,
+    ListItemWhitespace,
+    ListItemPrefixOther,
+    ListItemWhitespaceAfter,
+    ListItemContStart,
+    ListItemContBlank,
+    ListItemContFilled,
 
     NonLazyContinuationStart,
     NonLazyContinuationAfter,
@@ -552,21 +552,21 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
         Name::LabelStartImageOpen => construct::label_start_image::open,
         Name::LabelStartLinkStart => construct::label_start_link::start,
 
-        Name::ListStart => construct::list::start,
-        Name::ListBefore => construct::list::before,
-        Name::ListBeforeOrdered => construct::list::before_ordered,
-        Name::ListBeforeUnordered => construct::list::before_unordered,
-        Name::ListValue => construct::list::value,
-        Name::ListMarker => construct::list::marker,
-        Name::ListMarkerAfter => construct::list::marker_after,
-        Name::ListAfter => construct::list::after,
-        Name::ListMarkerAfterFilled => construct::list::marker_after_filled,
-        Name::ListWhitespace => construct::list::whitespace,
-        Name::ListWhitespaceAfter => construct::list::whitespace_after,
-        Name::ListPrefixOther => construct::list::prefix_other,
-        Name::ListContStart => construct::list::cont_start,
-        Name::ListContBlank => construct::list::cont_blank,
-        Name::ListContFilled => construct::list::cont_filled,
+        Name::ListItemStart => construct::list_item::start,
+        Name::ListItemBefore => construct::list_item::before,
+        Name::ListItemBeforeOrdered => construct::list_item::before_ordered,
+        Name::ListItemBeforeUnordered => construct::list_item::before_unordered,
+        Name::ListItemValue => construct::list_item::value,
+        Name::ListItemMarker => construct::list_item::marker,
+        Name::ListItemMarkerAfter => construct::list_item::marker_after,
+        Name::ListItemAfter => construct::list_item::after,
+        Name::ListItemMarkerAfterFilled => construct::list_item::marker_after_filled,
+        Name::ListItemWhitespace => construct::list_item::whitespace,
+        Name::ListItemWhitespaceAfter => construct::list_item::whitespace_after,
+        Name::ListItemPrefixOther => construct::list_item::prefix_other,
+        Name::ListItemContStart => construct::list_item::cont_start,
+        Name::ListItemContBlank => construct::list_item::cont_blank,
+        Name::ListItemContFilled => construct::list_item::cont_filled,
 
         Name::NonLazyContinuationStart => construct::partial_non_lazy_continuation::start,
         Name::NonLazyContinuationAfter => construct::partial_non_lazy_continuation::after,
-- 
cgit