From bcc4676b84a06af5e38ebaa31f0217cae090be08 Mon Sep 17 00:00:00 2001
From: Titus Wormer
Date: Tue, 21 Jun 2022 13:14:07 +0200
Subject: Update todo list

---
 readme.md          | 86 +++++++++++++++++++++++++++++++++++++++++++++++-------
 src/subtokenize.rs |  9 +-----
 src/tokenizer.rs   | 39 +++++++++++++++----------
 3 files changed, 99 insertions(+), 35 deletions(-)

diff --git a/readme.md b/readme.md
index d991a58..9986d3f 100644
--- a/readme.md
+++ b/readme.md
@@ -64,37 +64,101 @@ cargo doc --document-private-items
 - [ ] (5) Figure out extensions
 - [ ] (1) Support turning off constructs
 
-### Small things
+### All the things
 
-- [ ] (1) Use `impl fmt::Display for x` for a bunch of enums, e.g., markers
-- [ ] (1) Parse initial and final whitespace of paragraphs (in text)
-- [ ] (1) Add docs to subtokenize
+#### Docs
+
+- [ ] (1) Add docs for `default_line_ending`
+- [ ] (1) Add docs for virtual spaces
+- [ ] (1) Add docs to `subtokenize.rs`
+- [ ] (1) Add docs for `link.rs`
+- [ ] (1) Add docs for token types
+- [ ] (1) Add docs for tokenizer (`go`, `define_skip`,
+  `account_for_potential_skip`, `attempt_5`, `attempt_7`, `call_multiple`)
+- [ ] (1) Add docs for sanitation (autolink, definition, resource)
+- [ ] (1) Add docs for how references and definitions match (definition,
+  reference)
+- [ ] (1) Go through all bnf
+- [ ] (1) Go through all docs
 - [ ] (1) Add module docs to parser
 - [ ] (1) Add overview docs on how everything works
+
+#### Refactor
+
 - [ ] (1) Move safe protocols to constants
-- [ ] (3) Clean compiler
+- [ ] (1) Use `impl fmt::Display for x` for a bunch of enums, e.g., markers
+- [ ] (1) Make text data, string data constructs (document in
+  `construct/mod.rs`)
+- [ ] (1) Configurable tokens (destination, label, title)
+- [ ] (1) Configurable limit (destination)
+
+#### Parse
+
+- [ ] (1) Parse initial and final whitespace of paragraphs (in text)\
+  test (`code_indented`, `hard_break_escape`, `hard_break_trailing`,
+  `heading_atx`, `heading_setext`, `html_flow`, `misc_soft_break`,
+  `misc_tabs`, `thematic_break`)
+- [ ] (1) Get definition identifiers (definition)
+- [ ] (3) Interrupting (html flow complete)
+- [ ] (5) labels\
+  test (`character_escape`, `character_reference`, `definition`,
+  `misc_dangerous_protocol`, `misc_tabs`, `misc_url`, `thematic_break`)\
+  link link reference (definition)\
+  link label end (destination, label, title)\
+  link label start (label)
+- [ ] (5) attention\
+  test (`character_reference`, `hard_break_escape`, `hard_break_trailing`,
+  `heading_atx`, `heading_setext`, `html_flow`, `thematic_break`)\
+- [ ] (8) block quote\
+  test (`code_fenced`, `code_indented`, `heading_atx`, `heading_setext`,
+  `html_flow`, `misc_default_line_ending`, `thematic_break`)
+- [ ] (8) list\
+  test (`character_reference`, `code_indented`, `heading_setext`,
+  `html_flow`, `thematic_break`)\
+  link (`blank line`, `thematic break`)
+- [ ] (3) Lazy lines (`code indented`, `html flow`)
+- [ ] (3) Concrete (`html flow`)
+- [ ] (3) Turn off things (enable every test for these)
+- [ ] (3) Make tokenizer tokens extendable
+
+#### Test
+
 - [ ] (1) Make sure positional info is perfect
-- [ ] (3) Figure out lifetimes of things (see `life time` in source)
 - [ ] (3) Use `commonmark` tests
 - [ ] (3) Share a bunch of tests with `micromark-js`
+
+#### Misc
+
+- [ ] (3) Check subtokenizer unraveling is ok
+- [ ] (3) Remove splicing and cloning in subtokenizer
+- [ ] (3) Pass more references around
+- [ ] (1) Remove todos in `span.rs` if not needed
+- [ ] (1) Get markers from constructs (`string`, `text`)
+- [ ] (1) Do not capture in `tokenizer.go`
+- [ ] (1) Clean attempts
+- [ ] (3) Clean compiler
+- [ ] (3) Figure out lifetimes of things (see `life time` in source)
 - [ ] (5) Do some research on rust best practices for APIs, e.g., what to
   accept, how to integrate with streams or so?
 - [ ] (1) Go through clippy rules, and such, to add strict code styles
 - [ ] (1) Make sure that rust character groups match CM character groups (e.g.,
   is `unicode_whitespace` or so the same?)
 - [ ] (1) Any special handling of surrogates?
-- [ ] (1) Make sure debugging is useful for other folks
+- [ ] (1) Make sure debugging, assertions are useful for other folks
 - [ ] (3) Add some benchmarks, do some perf testing
 - [ ] (3) Write comparison to other parsers
 - [ ] (3) Add node/etc bindings?
-- [ ] (8) After all extensions, including MDX, are done, see if we can integrate
-  this with SWC to compile MDX
 - [ ] (3) Bunch of docs
 - [ ] (5) Site
 
+#### After
+
+- [ ] (8) Extensions!
+- [ ] (8) After all extensions, including MDX, are done, see if we can integrate
+  this with SWC to compile MDX
+
 ### Constructs
 
-- [ ] (5) attention (strong, emphasis) (text)
+- [ ] (5) attention (strong, emphasis)
 - [x] autolink
 - [x] blank line
 - [ ] (5) block quote
@@ -132,7 +196,7 @@ cargo doc --document-private-items
 - [x] html (flow)
 - [x] paragraph
 - [x] thematic break
-- [ ] (5) text
+- [ ] (8) text
 - [ ] attention (strong, emphasis) (text)
 - [x] autolink
 - [x] character escape
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index 0623a37..1188c61 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -66,14 +66,7 @@ pub fn subtokenize(events: Vec<Event>, codes: &[Code]) -> (Vec<Event>, bool) {
             };
 
             result = tokenizer.feed(span::codes(codes, &span), func, enter.next == None);
-
-            if let Some(ref x) = result.1 {
-                if !x.is_empty() {
-                    // To do: handle?
-                    unreachable!("subtokenize:remainder {:?}", x);
-                }
-            }
-
+            assert!(result.1.is_none(), "expected no remainder");
             index_opt = enter.next;
         }
 
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index ba9bcbb..909a1d1 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -222,12 +222,14 @@ impl Tokenizer {
         self.current = code;
     }
 
+    /// To do.
     pub fn define_skip(&mut self, point: &Point, index: usize) {
         self.column_start.insert(point.line, point.column);
         self.account_for_potential_skip();
         log::debug!("position: define skip: `{:?}` ({:?})", point, index);
     }
 
+    /// To do.
     fn account_for_potential_skip(&mut self) {
         match self.column_start.get(&self.point.line) {
             None => {}
@@ -462,6 +464,7 @@ impl Tokenizer {
     }
 
     // To do: lifetimes, boxes, lmao.
+    /// To do.
     pub fn attempt_2(
         &mut self,
         a: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
@@ -481,6 +484,7 @@ impl Tokenizer {
         )
     }
 
+    /// To do.
     #[allow(clippy::too_many_arguments, clippy::many_single_char_names)]
     pub fn attempt_5(
         &mut self,
@@ -504,6 +508,7 @@ impl Tokenizer {
         )
     }
 
+    /// To do.
     #[allow(clippy::too_many_arguments, clippy::many_single_char_names)]
     pub fn attempt_7(
         &mut self,
@@ -529,6 +534,7 @@ impl Tokenizer {
         )
     }
 
+    /// To do.
     #[allow(clippy::too_many_arguments, clippy::many_single_char_names)]
     pub fn call_multiple(
         &mut self,
@@ -606,7 +612,7 @@ impl Tokenizer {
 
         // Yield to a higher loop if we shouldn’t feed EOFs.
         if !drain {
-            return (state, Some(codes[index..].to_vec()));
+            return check_statefn_result((state, Some(codes[index..].to_vec())));
         }
 
         loop {
@@ -618,14 +624,7 @@ impl Tokenizer {
                     log::debug!("main: passing eof");
                     self.expect(code);
                     let (next, remainder) = check_statefn_result(func(self, code));
-
-                    if let Some(ref x) = remainder {
-                        if !x.is_empty() {
-                            // To do: handle?
-                            unreachable!("drain:remainder {:?}", x);
-                        }
-                    }
-
+                    assert!(remainder.is_none(), "expected no remainder");
                     state = next;
                 }
             }
@@ -661,8 +660,13 @@ fn attempt_impl(
             }
         }
 
-        // To do: `remainder` must never be bigger than codes I guess?
-        // To do: `remainder` probably has to be taken *from* `codes`, in a similar vain to the `Ok` handling below.
+        if let Some(ref list) = remainder {
+            assert!(
+                list.len() <= codes.len(),
+                "`remainder` must be less than or equal to `codes`"
+            );
+        }
+
         match next {
            State::Ok => {
                let remaining = if let Some(x) = remainder { x } else { vec![] };
@@ -670,6 +674,7 @@ fn attempt_impl(
            }
            State::Nok => check_statefn_result(done((codes, vec![]), false, tokenizer)),
            State::Fn(func) => {
+               assert!(remainder.is_none(), "expected no remainder");
                check_statefn_result((State::Fn(attempt_impl(func, codes, done)), None))
            }
        }
@@ -712,20 +717,18 @@ pub fn as_codes(value: &str) -> Vec<Code> {
            }
            // Send a tab and virtual spaces.
            '\t' => {
-                // To do: is this correct?
                let remainder = column % TAB_SIZE;
-                let virtual_spaces = if remainder == 0 {
+                let mut virtual_spaces = if remainder == 0 {
                    0
                } else {
                    TAB_SIZE - remainder
                };
                codes.push(Code::Char(char));
                column += 1;
-                let mut index = 0;
-                while index < virtual_spaces {
+                while virtual_spaces > 0 {
                    codes.push(Code::VirtualSpace);
                    column += 1;
-                    index += 1;
+                    virtual_spaces -= 1;
                }
            }
            // Send an LF.
@@ -770,6 +773,10 @@ fn check_statefn_result(result: StateFnResult) -> StateFnResult {
        if Some(&Code::None) == list.last() {
            list.pop();
        }
+
+        if list.is_empty() {
+            return (state, None);
+        }
    }
 
    (state, remainder)
-- 
cgit
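
Two editorial notes on the changes above, each with a small self-contained sketch. The standalone framing, function names, and `main` drivers below are illustrative assumptions, not code from this repository.

First, the `as_codes` hunk reworks how a tab is expanded: the tab itself is pushed as a `Code::Char`, followed by enough `Code::VirtualSpace`s to reach the next tab stop. A minimal sketch of that arithmetic, assuming a tab stop of 4 (`TAB_SIZE`) and returning a plain count instead of pushing the crate's `Code` values:

    // Sketch only: mirrors the `column % TAB_SIZE` arithmetic in `as_codes`.
    const TAB_SIZE: usize = 4;

    /// Number of virtual spaces to emit after a tab found at 1-based `column`.
    fn virtual_spaces_after_tab(column: usize) -> usize {
        let remainder = column % TAB_SIZE;
        if remainder == 0 {
            // The tab sits exactly on a tab stop; it advances one column on its own.
            0
        } else {
            TAB_SIZE - remainder
        }
    }

    fn main() {
        // A tab at column 1 plus three virtual spaces fill columns 1 through 4.
        assert_eq!(virtual_spaces_after_tab(1), 3);
        // A tab already on a tab stop needs no virtual spaces.
        assert_eq!(virtual_spaces_after_tab(4), 0);
    }

The patch itself only swaps the separate `index` counter for counting `virtual_spaces` down; the number of virtual spaces produced is unchanged.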
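
Second, the new `assert!(remainder.is_none(), ...)` calls lean on `check_statefn_result` now normalizing remainders: a trailing `Code::None` is dropped, and an empty list collapses to `None`. A sketch of that normalization idea, assuming a plain `Vec<char>` with `'\0'` standing in for `Code::None` rather than the crate's actual types:

    // Sketch only: the "empty remainder becomes None" normalization added to
    // `check_statefn_result`, written against plain chars.
    fn normalize_remainder(mut remainder: Option<Vec<char>>) -> Option<Vec<char>> {
        if let Some(ref mut list) = remainder {
            // Drop a trailing end-of-input marker ('\0' stands in for `Code::None`).
            if list.last() == Some(&'\0') {
                list.pop();
            }
            // Nothing left means there is no remainder at all.
            if list.is_empty() {
                return None;
            }
        }
        remainder
    }

    fn main() {
        assert_eq!(normalize_remainder(Some(vec!['\0'])), None);
        assert_eq!(normalize_remainder(Some(vec!['a', '\0'])), Some(vec!['a']));
    }

With that guarantee, callers such as `subtokenize` and the EOF drain loop can assert `remainder.is_none()` instead of checking for a non-empty list and calling `unreachable!`.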