From 1bb160f9dc45c3cdbe929e8965be69bcf8415d0c Mon Sep 17 00:00:00 2001
From: Titus Wormer
Date: Mon, 1 Aug 2022 11:27:39 +0200
Subject: Add missing docs, refactor some code

---
 src/tokenizer.rs | 100 ++++++++++++++++++++++++-------------------------
 1 file changed, 43 insertions(+), 57 deletions(-)

diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 9c5e9f6..9ab4309 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -26,7 +26,7 @@ pub enum ContentType {
 }
 
 #[derive(Debug, PartialEq)]
-pub enum CharAction {
+pub enum ByteAction {
     Normal(u8),
     Insert(u8),
     Ignore,
@@ -47,10 +47,9 @@ pub struct Point {
     pub column: usize,
     /// 0-indexed position in the document.
     ///
-    /// Also an `index` into `codes`.
-    // To do: call it `offset`?
+    /// Also an `index` into `bytes`.
     pub index: usize,
-    /// To do.
+    /// Virtual step on the same `index`.
     pub vs: usize,
 }
 
@@ -171,7 +170,7 @@ pub struct Tokenizer<'a> {
     column_start: Vec<(usize, usize)>,
     // First line.
     first_line: usize,
-    /// To do.
+    /// First point after the last line ending.
     line_start: Point,
     /// Track whether the current byte is already consumed (`true`) or expected
     /// to be consumed (`false`).
@@ -192,7 +191,7 @@ pub struct Tokenizer<'a> {
     ///
     /// Tracked to make sure everything’s valid.
     pub stack: Vec<Token>,
-    /// To do.
+    /// Edit map, to batch changes.
     pub map: EditMap,
     /// List of attached resolvers, which will be called when done feeding,
     /// to clean events.
@@ -323,15 +322,15 @@ impl<'a> Tokenizer<'a> {
     /// Move to the next (virtual) byte.
     pub fn move_one(&mut self) {
         match byte_action(self.parse_state.bytes, &self.point) {
-            CharAction::Ignore => {
+            ByteAction::Ignore => {
                 self.point.index += 1;
             }
-            CharAction::Insert(byte) => {
+            ByteAction::Insert(byte) => {
                 self.previous = Some(byte);
                 self.point.column += 1;
                 self.point.vs += 1;
             }
-            CharAction::Normal(byte) => {
+            ByteAction::Normal(byte) => {
                 self.previous = Some(byte);
                 self.point.vs = 0;
                 self.point.index += 1;
@@ -386,7 +385,7 @@ impl<'a> Tokenizer<'a> {
         while point.index > 0 {
             point.index -= 1;
             let action = byte_action(self.parse_state.bytes, &point);
-            if !matches!(action, CharAction::Ignore) {
+            if !matches!(action, ByteAction::Ignore) {
                 point.index += 1;
                 break;
             }
@@ -439,7 +438,7 @@ impl<'a> Tokenizer<'a> {
         while point.index > 0 {
             point.index -= 1;
             let action = byte_action(self.parse_state.bytes, &point);
-            if !matches!(action, CharAction::Ignore) {
+            if !matches!(action, ByteAction::Ignore) {
                 point.index += 1;
                 break;
             }
@@ -636,16 +635,15 @@ impl<'a> Tokenizer<'a> {
     ///
     /// This is set up to support repeatedly calling `feed`, and thus streaming
     /// markdown into the state machine, and normally pauses after feeding.
+    // Note: if needed: accept `vs`?
     pub fn push(
         &mut self,
         min: usize,
         max: usize,
         start: impl FnOnce(&mut Tokenizer) -> State + 'static,
     ) -> State {
         debug_assert!(!self.resolved, "cannot feed after drain");
         debug_assert!(min >= self.point.index, "cannot move backwards");
-
-        // To do: accept `vs`?
         self.move_to((min, 0));
 
         let mut state = State::Fn(Box::new(start));
 
@@ -654,16 +652,11 @@ impl<'a> Tokenizer<'a> {
             match state {
                 State::Ok | State::Nok => break,
                 State::Fn(func) => match byte_action(self.parse_state.bytes, &self.point) {
-                    CharAction::Ignore => {
+                    ByteAction::Ignore => {
                         state = State::Fn(Box::new(func));
                         self.move_one();
                     }
-                    CharAction::Insert(byte) => {
-                        log::debug!("main: passing (fake): `{:?}` ({:?})", byte, self.point);
-                        self.expect(Some(byte));
-                        state = func(self);
-                    }
-                    CharAction::Normal(byte) => {
+                    ByteAction::Insert(byte) | ByteAction::Normal(byte) => {
                         log::debug!("main: passing: `{:?}` ({:?})", byte, self.point);
                         self.expect(Some(byte));
                         state = func(self);
@@ -685,35 +678,30 @@ impl<'a> Tokenizer<'a> {
             match state {
                 State::Ok | State::Nok => break,
                 State::Fn(func) => {
-                    // To do: clean this?
                     // We sometimes move back when flushing, so then we use those codes.
-                    if self.point.index == max {
-                        let byte = None;
-                        log::debug!("main: flushing eof: `{:?}` ({:?})", byte, self.point);
-                        self.expect(byte);
-                        state = func(self);
+                    let action = if self.point.index == max {
+                        None
                     } else {
-                        match byte_action(self.parse_state.bytes, &self.point) {
-                            CharAction::Ignore => {
-                                state = State::Fn(Box::new(func));
-                                self.move_one();
-                            }
-                            CharAction::Insert(byte) => {
-                                log::debug!(
-                                    "main: flushing (fake): `{:?}` ({:?})",
-                                    byte,
-                                    self.point
-                                );
-                                self.expect(Some(byte));
-                                state = func(self);
-                            }
-                            CharAction::Normal(byte) => {
-                                log::debug!("main: flushing: `{:?}` ({:?})", byte, self.point);
-                                self.expect(Some(byte));
-                                state = func(self);
-                            }
-                        }
+                        Some(byte_action(self.parse_state.bytes, &self.point))
                     };
+
+                    if let Some(ByteAction::Ignore) = action {
+                        state = State::Fn(Box::new(func));
+                        self.move_one();
+                    } else {
+                        let byte =
+                            if let Some(ByteAction::Insert(byte) | ByteAction::Normal(byte)) =
+                                action
+                            {
+                                Some(byte)
+                            } else {
+                                None
+                            };
+
+                        log::debug!("main: flushing: `{:?}` ({:?})", byte, self.point);
+                        self.expect(byte);
+                        state = func(self);
+                    }
                 }
             }
         }
@@ -733,18 +721,18 @@ impl<'a> Tokenizer<'a> {
     }
 }
 
-fn byte_action(bytes: &[u8], point: &Point) -> CharAction {
+fn byte_action(bytes: &[u8], point: &Point) -> ByteAction {
     if point.index < bytes.len() {
         let byte = bytes[point.index];
 
         if byte == b'\r' {
             // CRLF.
             if point.index < bytes.len() - 1 && bytes[point.index + 1] == b'\n' {
-                CharAction::Ignore
+                ByteAction::Ignore
             }
             // CR.
             else {
-                CharAction::Normal(b'\n')
+                ByteAction::Normal(b'\n')
             }
         } else if byte == b'\t' {
             let remainder = point.column % TAB_SIZE;
@@ -757,19 +745,17 @@ fn byte_action(bytes: &[u8], point: &Point) -> CharAction {
             // On the tab itself, first send it.
             if point.vs == 0 {
                 if vs == 0 {
-                    CharAction::Normal(byte)
+                    ByteAction::Normal(byte)
                 } else {
-                    CharAction::Insert(byte)
+                    ByteAction::Insert(byte)
                 }
             } else if vs == 0 {
-                CharAction::Normal(b' ')
+                ByteAction::Normal(b' ')
             } else {
-                CharAction::Insert(b' ')
+                ByteAction::Insert(b' ')
             }
-        }
-        // VS?
-        else {
-            CharAction::Normal(byte)
+        } else {
+            ByteAction::Normal(byte)
         }
     } else {
        unreachable!("out of bounds")
--
cgit
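
Note: the following is a minimal, self-contained sketch, not part of the patch above, of the model that `byte_action` and `move_one` implement: CRLF pairs collapse to a single line feed, and tabs expand into virtual spaces up to the next tab stop, tracked through `Point::vs`. `Point` is trimmed to the fields this logic needs, the driver in `main` is a hypothetical stand-in for the tokenizer's feed loop, and `TAB_SIZE` is assumed to be 4 to match the crate's constant.

const TAB_SIZE: usize = 4;

#[derive(Debug, PartialEq)]
enum ByteAction {
    Normal(u8),
    Insert(u8),
    Ignore,
}

struct Point {
    index: usize,  // 0-indexed offset into `bytes`
    column: usize, // 1-indexed column; tabs advance it to the next tab stop
    vs: usize,     // virtual step on the same `index`
}

fn byte_action(bytes: &[u8], point: &Point) -> ByteAction {
    let byte = bytes[point.index];
    if byte == b'\r' {
        // CRLF: ignore the CR and let the LF through; a lone CR becomes LF.
        if point.index + 1 < bytes.len() && bytes[point.index + 1] == b'\n' {
            ByteAction::Ignore
        } else {
            ByteAction::Normal(b'\n')
        }
    } else if byte == b'\t' {
        // Virtual spaces still needed after this column to reach a tab stop.
        let remainder = point.column % TAB_SIZE;
        let vs = if remainder == 0 { 0 } else { TAB_SIZE - remainder };
        if point.vs == 0 {
            // On the tab itself: it is final only if no spaces must follow.
            if vs == 0 {
                ByteAction::Normal(byte)
            } else {
                ByteAction::Insert(byte)
            }
        } else if vs == 0 {
            // Last virtual space before the tab stop: move past the tab next.
            ByteAction::Normal(b' ')
        } else {
            ByteAction::Insert(b' ')
        }
    } else {
        ByteAction::Normal(byte)
    }
}

fn main() {
    // Mirrors `move_one`: `Insert` stays on the same index and bumps `vs`,
    // `Normal` finishes the index and moves past it, `Ignore` skips the byte.
    let bytes = b"a\tb\r\nc";
    let mut point = Point { index: 0, column: 1, vs: 0 };
    while point.index < bytes.len() {
        match byte_action(bytes, &point) {
            ByteAction::Ignore => point.index += 1,
            ByteAction::Insert(byte) => {
                println!("insert {:?} at column {}", byte as char, point.column);
                point.column += 1;
                point.vs += 1;
            }
            ByteAction::Normal(byte) => {
                println!("normal {:?} at column {}", byte as char, point.column);
                point.vs = 0;
                point.index += 1;
                // Line bookkeeping is simplified here: reset on a line ending.
                point.column = if byte == b'\n' { 1 } else { point.column + 1 };
            }
        }
    }
}

Running this on `a\tb\r\nc` shows the tab at column 2 occupying columns 2 through 4 (one `Insert` of the tab, one `Insert` and one final `Normal` of a virtual space), the CR before the LF being ignored, and `b` landing on the tab stop at column 5.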