author | Titus Wormer <tituswormer@gmail.com> | 2022-08-11 13:31:20 +0200
committer | Titus Wormer <tituswormer@gmail.com> | 2022-08-11 13:31:20 +0200
commit | 2d35cbfceace81a217cd0fbdae7a8777c7a6465e (patch)
tree | e5e69d44c5c00d1dc70f4e3a227f67fd5c771389
parent | 053a2603e4bd5ec9caf40617b52136e5ef3fcf0a (diff)
download | markdown-rs-2d35cbfceace81a217cd0fbdae7a8777c7a6465e.tar.gz
download | markdown-rs-2d35cbfceace81a217cd0fbdae7a8777c7a6465e.tar.bz2
download | markdown-rs-2d35cbfceace81a217cd0fbdae7a8777c7a6465e.zip
Refactor internal docs, code style of tokenizer
-rw-r--r-- | src/construct/code_fenced.rs | 22
-rw-r--r-- | src/construct/code_text.rs | 8
-rw-r--r-- | src/construct/definition.rs | 13
-rw-r--r-- | src/construct/html_flow.rs | 10
-rw-r--r-- | src/construct/html_text.rs | 86
-rw-r--r-- | src/construct/label_end.rs | 36
-rw-r--r-- | src/construct/label_start_image.rs | 2
-rw-r--r-- | src/construct/label_start_link.rs | 2
-rw-r--r-- | src/construct/partial_data.rs | 8
-rw-r--r-- | src/construct/partial_destination.rs | 2
-rw-r--r-- | src/construct/partial_label.rs | 17
-rw-r--r-- | src/construct/partial_title.rs | 17
-rw-r--r-- | src/content/document.rs | 42
-rw-r--r-- | src/content/string.rs | 2
-rw-r--r-- | src/content/text.rs | 10
-rw-r--r-- | src/subtokenize.rs | 6
-rw-r--r-- | src/tokenizer.rs | 518 |
17 files changed, 420 insertions(+), 381 deletions(-)
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs index 0d4345a..26e1148 100644 --- a/src/construct/code_fenced.rs +++ b/src/construct/code_fenced.rs @@ -162,7 +162,7 @@ pub fn before_sequence_open(tokenizer: &mut Tokenizer) -> State { if let Some(b'`' | b'~') = tokenizer.current { tokenizer.tokenize_state.marker = tokenizer.current.unwrap(); - tokenizer.tokenize_state.prefix = prefix; + tokenizer.tokenize_state.size_c = prefix; tokenizer.enter(Token::CodeFencedFenceSequence); State::Retry(StateName::CodeFencedSequenceOpen) } else { @@ -196,7 +196,7 @@ pub fn sequence_open(tokenizer: &mut Tokenizer) -> State { } _ => { tokenizer.tokenize_state.marker = 0; - tokenizer.tokenize_state.prefix = 0; + tokenizer.tokenize_state.size_c = 0; tokenizer.tokenize_state.size = 0; State::Nok } @@ -259,7 +259,7 @@ pub fn info(tokenizer: &mut Tokenizer) -> State { Some(b'`') if tokenizer.tokenize_state.marker == b'`' => { tokenizer.concrete = false; tokenizer.tokenize_state.marker = 0; - tokenizer.tokenize_state.prefix = 0; + tokenizer.tokenize_state.size_c = 0; tokenizer.tokenize_state.size = 0; State::Nok } @@ -307,7 +307,7 @@ pub fn meta(tokenizer: &mut Tokenizer) -> State { Some(b'`') if tokenizer.tokenize_state.marker == b'`' => { tokenizer.concrete = false; tokenizer.tokenize_state.marker = 0; - tokenizer.tokenize_state.prefix = 0; + tokenizer.tokenize_state.size_c = 0; tokenizer.tokenize_state.size = 0; State::Nok } @@ -410,14 +410,14 @@ pub fn before_sequence_close(tokenizer: &mut Tokenizer) -> State { pub fn sequence_close(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'`' | b'~') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => { - tokenizer.tokenize_state.size_other += 1; + tokenizer.tokenize_state.size_b += 1; tokenizer.consume(); State::Next(StateName::CodeFencedSequenceClose) } - _ if tokenizer.tokenize_state.size_other >= CODE_FENCED_SEQUENCE_SIZE_MIN - && tokenizer.tokenize_state.size_other >= tokenizer.tokenize_state.size => + _ if tokenizer.tokenize_state.size_b >= CODE_FENCED_SEQUENCE_SIZE_MIN + && tokenizer.tokenize_state.size_b >= tokenizer.tokenize_state.size => { - tokenizer.tokenize_state.size_other = 0; + tokenizer.tokenize_state.size_b = 0; tokenizer.exit(Token::CodeFencedFenceSequence); let name = space_or_tab(tokenizer); tokenizer.attempt( @@ -427,7 +427,7 @@ pub fn sequence_close(tokenizer: &mut Tokenizer) -> State { ) } _ => { - tokenizer.tokenize_state.size_other = 0; + tokenizer.tokenize_state.size_b = 0; State::Nok } } @@ -474,7 +474,7 @@ pub fn content_before(tokenizer: &mut Tokenizer) -> State { /// | ~~~ /// ``` pub fn content_start(tokenizer: &mut Tokenizer) -> State { - let name = space_or_tab_min_max(tokenizer, 0, tokenizer.tokenize_state.prefix); + let name = space_or_tab_min_max(tokenizer, 0, tokenizer.tokenize_state.size_c); tokenizer.attempt( name, State::Next(StateName::CodeFencedBeforeContentChunk), @@ -536,7 +536,7 @@ pub fn content_chunk(tokenizer: &mut Tokenizer) -> State { pub fn after(tokenizer: &mut Tokenizer) -> State { tokenizer.exit(Token::CodeFenced); tokenizer.tokenize_state.marker = 0; - tokenizer.tokenize_state.prefix = 0; + tokenizer.tokenize_state.size_c = 0; tokenizer.tokenize_state.size = 0; // Feel free to interrupt. 
tokenizer.interrupt = false; diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs index 2c8faf3..d7ada3d 100644 --- a/src/construct/code_text.rs +++ b/src/construct/code_text.rs @@ -185,16 +185,16 @@ pub fn data(tokenizer: &mut Tokenizer) -> State { pub fn sequence_close(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'`') => { - tokenizer.tokenize_state.size_other += 1; + tokenizer.tokenize_state.size_b += 1; tokenizer.consume(); State::Next(StateName::CodeTextSequenceClose) } _ => { - if tokenizer.tokenize_state.size == tokenizer.tokenize_state.size_other { + if tokenizer.tokenize_state.size == tokenizer.tokenize_state.size_b { tokenizer.exit(Token::CodeTextSequence); tokenizer.exit(Token::CodeText); tokenizer.tokenize_state.size = 0; - tokenizer.tokenize_state.size_other = 0; + tokenizer.tokenize_state.size_b = 0; State::Ok } else { let index = tokenizer.events.len(); @@ -202,7 +202,7 @@ pub fn sequence_close(tokenizer: &mut Tokenizer) -> State { // More or less accents: mark as data. tokenizer.events[index - 1].token_type = Token::CodeTextData; tokenizer.events[index].token_type = Token::CodeTextData; - tokenizer.tokenize_state.size_other = 0; + tokenizer.tokenize_state.size_b = 0; State::Retry(StateName::CodeTextBetween) } } diff --git a/src/construct/definition.rs b/src/construct/definition.rs index 62d0f3b..5db611b 100644 --- a/src/construct/definition.rs +++ b/src/construct/definition.rs @@ -174,7 +174,12 @@ pub fn label_after(tokenizer: &mut Tokenizer) -> State { } } -/// To do. +/// After the marker. +/// +/// ```markdown +/// > | [a]: b "c" +/// ^ +/// ``` pub fn marker_after(tokenizer: &mut Tokenizer) -> State { let name = space_or_tab_eol(tokenizer); tokenizer.attempt( @@ -196,7 +201,7 @@ pub fn destination_before(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_3 = Token::DefinitionDestinationLiteralMarker; tokenizer.tokenize_state.token_4 = Token::DefinitionDestinationRaw; tokenizer.tokenize_state.token_5 = Token::DefinitionDestinationString; - tokenizer.tokenize_state.size_other = usize::MAX; + tokenizer.tokenize_state.size_b = usize::MAX; tokenizer.attempt( StateName::DestinationStart, State::Next(StateName::DefinitionDestinationAfter), @@ -216,7 +221,7 @@ pub fn destination_after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_3 = Token::Data; tokenizer.tokenize_state.token_4 = Token::Data; tokenizer.tokenize_state.token_5 = Token::Data; - tokenizer.tokenize_state.size_other = 0; + tokenizer.tokenize_state.size_b = 0; tokenizer.attempt( StateName::DefinitionTitleBefore, State::Next(StateName::DefinitionAfter), @@ -231,7 +236,7 @@ pub fn destination_missing(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_3 = Token::Data; tokenizer.tokenize_state.token_4 = Token::Data; tokenizer.tokenize_state.token_5 = Token::Data; - tokenizer.tokenize_state.size_other = 0; + tokenizer.tokenize_state.size_b = 0; State::Nok } diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs index b49b231..7a346e9 100644 --- a/src/construct/html_flow.rs +++ b/src/construct/html_flow.rs @@ -508,7 +508,7 @@ pub fn complete_attribute_value_before(tokenizer: &mut Tokenizer) -> State { State::Next(StateName::HtmlFlowCompleteAttributeValueBefore) } Some(b'"' | b'\'') => { - tokenizer.tokenize_state.marker_other = tokenizer.current.unwrap(); + tokenizer.tokenize_state.marker_b = tokenizer.current.unwrap(); tokenizer.consume(); State::Next(StateName::HtmlFlowCompleteAttributeValueQuoted) } @@ 
-528,13 +528,11 @@ pub fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { tokenizer.tokenize_state.marker = 0; - tokenizer.tokenize_state.marker_other = 0; + tokenizer.tokenize_state.marker_b = 0; State::Nok } - Some(b'"' | b'\'') - if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker_other => - { - tokenizer.tokenize_state.marker_other = 0; + Some(b'"' | b'\'') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker_b => { + tokenizer.tokenize_state.marker_b = 0; tokenizer.consume(); State::Next(StateName::HtmlFlowCompleteAttributeValueQuotedAfter) } diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs index df6bd99..7474dbf 100644 --- a/src/construct/html_text.rs +++ b/src/construct/html_text.rs @@ -207,10 +207,11 @@ pub fn comment_start_dash(tokenizer: &mut Tokenizer) -> State { pub fn comment(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => State::Nok, - Some(b'\n') => { - tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextComment); - State::Retry(StateName::HtmlTextLineEndingBefore) - } + Some(b'\n') => tokenizer.attempt( + StateName::HtmlTextLineEndingBefore, + State::Next(StateName::HtmlTextComment), + State::Nok, + ), Some(b'-') => { tokenizer.consume(); State::Next(StateName::HtmlTextCommentClose) @@ -269,10 +270,11 @@ pub fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State { pub fn cdata(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => State::Nok, - Some(b'\n') => { - tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextCdata); - State::Retry(StateName::HtmlTextLineEndingBefore) - } + Some(b'\n') => tokenizer.attempt( + StateName::HtmlTextLineEndingBefore, + State::Next(StateName::HtmlTextCdata), + State::Nok, + ), Some(b']') => { tokenizer.consume(); State::Next(StateName::HtmlTextCdataClose) @@ -323,10 +325,11 @@ pub fn cdata_end(tokenizer: &mut Tokenizer) -> State { pub fn declaration(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'>') => State::Retry(StateName::HtmlTextEnd), - Some(b'\n') => { - tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextDeclaration); - State::Retry(StateName::HtmlTextLineEndingBefore) - } + Some(b'\n') => tokenizer.attempt( + StateName::HtmlTextLineEndingBefore, + State::Next(StateName::HtmlTextDeclaration), + State::Nok, + ), _ => { tokenizer.consume(); State::Next(StateName::HtmlTextDeclaration) @@ -343,10 +346,11 @@ pub fn declaration(tokenizer: &mut Tokenizer) -> State { pub fn instruction(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => State::Nok, - Some(b'\n') => { - tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextInstruction); - State::Retry(StateName::HtmlTextLineEndingBefore) - } + Some(b'\n') => tokenizer.attempt( + StateName::HtmlTextLineEndingBefore, + State::Next(StateName::HtmlTextInstruction), + State::Nok, + ), Some(b'?') => { tokenizer.consume(); State::Next(StateName::HtmlTextInstructionClose) @@ -413,10 +417,11 @@ pub fn tag_close(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn tag_close_between(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - Some(b'\n') => { - tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextTagCloseBetween); - State::Retry(StateName::HtmlTextLineEndingBefore) - } + Some(b'\n') => tokenizer.attempt( + StateName::HtmlTextLineEndingBefore, + State::Next(StateName::HtmlTextTagCloseBetween), + State::Nok, + ), 
Some(b'\t' | b' ') => { tokenizer.consume(); State::Next(StateName::HtmlTextTagCloseBetween) @@ -451,10 +456,11 @@ pub fn tag_open(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn tag_open_between(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - Some(b'\n') => { - tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextTagOpenBetween); - State::Retry(StateName::HtmlTextLineEndingBefore) - } + Some(b'\n') => tokenizer.attempt( + StateName::HtmlTextLineEndingBefore, + State::Next(StateName::HtmlTextTagOpenBetween), + State::Nok, + ), Some(b'\t' | b' ') => { tokenizer.consume(); State::Next(StateName::HtmlTextTagOpenBetween) @@ -498,11 +504,11 @@ pub fn tag_open_attribute_name(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - Some(b'\n') => { - tokenizer.tokenize_state.return_state = - Some(StateName::HtmlTextTagOpenAttributeNameAfter); - State::Retry(StateName::HtmlTextLineEndingBefore) - } + Some(b'\n') => tokenizer.attempt( + StateName::HtmlTextLineEndingBefore, + State::Next(StateName::HtmlTextTagOpenAttributeNameAfter), + State::Nok, + ), Some(b'\t' | b' ') => { tokenizer.consume(); State::Next(StateName::HtmlTextTagOpenAttributeNameAfter) @@ -525,11 +531,11 @@ pub fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer) -> State { pub fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'<' | b'=' | b'>' | b'`') => State::Nok, - Some(b'\n') => { - tokenizer.tokenize_state.return_state = - Some(StateName::HtmlTextTagOpenAttributeValueBefore); - State::Retry(StateName::HtmlTextLineEndingBefore) - } + Some(b'\n') => tokenizer.attempt( + StateName::HtmlTextLineEndingBefore, + State::Next(StateName::HtmlTextTagOpenAttributeValueBefore), + State::Nok, + ), Some(b'\t' | b' ') => { tokenizer.consume(); State::Next(StateName::HtmlTextTagOpenAttributeValueBefore) @@ -558,11 +564,11 @@ pub fn tag_open_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.marker = 0; State::Nok } - Some(b'\n') => { - tokenizer.tokenize_state.return_state = - Some(StateName::HtmlTextTagOpenAttributeValueQuoted); - State::Retry(StateName::HtmlTextLineEndingBefore) - } + Some(b'\n') => tokenizer.attempt( + StateName::HtmlTextLineEndingBefore, + State::Next(StateName::HtmlTextTagOpenAttributeValueQuoted), + State::Nok, + ), Some(b'"' | b'\'') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => { tokenizer.tokenize_state.marker = 0; tokenizer.consume(); @@ -678,5 +684,5 @@ pub fn line_ending_after(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn line_ending_after_prefix(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(Token::HtmlTextData); - State::Retry(tokenizer.tokenize_state.return_state.take().unwrap()) + State::Ok } diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs index 3337cec..a25f917 100644 --- a/src/construct/label_end.rs +++ b/src/construct/label_end.rs @@ -170,12 +170,12 @@ use crate::util::{ pub fn start(tokenizer: &mut Tokenizer) -> State { if Some(b']') == tokenizer.current && tokenizer.parse_state.constructs.label_end { let mut label_start_index = None; - let mut index = tokenizer.label_start_stack.len(); + let mut index = tokenizer.tokenize_state.label_start_stack.len(); while index > 0 { index -= 1; - if !tokenizer.label_start_stack[index].balanced { + if !tokenizer.tokenize_state.label_start_stack[index].balanced { label_start_index = 
Some(index); break; } @@ -184,6 +184,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { // If there is an okay opening: if let Some(label_start_index) = label_start_index { let label_start = tokenizer + .tokenize_state .label_start_stack .get_mut(label_start_index) .unwrap(); @@ -221,7 +222,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn after(tokenizer: &mut Tokenizer) -> State { - let start = &tokenizer.label_start_stack[tokenizer.tokenize_state.start]; + let start = &tokenizer.tokenize_state.label_start_stack[tokenizer.tokenize_state.start]; let defined = tokenizer .parse_state .definitions @@ -298,17 +299,23 @@ pub fn reference_not_full(tokenizer: &mut Tokenizer) -> State { pub fn ok(tokenizer: &mut Tokenizer) -> State { let label_start_index = tokenizer.tokenize_state.start; // Remove this one and everything after it. - let mut left = tokenizer.label_start_stack.split_off(label_start_index); + let mut left = tokenizer + .tokenize_state + .label_start_stack + .split_off(label_start_index); // Remove this one from `left`, as we’ll move it to `media_list`. let label_start = left.remove(0); - tokenizer.label_start_list_loose.append(&mut left); + tokenizer + .tokenize_state + .label_start_list_loose + .append(&mut left); let is_link = tokenizer.events[label_start.start.0].token_type == Token::LabelLink; if is_link { let mut index = 0; - while index < tokenizer.label_start_stack.len() { - let label_start = &mut tokenizer.label_start_stack[index]; + while index < tokenizer.tokenize_state.label_start_stack.len() { + let label_start = &mut tokenizer.tokenize_state.label_start_stack[index]; if tokenizer.events[label_start.start.0].token_type == Token::LabelLink { label_start.inactive = true; } @@ -316,7 +323,7 @@ pub fn ok(tokenizer: &mut Tokenizer) -> State { } } - tokenizer.media_list.push(Media { + tokenizer.tokenize_state.media_list.push(Media { start: label_start.start, end: (tokenizer.tokenize_state.end, tokenizer.events.len() - 1), }); @@ -340,6 +347,7 @@ pub fn ok(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn nok(tokenizer: &mut Tokenizer) -> State { tokenizer + .tokenize_state .label_start_stack .get_mut(tokenizer.tokenize_state.start) .unwrap() @@ -398,7 +406,7 @@ pub fn resource_open(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_3 = Token::ResourceDestinationLiteralMarker; tokenizer.tokenize_state.token_4 = Token::ResourceDestinationRaw; tokenizer.tokenize_state.token_5 = Token::ResourceDestinationString; - tokenizer.tokenize_state.size_other = RESOURCE_DESTINATION_BALANCE_MAX; + tokenizer.tokenize_state.size_b = RESOURCE_DESTINATION_BALANCE_MAX; tokenizer.attempt( StateName::DestinationStart, @@ -420,7 +428,7 @@ pub fn resource_destination_after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_3 = Token::Data; tokenizer.tokenize_state.token_4 = Token::Data; tokenizer.tokenize_state.token_5 = Token::Data; - tokenizer.tokenize_state.size_other = 0; + tokenizer.tokenize_state.size_b = 0; let name = space_or_tab_eol(tokenizer); tokenizer.attempt( name, @@ -436,7 +444,7 @@ pub fn resource_destination_missing(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_3 = Token::Data; tokenizer.tokenize_state.token_4 = Token::Data; tokenizer.tokenize_state.token_5 = Token::Data; - tokenizer.tokenize_state.size_other = 0; + tokenizer.tokenize_state.size_b = 0; State::Nok } @@ -605,9 +613,9 @@ pub fn reference_collapsed_open(tokenizer: &mut Tokenizer) -> State { /// images, or turns them back into 
data. #[allow(clippy::too_many_lines)] pub fn resolve_media(tokenizer: &mut Tokenizer) { - let mut left = tokenizer.label_start_list_loose.split_off(0); - let mut left_2 = tokenizer.label_start_stack.split_off(0); - let media = tokenizer.media_list.split_off(0); + let mut left = tokenizer.tokenize_state.label_start_list_loose.split_off(0); + let mut left_2 = tokenizer.tokenize_state.label_start_stack.split_off(0); + let media = tokenizer.tokenize_state.media_list.split_off(0); left.append(&mut left_2); let events = &tokenizer.events; diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs index 1730fc3..629e836 100644 --- a/src/construct/label_start_image.rs +++ b/src/construct/label_start_image.rs @@ -64,7 +64,7 @@ pub fn open(tokenizer: &mut Tokenizer) -> State { tokenizer.consume(); tokenizer.exit(Token::LabelMarker); tokenizer.exit(Token::LabelImage); - tokenizer.label_start_stack.push(LabelStart { + tokenizer.tokenize_state.label_start_stack.push(LabelStart { start: (tokenizer.events.len() - 6, tokenizer.events.len() - 1), balanced: false, inactive: false, diff --git a/src/construct/label_start_link.rs b/src/construct/label_start_link.rs index c47941c..6eb7b40 100644 --- a/src/construct/label_start_link.rs +++ b/src/construct/label_start_link.rs @@ -46,7 +46,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.consume(); tokenizer.exit(Token::LabelMarker); tokenizer.exit(Token::LabelLink); - tokenizer.label_start_stack.push(LabelStart { + tokenizer.tokenize_state.label_start_stack.push(LabelStart { start: (start, tokenizer.events.len() - 1), balanced: false, inactive: false, diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs index a68f359..0ad67c5 100644 --- a/src/construct/partial_data.rs +++ b/src/construct/partial_data.rs @@ -17,8 +17,8 @@ use crate::tokenizer::{EventType, State, StateName, Tokenizer}; /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - // Make sure to eat the first `stop`. - Some(byte) if tokenizer.tokenize_state.stop.contains(&byte) => { + // Make sure to eat the first `markers`. 
+ Some(byte) if tokenizer.tokenize_state.markers.contains(&byte) => { tokenizer.enter(Token::Data); tokenizer.consume(); State::Next(StateName::DataInside) @@ -42,7 +42,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { tokenizer.exit(Token::LineEnding); State::Next(StateName::DataAtBreak) } - Some(byte) if tokenizer.tokenize_state.stop.contains(&byte) => { + Some(byte) if tokenizer.tokenize_state.markers.contains(&byte) => { tokenizer.register_resolver_before("data".to_string(), Box::new(resolve_data)); State::Ok } @@ -62,7 +62,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { pub fn inside(tokenizer: &mut Tokenizer) -> State { let done = match tokenizer.current { None | Some(b'\n') => true, - Some(byte) if tokenizer.tokenize_state.stop.contains(&byte) => true, + Some(byte) if tokenizer.tokenize_state.markers.contains(&byte) => true, _ => false, }; diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs index 26fadc4..735fb38 100644 --- a/src/construct/partial_destination.rs +++ b/src/construct/partial_destination.rs @@ -182,7 +182,7 @@ pub fn raw(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.size = 0; State::Ok } - Some(b'(') if tokenizer.tokenize_state.size < tokenizer.tokenize_state.size_other => { + Some(b'(') if tokenizer.tokenize_state.size < tokenizer.tokenize_state.size_b => { tokenizer.consume(); tokenizer.tokenize_state.size += 1; State::Next(StateName::DestinationRaw) diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs index a151841..6447961 100644 --- a/src/construct/partial_label.rs +++ b/src/construct/partial_label.rs @@ -142,13 +142,26 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { } } -/// To do. +/// In a label, after whitespace. +/// +/// ```markdown +/// | [a␊ +/// > | b] +/// ^ +/// ``` pub fn eol_after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.connect = true; State::Retry(StateName::LabelAtBreak) } -/// To do. +/// In a label, at a blank line. +/// +/// ```markdown +/// | [a␊ +/// > | ␊ +/// ^ +/// | b] +/// ``` pub fn at_blank_line(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.marker = 0; tokenizer.tokenize_state.connect = false; diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs index 0b81418..209240e 100644 --- a/src/construct/partial_title.rs +++ b/src/construct/partial_title.rs @@ -133,13 +133,26 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State { } } -/// To do. +/// In a title, after whitespace. +/// +/// ```markdown +/// | "a␊ +/// > | b" +/// ^ +/// ``` pub fn after_eol(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.connect = true; State::Retry(StateName::TitleAtBreak) } -/// To do. +/// In a title, at a blank line. 
+/// +/// ```markdown +/// | "a␊ +/// > | ␊ +/// ^ +/// | b" +/// ``` pub fn at_blank_line(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.marker = 0; tokenizer.tokenize_state.connect = false; diff --git a/src/content/document.rs b/src/content/document.rs index 98f8a7d..49ca919 100644 --- a/src/content/document.rs +++ b/src/content/document.rs @@ -59,7 +59,7 @@ pub fn document(parse_state: &mut ParseState, point: Point) -> Vec<Event> { let state = tokenizer.push( (0, 0), (parse_state.bytes.len(), 0), - StateName::DocumentStart, + State::Next(StateName::DocumentStart), ); tokenizer.flush(state, true); @@ -105,7 +105,7 @@ pub fn document(parse_state: &mut ParseState, point: Point) -> Vec<Event> { /// ^ /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { - tokenizer.tokenize_state.child_tokenizer = Some(Box::new(Tokenizer::new( + tokenizer.tokenize_state.document_child = Some(Box::new(Tokenizer::new( tokenizer.point.clone(), tokenizer.parse_state, ))); @@ -173,7 +173,7 @@ pub fn container_new_before(tokenizer: &mut Tokenizer) -> State { if tokenizer.tokenize_state.document_continued == tokenizer.tokenize_state.document_container_stack.len() { - let child = tokenizer.tokenize_state.child_tokenizer.as_ref().unwrap(); + let child = tokenizer.tokenize_state.document_child.as_ref().unwrap(); tokenizer.interrupt = child.interrupt; @@ -209,7 +209,12 @@ pub fn container_new_before(tokenizer: &mut Tokenizer) -> State { ) } -/// To do. +/// Maybe before a new container, but not a block quote. +// +/// ```markdown +/// > | * a +/// ^ +/// ``` pub fn container_new_before_not_block_quote(tokenizer: &mut Tokenizer) -> State { // List item? // We replace the empty block quote container for this new list one. @@ -227,7 +232,12 @@ pub fn container_new_before_not_block_quote(tokenizer: &mut Tokenizer) -> State ) } -/// To do. +/// Maybe before a new container, but not a list. +// +/// ```markdown +/// > | a +/// ^ +/// ``` pub fn container_new_before_not_list(tokenizer: &mut Tokenizer) -> State { // It wasn’t a new block quote or a list. // Swap the new container (in the middle) with the existing one (at the end). @@ -283,7 +293,7 @@ pub fn container_new_after(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn containers_after(tokenizer: &mut Tokenizer) -> State { - let child = tokenizer.tokenize_state.child_tokenizer.as_mut().unwrap(); + let child = tokenizer.tokenize_state.document_child.as_mut().unwrap(); child.lazy = tokenizer.tokenize_state.document_continued != tokenizer.tokenize_state.document_container_stack.len(); @@ -312,7 +322,12 @@ pub fn containers_after(tokenizer: &mut Tokenizer) -> State { } } -/// To do. +/// In flow. 
+// +/// ```markdown +/// > | * ab +/// ^ +/// ``` pub fn flow_inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None => { @@ -340,23 +355,18 @@ pub fn flow_inside(tokenizer: &mut Tokenizer) -> State { /// ^ ^ /// ``` pub fn flow_end(tokenizer: &mut Tokenizer) -> State { - let child = tokenizer.tokenize_state.child_tokenizer.as_mut().unwrap(); + let child = tokenizer.tokenize_state.document_child.as_mut().unwrap(); let state = tokenizer .tokenize_state .document_child_state .unwrap_or(State::Next(StateName::FlowStart)); - let name = match state { - State::Next(name) => name, - _ => unreachable!("expected state name"), - }; - tokenizer.tokenize_state.document_exits.push(None); let state = child.push( (child.point.index, child.point.vs), (tokenizer.point.index, tokenizer.point.vs), - name, + state, ); let paragraph = matches!(state, State::Next(StateName::ParagraphInside)) @@ -403,7 +413,7 @@ fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) { .document_container_stack .split_off(tokenizer.tokenize_state.document_continued); - let child = tokenizer.tokenize_state.child_tokenizer.as_mut().unwrap(); + let child = tokenizer.tokenize_state.document_child.as_mut().unwrap(); // Flush if needed. if *phase != Phase::After { @@ -463,7 +473,7 @@ fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) { // Inject everything together. fn resolve(tokenizer: &mut Tokenizer) { - let child = tokenizer.tokenize_state.child_tokenizer.as_mut().unwrap(); + let child = tokenizer.tokenize_state.document_child.as_mut().unwrap(); // First, add the container exits into `child`. let mut child_index = 0; diff --git a/src/content/string.rs b/src/content/string.rs index 75cd56a..5dfceb0 100644 --- a/src/content/string.rs +++ b/src/content/string.rs @@ -20,7 +20,7 @@ const MARKERS: [u8; 2] = [b'&', b'\\']; /// Start of string. pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.register_resolver("whitespace".to_string(), Box::new(resolve)); - tokenizer.tokenize_state.stop = &MARKERS; + tokenizer.tokenize_state.markers = &MARKERS; State::Retry(StateName::StringBefore) } diff --git a/src/content/text.rs b/src/content/text.rs index ee70f33..4e93779 100644 --- a/src/content/text.rs +++ b/src/content/text.rs @@ -38,7 +38,7 @@ const MARKERS: [u8; 9] = [ /// Start of text. pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.register_resolver("whitespace".to_string(), Box::new(resolve)); - tokenizer.tokenize_state.stop = &MARKERS; + tokenizer.tokenize_state.markers = &MARKERS; State::Retry(StateName::TextBefore) } @@ -91,7 +91,7 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { } } -/// To do. +/// At `<`, which wasn’t an autolink: before HTML? pub fn before_html(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( StateName::HtmlTextStart, @@ -100,7 +100,7 @@ pub fn before_html(tokenizer: &mut Tokenizer) -> State { ) } -/// To do. +/// At `\`, which wasn’t a character escape: before a hard break? pub fn before_hard_break_escape(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( StateName::HardBreakEscapeStart, @@ -110,10 +110,6 @@ pub fn before_hard_break_escape(tokenizer: &mut Tokenizer) -> State { } /// At data. 
-/// -/// ```markdown -/// |qwe -/// ``` pub fn before_data(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( StateName::DataStart, diff --git a/src/subtokenize.rs b/src/subtokenize.rs index 3d923d3..bf6a106 100644 --- a/src/subtokenize.rs +++ b/src/subtokenize.rs @@ -99,10 +99,7 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool { state = tokenizer.push( (enter.point.index, enter.point.vs), (end.index, end.vs), - match state { - State::Next(func) => func, - _ => unreachable!("cannot be ok/nok"), - }, + state, ); link_index = link_curr.next; @@ -112,7 +109,6 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool { divide_events(&mut map, events, index, &mut tokenizer.events); - // To do: check `tokenizer.events` if there is a deep content type? done = false; } } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 3cdd2d3..04a8cc3 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -29,11 +29,16 @@ pub enum ContentType { Text, } -/// To do. +/// How to handle a byte. #[derive(Debug, PartialEq)] pub enum ByteAction { + /// This is a normal byte. + /// + /// Includes replaced bytes. Normal(u8), + /// This is a new byte. Insert(u8), + /// This byte must be ignored. Ignore, } @@ -84,22 +89,6 @@ pub struct Event { pub link: Option<Link>, } -#[derive(Debug, PartialEq)] -enum AttemptKind { - Attempt, - Check, -} - -/// To do. -#[derive(Debug)] -struct Attempt { - /// To do. - ok: State, - nok: State, - kind: AttemptKind, - state: Option<InternalState>, -} - /// Callback that can be registered and is called when the tokenizer is done. /// /// Resolvers are supposed to change the list of events, because parsing is @@ -107,6 +96,7 @@ struct Attempt { /// the compiler and other users. pub type Resolver = dyn FnOnce(&mut Tokenizer); +/// Names of functions to move to. #[derive(Debug, Clone, Copy, PartialEq)] pub enum StateName { AttentionStart, @@ -447,62 +437,73 @@ pub struct ContainerState { pub size: usize, } +/// Different kinds of attempts. +#[derive(Debug, PartialEq)] +enum AttemptKind { + /// Discard what was tokenizer when unsuccessful. + Attempt, + /// Discard always. + Check, +} + +/// How to handle [`State::Ok`][] or [`State::Nok`][]. +#[derive(Debug)] +struct Attempt { + /// Where to go to when successful. + ok: State, + /// Where to go to when unsuccessful. + nok: State, + /// Kind of attempt. + kind: AttemptKind, + /// If needed, the progress to revert to. + /// + /// It is not needed to discard an [`AttemptKind::Attempt`] that has a + /// `nok` of [`State::Nok`][], because that means it is used in *another* + /// attempt, which will receive that `Nok`, and has to handle it. + progress: Option<Progress>, +} + /// The internal state of a tokenizer, not to be confused with states from the /// state machine, this instead is all the information about where we currently /// are and what’s going on. #[derive(Debug, Clone)] -struct InternalState { - /// Length of `events`. We only add to events, so reverting will just pop stuff off. +struct Progress { + /// Length of `events`. + /// + /// It’s not allowed to remove events, so reverting will just pop stuff off. events_len: usize, - /// Length of the stack. It’s not allowed to decrease the stack in a check or an attempt. + /// Length of the stack. + /// + /// It’s not allowed to decrease the stack in an attempt. stack_len: usize, /// Previous code. previous: Option<u8>, /// Current code. current: Option<u8>, - /// Current relative and absolute position in the file. 
+ /// Current place in the file. point: Point, } -/// To do +/// A lot of shared fields used to tokenize things. #[allow(clippy::struct_excessive_bools)] pub struct TokenizeState<'a> { - /// To do. - pub connect: bool, - /// To do. + // Couple complex fields used to tokenize the document. + /// Tokenizer, used to tokenize flow in document. + pub document_child: Option<Box<Tokenizer<'a>>>, + /// State, used to tokenize containers. + pub document_child_state: Option<State>, + /// Stack of currently active containers. pub document_container_stack: Vec<ContainerState>, - /// To do. - pub document_exits: Vec<Option<Vec<Event>>>, - /// To do. + /// How many active containers continued. pub document_continued: usize, - /// To do. - pub document_paragraph_before: bool, - /// To do. + /// Index of last `data`. pub document_data_index: Option<usize>, - /// To do. - pub document_child_state: Option<State>, - /// To do. - pub child_tokenizer: Option<Box<Tokenizer<'a>>>, - /// To do. - pub marker: u8, - /// To do. - pub marker_other: u8, - /// To do. - pub prefix: usize, - /// To do. - pub return_state: Option<StateName>, - /// To do. - pub seen: bool, - /// To do. - pub size: usize, - /// To do. - pub size_other: usize, - /// To do. - pub start: usize, - /// To do. - pub end: usize, - /// To do. - pub stop: &'static [u8], + /// Container exits by line number. + pub document_exits: Vec<Option<Vec<Event>>>, + /// Whether the previous flow was a paragraph. + pub document_paragraph_before: bool, + + // Couple of very frequent settings for parsing whitespace. pub space_or_tab_eol_content_type: Option<ContentType>, pub space_or_tab_eol_connect: bool, pub space_or_tab_eol_ok: bool, @@ -512,11 +513,50 @@ pub struct TokenizeState<'a> { pub space_or_tab_max: usize, pub space_or_tab_size: usize, pub space_or_tab_token: Token, - /// To do. + + // Couple of media related fields. + /// Stack of label (start) that could form images and links. + /// + /// Used when tokenizing [text content][crate::content::text]. + pub label_start_stack: Vec<LabelStart>, + /// Stack of label (start) that cannot form images and links. + /// + /// Used when tokenizing [text content][crate::content::text]. + pub label_start_list_loose: Vec<LabelStart>, + /// Stack of images and links. + /// + /// Used when tokenizing [text content][crate::content::text]. + pub media_list: Vec<Media>, + + /// Whether to connect tokens. + pub connect: bool, + /// Marker. + pub marker: u8, + /// Secondary marker. + pub marker_b: u8, + /// Several markers. + pub markers: &'static [u8], + /// Whether something was seen. + pub seen: bool, + /// Size. + pub size: usize, + /// Secondary size. + pub size_b: usize, + /// Tertiary size. + pub size_c: usize, + /// Index. + pub start: usize, + /// Index. + pub end: usize, + /// Slot for a token type. pub token_1: Token, + /// Slot for a token type. pub token_2: Token, + /// Slot for a token type. pub token_3: Token, + /// Slot for a token type. pub token_4: Token, + /// Slot for a token type. pub token_5: Token, } @@ -525,9 +565,9 @@ pub struct TokenizeState<'a> { pub struct Tokenizer<'a> { /// Jump between line endings. column_start: Vec<(usize, usize)>, - // First line. + // First line where this tokenizer starts. first_line: usize, - /// First point after the last line ending. + /// Current point after the last line ending (excluding jump). line_start: Point, /// Track whether the current byte is already consumed (`true`) or expected /// to be consumed (`false`). 
@@ -536,7 +576,7 @@ pub struct Tokenizer<'a> { consumed: bool, /// Track whether this tokenizer is done. resolved: bool, - /// To do. + /// Stack of how to handle attempts. attempts: Vec<Attempt>, /// Current byte. pub current: Option<u8>, @@ -544,7 +584,7 @@ pub struct Tokenizer<'a> { pub previous: Option<u8>, /// Current relative and absolute place in the file. pub point: Point, - /// Semantic labels of one or more codes in `codes`. + /// Semantic labels. pub events: Vec<Event>, /// Hierarchy of semantic labels. /// @@ -559,20 +599,8 @@ pub struct Tokenizer<'a> { pub resolver_ids: Vec<String>, /// Shared parsing state across tokenizers. pub parse_state: &'a ParseState<'a>, - /// To do. + /// A lot of shared fields used to tokenize things. pub tokenize_state: TokenizeState<'a>, - /// Stack of label (start) that could form images and links. - /// - /// Used when tokenizing [text content][crate::content::text]. - pub label_start_stack: Vec<LabelStart>, - /// Stack of label (start) that cannot form images and links. - /// - /// Used when tokenizing [text content][crate::content::text]. - pub label_start_list_loose: Vec<LabelStart>, - /// Stack of images and links. - /// - /// Used when tokenizing [text content][crate::content::text]. - pub media_list: Vec<Media>, /// Whether we would be interrupting something. /// /// Used when tokenizing [flow content][crate::content::flow]. @@ -613,17 +641,19 @@ impl<'a> Tokenizer<'a> { document_paragraph_before: false, document_data_index: None, document_child_state: None, - child_tokenizer: None, + document_child: None, marker: 0, - marker_other: 0, - prefix: 0, + marker_b: 0, + markers: &[], seen: false, size: 0, - size_other: 0, + size_b: 0, + size_c: 0, start: 0, end: 0, - stop: &[], - return_state: None, + label_start_stack: vec![], + label_start_list_loose: vec![], + media_list: vec![], space_or_tab_eol_content_type: None, space_or_tab_eol_connect: false, space_or_tab_eol_ok: false, @@ -640,15 +670,11 @@ impl<'a> Tokenizer<'a> { token_5: Token::Data, }, map: EditMap::new(), - label_start_stack: vec![], - label_start_list_loose: vec![], - media_list: vec![], interrupt: false, concrete: false, lazy: false, - // Assume about 10 resolvers. - resolvers: Vec::with_capacity(10), - resolver_ids: Vec::with_capacity(10), + resolvers: vec![], + resolver_ids: vec![], } } @@ -698,7 +724,7 @@ impl<'a> Tokenizer<'a> { } /// Prepare for a next code to get consumed. - pub fn expect(&mut self, byte: Option<u8>) { + fn expect(&mut self, byte: Option<u8>) { debug_assert!(self.consumed, "expected previous byte to be consumed"); self.consumed = false; self.current = byte; @@ -721,7 +747,7 @@ impl<'a> Tokenizer<'a> { } /// Move to the next (virtual) byte. - pub fn move_one(&mut self) { + fn move_one(&mut self) { match byte_action(self.parse_state.bytes, &self.point) { ByteAction::Ignore => { self.point.index += 1; @@ -756,7 +782,7 @@ impl<'a> Tokenizer<'a> { } /// Move (virtual) bytes. - pub fn move_to(&mut self, to: (usize, usize)) { + fn move_to(&mut self, to: (usize, usize)) { let (to_index, to_vs) = to; while self.point.index < to_index || self.point.index == to_index && self.point.vs < to_vs { self.move_one(); @@ -838,9 +864,9 @@ impl<'a> Tokenizer<'a> { }); } - /// Capture the internal state. - fn capture(&mut self) -> InternalState { - InternalState { + /// Capture the tokenizer progress. 
+ fn capture(&mut self) -> Progress { + Progress { previous: self.previous, current: self.current, point: self.point.clone(), @@ -849,8 +875,8 @@ impl<'a> Tokenizer<'a> { } } - /// Apply the internal state. - fn free(&mut self, previous: InternalState) { + /// Apply tokenizer progress. + fn free(&mut self, previous: Progress) { self.previous = previous.previous; self.current = previous.current; self.point = previous.point; @@ -866,123 +892,168 @@ impl<'a> Tokenizer<'a> { self.stack.truncate(previous.stack_len); } - /// Parse with `name` and its future states, to check if it result in - /// [`State::Ok`][] or [`State::Nok`][], revert on both cases, and then - /// call `done` with whether it was successful or not. - /// - /// This captures the current state of the tokenizer, returns a wrapped - /// state that captures all codes and feeds them to `name` and its - /// future states until it yields `State::Ok` or `State::Nok`. - /// It then applies the captured state, calls `done`, and feeds all - /// captured codes to its future states. + /// Parse with `name` and its future states, to see if that results in + /// [`State::Ok`][] or [`State::Nok`][], then revert in both cases. pub fn check(&mut self, name: StateName, ok: State, nok: State) -> State { - attempt_impl(self, name, ok, nok, AttemptKind::Check) + // Always capture (and restore) when checking. + // No need to capture (and restore) when `nok` is `State::Nok`, because the + // parent attempt will do it. + let progress = Some(self.capture()); + + self.attempts.push(Attempt { + kind: AttemptKind::Check, + progress, + ok, + nok, + }); + + call_impl(self, name) } - /// Parse with `name` and its future states, to check if it results in - /// [`State::Ok`][] or [`State::Nok`][], revert on the case of - /// `State::Nok`, and then call `done` with whether it was successful or - /// not. - /// - /// This captures the current state of the tokenizer, returns a wrapped - /// state that captures all codes and feeds them to `name` and its - /// future states until it yields `State::Ok`, at which point it calls - /// `done` and yields its result. - /// If instead `State::Nok` was yielded, the captured state is applied, - /// `done` is called, and all captured codes are fed to its future states. + /// Parse with `name` and its future states, to see if that results in + /// [`State::Ok`][] or [`State::Nok`][], revert in the case of + /// `State::Nok`. pub fn attempt(&mut self, name: StateName, ok: State, nok: State) -> State { - attempt_impl(self, name, ok, nok, AttemptKind::Attempt) - } + // Always capture (and restore) when checking. + // No need to capture (and restore) when `nok` is `State::Nok`, because the + // parent attempt will do it. + let progress = if nok == State::Nok { + None + } else { + Some(self.capture()) + }; - /// Feed a list of `codes` into `start`. - /// - /// This is set up to support repeatedly calling `feed`, and thus streaming - /// markdown into the state machine, and normally pauses after feeding. - // Note: if needed: accept `vs`? - pub fn push(&mut self, min: (usize, usize), max: (usize, usize), name: StateName) -> State { - debug_assert!(!self.resolved, "cannot feed after drain"); + self.attempts.push(Attempt { + kind: AttemptKind::Attempt, + progress, + ok, + nok, + }); - // debug_assert!(min >= self.point.index, "cannot move backwards"); + call_impl(self, name) + } - if min.0 > self.point.index || (min.0 == self.point.index && min.1 > self.point.vs) { - self.move_to(min); - } + /// Tokenize. 
+ pub fn push(&mut self, from: (usize, usize), to: (usize, usize), state: State) -> State { + push_impl(self, from, to, state, false) + } - let mut state = State::Next(name); + /// Flush. + pub fn flush(&mut self, state: State, resolve: bool) { + let to = (self.point.index, self.point.vs); + push_impl(self, to, to, state, true); - while self.point.index < max.0 || (self.point.index == max.0 && self.point.vs < max.1) { - match state { - State::Ok | State::Nok => { - if let Some(attempt) = self.attempts.pop() { - state = attempt_done_impl(self, attempt, state); - } else { - break; - } - } - State::Next(name) => { - let action = byte_action(self.parse_state.bytes, &self.point); - state = feed_action_impl(self, &Some(action), name); - } - State::Retry(name) => { - log::debug!(" retry {:?}", name); - state = call_impl(self, name); - } + if resolve { + self.resolved = true; + + while !self.resolvers.is_empty() { + let resolver = self.resolvers.remove(0); + resolver(self); } + + self.map.consume(&mut self.events); } + } +} - state +/// Move back past ignored bytes. +fn move_point_back(tokenizer: &mut Tokenizer, point: &mut Point) { + while point.index > 0 { + point.index -= 1; + let action = byte_action(tokenizer.parse_state.bytes, point); + if !matches!(action, ByteAction::Ignore) { + point.index += 1; + break; + } } +} - /// Flush the tokenizer. - pub fn flush(&mut self, mut state: State, resolve: bool) { - let max = self.point.index; +/// Run the tokenizer. +fn push_impl( + tokenizer: &mut Tokenizer, + from: (usize, usize), + to: (usize, usize), + mut state: State, + flush: bool, +) -> State { + debug_assert!(!tokenizer.resolved, "cannot feed after drain"); + debug_assert!( + from.0 > tokenizer.point.index + || (from.0 == tokenizer.point.index && from.1 >= tokenizer.point.vs), + "cannot move backwards" + ); + + tokenizer.move_to(from); + + loop { + match state { + State::Ok | State::Nok => { + if let Some(attempt) = tokenizer.attempts.pop() { + if attempt.kind == AttemptKind::Check || state == State::Nok { + if let Some(progress) = attempt.progress { + tokenizer.free(progress); + } + } - self.consumed = true; + tokenizer.consumed = true; - loop { - match state { - State::Ok | State::Nok => { - if let Some(attempt) = self.attempts.pop() { - state = attempt_done_impl(self, attempt, state); + let next = if state == State::Ok { + attempt.ok } else { - break; - } - } - State::Next(name) => { - // We sometimes move back when flushing, so then we use those codes. 
- state = feed_action_impl( - self, - &if self.point.index == max { - None - } else { - Some(byte_action(self.parse_state.bytes, &self.point)) - }, - name, - ); - } - State::Retry(name) => { - log::debug!(" retry {:?}", name); - state = call_impl(self, name); + attempt.nok + }; + + log::debug!("attempt: `{:?}` -> `{:?}`", state, next); + state = next; + } else { + break; } } - } - - self.consumed = true; - debug_assert!(matches!(state, State::Ok), "must be ok"); + State::Next(name) => { + let action = if tokenizer.point.index < to.0 + || (tokenizer.point.index == to.0 && tokenizer.point.vs < to.1) + { + Some(byte_action(tokenizer.parse_state.bytes, &tokenizer.point)) + } else if flush { + None + } else { + break; + }; - if resolve { - self.resolved = true; + if let Some(ByteAction::Ignore) = action { + tokenizer.move_one(); + } else { + let byte = + if let Some(ByteAction::Insert(byte) | ByteAction::Normal(byte)) = action { + Some(byte) + } else { + None + }; - while !self.resolvers.is_empty() { - let resolver = self.resolvers.remove(0); - resolver(self); + log::debug!("feed: `{:?}` to {:?}", byte, name); + tokenizer.expect(byte); + state = call_impl(tokenizer, name); + }; + } + State::Retry(name) => { + log::debug!("retry: {:?}", name); + state = call_impl(tokenizer, name); } - - self.map.consume(&mut self.events); } } + + tokenizer.consumed = true; + + if flush { + debug_assert!(matches!(state, State::Ok), "must be ok"); + } else { + debug_assert!(matches!(state, State::Next(_)), "must have a next state"); + } + + state } +/// Figure out how to handle a byte. fn byte_action(bytes: &[u8], point: &Point) -> ByteAction { if point.index < bytes.len() { let byte = bytes[point.index]; @@ -1024,73 +1095,8 @@ fn byte_action(bytes: &[u8], point: &Point) -> ByteAction { } } -/// Internal utility to wrap states to also capture codes. -/// -/// Recurses into itself. -/// Used in [`Tokenizer::attempt`][Tokenizer::attempt] and [`Tokenizer::check`][Tokenizer::check]. -fn attempt_impl( - tokenizer: &mut Tokenizer, - name: StateName, - ok: State, - nok: State, - kind: AttemptKind, -) -> State { - // Always capture (and restore) when checking. - // No need to capture (and restore) when `nok` is `State::Nok`, because the - // parent attempt will do it. - let state = if kind == AttemptKind::Check || nok != State::Nok { - Some(tokenizer.capture()) - } else { - None - }; - - tokenizer.attempts.push(Attempt { - ok, - nok, - kind, - state, - }); - - call_impl(tokenizer, name) -} - -fn attempt_done_impl(tokenizer: &mut Tokenizer, attempt: Attempt, state: State) -> State { - if attempt.kind == AttemptKind::Check || state == State::Nok { - if let Some(state) = attempt.state { - tokenizer.free(state); - } - } - - tokenizer.consumed = true; - if state == State::Ok { - attempt.ok - } else { - attempt.nok - } -} - -fn feed_action_impl( - tokenizer: &mut Tokenizer, - action: &Option<ByteAction>, - name: StateName, -) -> State { - if let Some(ByteAction::Ignore) = action { - tokenizer.move_one(); - State::Next(name) - } else { - let byte = if let Some(ByteAction::Insert(byte) | ByteAction::Normal(byte)) = action { - Some(*byte) - } else { - None - }; - - log::debug!("feed: `{:?}` to {:?}", byte, name); - tokenizer.expect(byte); - call_impl(tokenizer, name) - } -} - #[allow(clippy::too_many_lines)] +/// Call the corresponding function for a state name. 
fn call_impl(tokenizer: &mut Tokenizer, name: StateName) -> State { let func = match name { StateName::AttentionStart => construct::attention::start, @@ -1422,15 +1428,3 @@ fn call_impl(tokenizer: &mut Tokenizer, name: StateName) -> State { func(tokenizer) } - -fn move_point_back(tokenizer: &mut Tokenizer, point: &mut Point) { - // Move back past ignored bytes. - while point.index > 0 { - point.index -= 1; - let action = byte_action(tokenizer.parse_state.bytes, point); - if !matches!(action, ByteAction::Ignore) { - point.index += 1; - break; - } - } -} |