Refactor to remove need for cloning codes

author: Titus Wormer <tituswormer@gmail.com> 2022-07-25 15:52:35 +0200
committer: Titus Wormer <tituswormer@gmail.com> 2022-07-25 15:52:35 +0200
commit: ca56f2742d8719358d2046fbdee4f1087add0568 (patch)
tree: 702d3ff3ab6ad432bdff59990b7b4211da57afda /src
parent: 11304728b6607bc2a8d41a640308f3379a25b933 (diff)
download: markdown-rs-ca56f2742d8719358d2046fbdee4f1087add0568.tar.gz
markdown-rs-ca56f2742d8719358d2046fbdee4f1087add0568.tar.bz2
markdown-rs-ca56f2742d8719358d2046fbdee4f1087add0568.zip
3 files changed, 26 insertions, 44 deletions
diff --git a/src/content/document.rs b/src/content/document.rs
index f8d7b55..29aaa84 100644
--- a/src/content/document.rs
+++ b/src/content/document.rs
@@ -78,7 +78,7 @@ struct DocumentInfo {
 pub fn document(parse_state: &mut ParseState, point: Point) -> Vec<Event> {
     let mut tokenizer = Tokenizer::new(point, parse_state);
 
-    tokenizer.push(parse_state.codes.clone(), Box::new(start), true);
+    tokenizer.push(0, parse_state.codes.len(), Box::new(start), true);
 
     let mut index = 0;
     let mut definitions = vec![];
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index 0c9df34..a1ec90a 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -24,7 +24,7 @@
 use crate::content::{string::start as string, text::start as text};
 use crate::parser::ParseState;
 use crate::tokenizer::{ContentType, Event, EventType, State, Tokenizer};
-use crate::util::{edit_map::EditMap, span};
+use crate::util::edit_map::EditMap;
 
 /// Create a link between two [`Event`][]s.
 ///
@@ -84,20 +84,15 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool {
                 } else {
                     text
                 }));
-                let mut size = 0;
 
                 // Loop through links to pass them in order to the subtokenizer.
                 while let Some(index) = link_index {
                     let enter = &events[index];
                     let link_curr = enter.link.as_ref().expect("expected link");
                     assert_eq!(enter.event_type, EventType::Enter);
-                    let span = span::Span {
-                        start_index: enter.point.index,
-                        end_index: events[index + 1].point.index,
-                    };
 
                     if link_curr.previous != None {
-                        tokenizer.define_skip(&enter.point, size);
+                        tokenizer.define_skip(&enter.point);
                     }
 
                     let func = match state {
@@ -106,13 +101,12 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool {
                     };
 
                     state = tokenizer.push(
-                        span::codes(&parse_state.codes, &span).to_vec(),
+                        enter.point.index,
+                        events[index + 1].point.index,
                         func,
                         link_curr.next == None,
                     );
 
-                    size += span.end_index - span.start_index;
-
                     link_index = link_curr.next;
                 }
 
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 7ec0d91..637b34f 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -173,7 +173,7 @@ struct InternalState {
 #[allow(clippy::struct_excessive_bools)]
 pub struct Tokenizer<'a> {
     /// Jump between line endings.
-    column_start: Vec<(usize, usize, usize, usize)>,
+    column_start: Vec<(usize, usize, usize)>,
     // First line.
     line_start: usize,
     /// Track whether a character is expected to be consumed, and whether it’s
@@ -204,7 +204,7 @@ pub struct Tokenizer<'a> {
     resolver_ids: Vec<String>,
     /// Shared parsing state across tokenizers.
     pub parse_state: &'a ParseState<'a>,
-    codes: Vec<Code>,
+    /// To do.
     pub index: usize,
     /// Stack of label (start) that could form images and links.
     ///
@@ -250,7 +250,6 @@ impl<'a> Tokenizer<'a> {
             stack: vec![],
             events: vec![],
             parse_state,
-            codes: vec![],
             index: 0,
             map: EditMap::new(),
             label_start_stack: vec![],
@@ -292,12 +291,8 @@ impl<'a> Tokenizer<'a> {
     }
 
     /// Define a jump between two places.
-    pub fn define_skip(&mut self, point: &Point, index: usize) {
-        define_skip_impl(
-            self,
-            point.line,
-            (point.column, point.offset, point.index, index),
-        );
+    pub fn define_skip(&mut self, point: &Point) {
+        define_skip_impl(self, point.line, (point.column, point.offset, point.index));
     }
 
     /// Define the current place as a jump between two places.
@@ -305,12 +300,7 @@ impl<'a> Tokenizer<'a> {
         define_skip_impl(
             self,
             self.point.line,
-            (
-                self.point.column,
-                self.point.offset,
-                self.point.index,
-                self.index,
-            ),
+            (self.point.column, self.point.offset, self.point.index),
         );
     }
 
@@ -320,11 +310,11 @@ impl<'a> Tokenizer<'a> {
         let at = self.point.line - self.line_start;
 
         if self.point.column == 1 && at != self.column_start.len() {
-            let (column, offset, index_abs, index_rel) = &self.column_start[at];
+            let (column, offset, index) = &self.column_start[at];
             self.point.column = *column;
             self.point.offset = *offset;
-            self.point.index = *index_abs;
-            self.index = *index_rel;
+            self.point.index = *index;
+            self.index = *index;
         }
     }
 
@@ -357,7 +347,6 @@ impl<'a> Tokenizer<'a> {
                         self.point.column,
                         self.point.offset,
                         self.point.index,
-                        self.index,
                     ));
                 }
 
@@ -636,18 +625,14 @@ impl<'a> Tokenizer<'a> {
     /// markdown into the state machine, and normally pauses after feeding.
     pub fn push(
         &mut self,
-        mut codes: Vec<Code>,
+        min: usize,
+        max: usize,
         start: impl FnOnce(&mut Tokenizer, Code) -> State + 'static,
         drain: bool,
     ) -> State {
         assert!(!self.drained, "cannot feed after drain");
 
-        // Let’s assume an event per character.
-        self.events.reserve(codes.len());
-
-        self.codes.append(&mut codes);
-
-        let mut result = feed_impl(self, start);
+        let mut result = feed_impl(self, min, max, start);
 
         if drain {
             let func = match result {
@@ -655,7 +640,7 @@ impl<'a> Tokenizer<'a> {
                 _ => unreachable!("expected next state"),
             };
 
-            result = flush_impl(self, func);
+            result = flush_impl(self, max, func);
 
             self.drained = true;
 
@@ -672,7 +657,7 @@ impl<'a> Tokenizer<'a> {
 
     /// Flush the tokenizer.
     pub fn flush(&mut self, start: impl FnOnce(&mut Tokenizer, Code) -> State + 'static) -> State {
-        flush_impl(self, start)
+        flush_impl(self, self.index, start)
     }
 }
 
@@ -717,14 +702,17 @@ fn attempt_impl(
 /// Feed a list of `codes` into `start`.
 fn feed_impl(
     tokenizer: &mut Tokenizer,
+    min: usize,
+    max: usize,
     start: impl FnOnce(&mut Tokenizer, Code) -> State + 'static,
 ) -> State {
     let mut state = State::Fn(Box::new(start));
 
+    tokenizer.index = min;
     tokenizer.consumed = true;
 
-    while tokenizer.index < tokenizer.codes.len() {
-        let code = tokenizer.codes[tokenizer.index];
+    while tokenizer.index < max {
+        let code = tokenizer.parse_state.codes[tokenizer.index];
 
         match state {
             State::Ok(_) | State::Nok => {
@@ -744,10 +732,10 @@ fn feed_impl(
 /// Flush `start`: pass `eof`s to it until done.
 fn flush_impl(
     tokenizer: &mut Tokenizer,
+    max: usize,
     start: impl FnOnce(&mut Tokenizer, Code) -> State + 'static,
 ) -> State {
     let mut state = State::Fn(Box::new(start));
-    let max = tokenizer.index;
     tokenizer.consumed = true;
 
     loop {
@@ -755,7 +743,7 @@ fn flush_impl(
             State::Ok(_) | State::Nok => break,
             State::Fn(func) => {
                 let code = if tokenizer.index < max {
-                    tokenizer.codes[tokenizer.index]
+                    tokenizer.parse_state.codes[tokenizer.index]
                 } else {
                     Code::None
                 };
@@ -778,7 +766,7 @@ fn flush_impl(
 ///
 /// This defines how much columns, offsets, and the `index` are increased when
 /// consuming a line ending.
-fn define_skip_impl(tokenizer: &mut Tokenizer, line: usize, info: (usize, usize, usize, usize)) {
+fn define_skip_impl(tokenizer: &mut Tokenizer, line: usize, info: (usize, usize, usize)) {
     log::debug!("position: define skip: {:?} -> ({:?})", line, info);
     let at = line - tokenizer.line_start;
author	Titus Wormer <tituswormer@gmail.com>	2022-07-25 15:52:35 +0200
committer	Titus Wormer <tituswormer@gmail.com>	2022-07-25 15:52:35 +0200
commit	ca56f2742d8719358d2046fbdee4f1087add0568 (patch)
tree	702d3ff3ab6ad432bdff59990b7b4211da57afda /src
parent	11304728b6607bc2a8d41a640308f3379a25b933 (diff)
download	markdown-rs-ca56f2742d8719358d2046fbdee4f1087add0568.tar.gz markdown-rs-ca56f2742d8719358d2046fbdee4f1087add0568.tar.bz2 markdown-rs-ca56f2742d8719358d2046fbdee4f1087add0568.zip