From 0f20660cb95abd4f407bdafa2c45e01829fa971f Mon Sep 17 00:00:00 2001
From: Titus Wormer <tituswormer@gmail.com>
Date: Tue, 19 Jul 2022 17:56:57 +0200
Subject: Refactor to remove cloning in `edit_map`

---
 src/construct/attention.rs      |  2 +-
 src/construct/heading_atx.rs    |  2 +-
 src/construct/heading_setext.rs |  2 +-
 src/construct/label_end.rs      | 17 +++++++---------
 src/construct/list.rs           |  8 +++-----
 src/construct/paragraph.rs      |  2 +-
 src/construct/partial_data.rs   |  2 +-
 src/content/document.rs         |  8 ++++----
 src/subtokenize.rs              |  4 ++--
 src/tokenizer.rs                |  1 +
 src/util/edit_map.rs            | 44 +++++++++++++++++++++++------------------
 11 files changed, 47 insertions(+), 45 deletions(-)

(limited to 'src')

diff --git a/src/construct/attention.rs b/src/construct/attention.rs
index 3e15f9a..7e99600 100644
--- a/src/construct/attention.rs
+++ b/src/construct/attention.rs
@@ -523,7 +523,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer) -> Vec<Event> {
         index += 1;
     }
 
-    edit_map.consume(&mut tokenizer.events)
+    edit_map.consume(tokenizer.events.split_off(0))
 }
 
 /// Classify whether a character code represents whitespace, punctuation, or
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index 68a6be7..4546924 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -258,5 +258,5 @@ pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> {
         index += 1;
     }
 
-    edit_map.consume(&mut tokenizer.events)
+    edit_map.consume(tokenizer.events.split_off(0))
 }
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index 7cd259b..841bf53 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -236,5 +236,5 @@ pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> {
         index += 1;
     }
 
-    edit_map.consume(&mut tokenizer.events)
+    edit_map.consume(tokenizer.events.split_off(0))
 }
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index 2ac2500..2124681 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -154,9 +154,7 @@ use crate::construct::{
     partial_title::{start as title, Options as TitleOptions},
 };
 use crate::token::Token;
-use crate::tokenizer::{
-    Code, Event, EventType, LabelStart, Media, State, StateFnResult, Tokenizer,
-};
+use crate::tokenizer::{Code, Event, EventType, Media, State, StateFnResult, Tokenizer};
 use crate::util::{
     edit_map::EditMap,
     normalize_identifier::normalize_identifier,
@@ -325,10 +323,9 @@ fn reference_not_full(tokenizer: &mut Tokenizer, code: Code, info: Info) -> Stat
 /// ```
 fn ok(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
     // Remove this one and everything after it.
-    let mut left: Vec<LabelStart> = tokenizer
+    let mut left = tokenizer
         .label_start_stack
-        .drain(info.label_start_index..)
-        .collect();
+        .split_off(info.label_start_index);
     // Remove this one from `left`, as we’ll move it to `media_list`.
     left.remove(0);
     tokenizer.label_start_list_loose.append(&mut left);
@@ -616,9 +613,9 @@ fn collapsed_reference_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnRes
 /// images, or turns them back into data.
 #[allow(clippy::too_many_lines)]
 pub fn resolve_media(tokenizer: &mut Tokenizer) -> Vec<Event> {
-    let mut left: Vec<LabelStart> = tokenizer.label_start_list_loose.drain(..).collect();
-    let mut left_2: Vec<LabelStart> = tokenizer.label_start_stack.drain(..).collect();
-    let media: Vec<Media> = tokenizer.media_list.drain(..).collect();
+    let mut left = tokenizer.label_start_list_loose.split_off(0);
+    let mut left_2 = tokenizer.label_start_stack.split_off(0);
+    let media = tokenizer.media_list.split_off(0);
     left.append(&mut left_2);
 
     let mut edit_map = EditMap::new();
@@ -776,5 +773,5 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) -> Vec<Event> {
         index += 1;
     }
 
-    edit_map.consume(&mut tokenizer.events)
+    edit_map.consume(tokenizer.events.split_off(0))
 }
diff --git a/src/construct/list.rs b/src/construct/list.rs
index 9cd3b62..db8af36 100644
--- a/src/construct/list.rs
+++ b/src/construct/list.rs
@@ -432,8 +432,7 @@ pub fn resolve_list_item(tokenizer: &mut Tokenizer) -> Vec<Event> {
                     if previous.0 == current.0 && previous.1 == current.1 && before == current.2 {
                         let previous_mut = &mut lists_wip[list_index];
                         previous_mut.3 = current.3;
-                        let mut remainder = lists_wip.drain((list_index + 1)..).collect::<Vec<_>>();
-                        lists.append(&mut remainder);
+                        lists.append(&mut lists_wip.split_off(list_index + 1));
                         matched = true;
                         break;
                     }
@@ -457,8 +456,7 @@ pub fn resolve_list_item(tokenizer: &mut Tokenizer) -> Vec<Event> {
                     }
 
                     if let Some(exit) = exit {
-                        let mut remainder = lists_wip.drain(exit..).collect::<Vec<_>>();
-                        lists.append(&mut remainder);
+                        lists.append(&mut lists_wip.split_off(exit));
                     }
 
                     lists_wip.push(current);
@@ -494,5 +492,5 @@ pub fn resolve_list_item(tokenizer: &mut Tokenizer) -> Vec<Event> {
         index += 1;
     }
 
-    edit_map.consume(&mut tokenizer.events)
+    edit_map.consume(tokenizer.events.split_off(0))
 }
diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs
index 74dca87..53030f4 100644
--- a/src/construct/paragraph.rs
+++ b/src/construct/paragraph.rs
@@ -142,5 +142,5 @@ pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> {
         index += 1;
     }
 
-    edit_map.consume(&mut tokenizer.events)
+    edit_map.consume(tokenizer.events.split_off(0))
 }
diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs
index 3bc8c1c..b59bb76 100644
--- a/src/construct/partial_data.rs
+++ b/src/construct/partial_data.rs
@@ -114,5 +114,5 @@ pub fn resolve_data(tokenizer: &mut Tokenizer) -> Vec<Event> {
         index += 1;
     }
 
-    edit_map.consume(&mut tokenizer.events)
+    edit_map.consume(tokenizer.events.split_off(0))
 }
diff --git a/src/content/document.rs b/src/content/document.rs
index 63c2476..53e58c4 100644
--- a/src/content/document.rs
+++ b/src/content/document.rs
@@ -352,7 +352,7 @@ fn containers_after(
         .last_mut()
         .unwrap()
         .0
-        .append(&mut tokenizer.events.drain(info.index..).collect::<Vec<_>>());
+        .append(&mut tokenizer.events.split_off(info.index));
 
     tokenizer.lazy = info.continued != info.stack.len();
     tokenizer.interrupt = info.interrupt_before;
@@ -433,12 +433,12 @@ fn exit_containers(
     mut info: DocumentInfo,
     phase: &Phase,
 ) -> DocumentInfo {
-    let mut stack_close = info.stack.drain(info.continued..).collect::<Vec<_>>();
+    let mut stack_close = info.stack.split_off(info.continued);
 
     // So, we’re at the end of a line, but we need to close the *previous* line.
     if *phase != Phase::Eof {
         tokenizer.define_skip_current();
-        let mut current_events = tokenizer.events.drain(info.index..).collect::<Vec<_>>();
+        let mut current_events = tokenizer.events.split_off(info.index);
         let next = info.next;
         info.next = Box::new(flow); // This is weird but Rust needs a function there.
         let result = tokenizer.flush(next);
@@ -537,5 +537,5 @@ fn resolve(tokenizer: &mut Tokenizer, info: &DocumentInfo) -> Vec<Event> {
         add,
     );
 
-    map.consume(&mut tokenizer.events)
+    map.consume(tokenizer.events.split_off(0))
 }
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index 174ddfe..df7b015 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -61,7 +61,7 @@ pub fn link_to(events: &mut [Event], pevious: usize, next: usize) {
 /// Parse linked events.
 ///
 /// Supposed to be called repeatedly, returns `1: true` when done.
-pub fn subtokenize(mut events: Vec<Event>, parse_state: &ParseState) -> (Vec<Event>, bool) {
+pub fn subtokenize(events: Vec<Event>, parse_state: &ParseState) -> (Vec<Event>, bool) {
     let mut edit_map = EditMap::new();
     let mut done = true;
     let mut index = 0;
@@ -178,5 +178,5 @@ pub fn subtokenize(mut events: Vec<Event>, parse_state: &ParseState) -> (Vec<Eve
         index += 1;
     }
 
-    (edit_map.consume(&mut events), done)
+    (edit_map.consume(events), done)
 }
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 8f85af0..4e184f4 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -730,6 +730,7 @@ fn feed_impl(
     }
 
     // Yield to a higher loop.
+    // To do: avoid copying the remaining codes into a new `Vec`?
     check_statefn_result((state, Some(codes[index..].to_vec())))
 }
 
diff --git a/src/util/edit_map.rs b/src/util/edit_map.rs
index 90ff483..1f43a3a 100644
--- a/src/util/edit_map.rs
+++ b/src/util/edit_map.rs
@@ -72,11 +72,9 @@ impl EditMap {
         add_impl(self, index, remove, add, true);
     }
     /// Done, change the events.
-    pub fn consume(&mut self, events: &mut [Event]) -> Vec<Event> {
+    pub fn consume(&mut self, mut events: Vec<Event>) -> Vec<Event> {
         self.map
             .sort_unstable_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
-        let mut next_events: Vec<Event> = vec![];
-        let mut start = 0;
 
         assert!(!self.consumed, "cannot consume after consuming");
         self.consumed = true;
@@ -94,28 +92,36 @@ impl EditMap {
             index += 1;
         }
 
-        let mut index = 0;
+        let mut index = self.map.len();
+        let mut vecs: Vec<Vec<Event>> = vec![];
+        let mut capacity = 0;
 
-        while index < self.map.len() {
+        while index > 0 {
+            index -= 1;
             let at = self.map[index].0;
-            let remove = self.map[index].1;
-            let mut add = self.map[index].2.drain(..).collect::<Vec<_>>();
 
-            if start < at {
-                let append = &mut events[start..at].to_vec();
-                shift_links(append, &jumps);
-                next_events.append(append);
-            }
+            let mut keep = events.split_off(at + self.map[index].1);
+            shift_links(&mut keep, &jumps);
+            capacity += keep.len();
+            vecs.push(keep);
 
-            next_events.append(&mut add);
-            start = at + remove;
-            index += 1;
+            let add = self.map[index].2.split_off(0);
+            capacity += add.len();
+            vecs.push(add);
+
+            events.truncate(at);
         }
 
-        if start < events.len() {
-            let append = &mut events[start..].to_vec();
-            shift_links(append, &jumps);
-            next_events.append(append);
+        shift_links(&mut events, &jumps);
+        capacity += events.len();
+        vecs.push(events);
+
+        let mut next_events: Vec<Event> = Vec::with_capacity(capacity);
+        let mut slice = vecs.pop();
+
+        while let Some(mut x) = slice {
+            next_events.append(&mut x);
+            slice = vecs.pop();
         }
 
         next_events
-- 
cgit