Use `edit_map` in `subtokenize`

author: Titus Wormer <tituswormer@gmail.com> 2022-07-19 15:36:21 +0200
committer: Titus Wormer <tituswormer@gmail.com> 2022-07-19 15:36:21 +0200
commit: ae0f12e668cfd37728aad907c813431595e6cc1b (patch)
tree: 3cdc7282643656633a11c992cd7d1d050924dadc /src/subtokenize.rs
parent: c4cd482fd5006cde338e49104f2abdbd20fd644d (diff)
download: markdown-rs-ae0f12e668cfd37728aad907c813431595e6cc1b.tar.gz
markdown-rs-ae0f12e668cfd37728aad907c813431595e6cc1b.tar.bz2
markdown-rs-ae0f12e668cfd37728aad907c813431595e6cc1b.zip
1 file changed, 40 insertions, 67 deletions
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index ce4f788..174ddfe 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -21,11 +21,10 @@
 //! thus the whole document needs to be parsed up to the level of definitions,
 //! before any level that can include references can be parsed.
 
-use crate::content::{flow::start as flow, string::start as string, text::start as text};
+use crate::content::{string::start as string, text::start as text};
 use crate::parser::ParseState;
 use crate::tokenizer::{ContentType, Event, EventType, State, StateFn, StateFnResult, Tokenizer};
-use crate::util::span;
-use std::collections::HashMap;
+use crate::util::{edit_map::EditMap, span};
 
 /// Create a link between two [`Event`][]s.
 ///
@@ -63,16 +62,9 @@ pub fn link_to(events: &mut [Event], pevious: usize, next: usize) {
 ///
 /// Supposed to be called repeatedly, returns `1: true` when done.
 pub fn subtokenize(mut events: Vec<Event>, parse_state: &ParseState) -> (Vec<Event>, bool) {
-    let mut index = 0;
-    // Map of first chunks to their tokenizer.
-    let mut head_to_tokenizer: HashMap<usize, Tokenizer> = HashMap::new();
-    // Map of chunks to their head and corresponding range of events.
-    let mut link_to_info: HashMap<usize, (usize, usize, usize)> = HashMap::new();
+    let mut edit_map = EditMap::new();
     let mut done = true;
-
-    if events.is_empty() {
-        return (events, true);
-    }
+    let mut index = 0;
 
     while index < events.len() {
         let event = &events[index];
@@ -83,34 +75,28 @@ pub fn subtokenize(mut events: Vec<Event>, parse_state: &ParseState) -> (Vec<Eve
 
             // No need to enter linked events again.
             if event.previous == None {
-                done = false;
                 // Index into `events` pointing to a chunk.
-                let mut index_opt: Option<usize> = Some(index);
+                let mut link_index: Option<usize> = Some(index);
                 // Subtokenizer.
                 let mut tokenizer = Tokenizer::new(event.point.clone(), event.index, parse_state);
                 // Substate.
                 let mut result: StateFnResult = (
-                    State::Fn(Box::new(if *content_type == ContentType::Flow {
-                        flow
-                    } else if *content_type == ContentType::String {
+                    State::Fn(Box::new(if *content_type == ContentType::String {
                         string
                     } else {
                         text
                     })),
                     None,
                 );
-                // Indices into `codes` of each end of chunk.
-                let mut ends: Vec<usize> = vec![];
 
-                // Loop through chunks to pass them in order to the subtokenizer.
-                while let Some(index_ptr) = index_opt {
-                    let enter = &events[index_ptr];
+                // Loop through links to pass them in order to the subtokenizer.
+                while let Some(index) = link_index {
+                    let enter = &events[index];
                     assert_eq!(enter.event_type, EventType::Enter);
                     let span = span::Span {
                         start_index: enter.index,
-                        end_index: events[index_ptr + 1].index,
+                        end_index: events[index + 1].index,
                     };
-                    ends.push(span.end_index);
 
                     if enter.previous != None {
                         tokenizer.define_skip(&enter.point, enter.index);
@@ -127,32 +113,32 @@ pub fn subtokenize(mut events: Vec<Event>, parse_state: &ParseState) -> (Vec<Eve
                         enter.next == None,
                     );
                     assert!(result.1.is_none(), "expected no remainder");
-                    index_opt = enter.next;
+                    link_index = enter.next;
                 }
 
-                // Now, loop through all subevents (and `ends`), to figure out
-                // which parts belong where.
-                // Current index.
+                // Now, loop through all subevents to figure out which parts
+                // belong where and fix deep links.
                 let mut subindex = 0;
-                // Index into subevents that starts the current slice.
-                let mut last_start = 0;
-                // Counter into `ends`: the linked token we are at.
-                let mut end_index = 0;
-                let mut index_opt: Option<usize> = Some(index);
+                let mut link_index = index;
+                let mut slices = vec![];
+                let mut slice_start = 0;
 
                 while subindex < tokenizer.events.len() {
                     let subevent = &mut tokenizer.events[subindex];
 
                     // Find the first event that starts after the end we’re looking
                     // for.
-                    if subevent.event_type == EventType::Enter && subevent.index >= ends[end_index]
+                    if subevent.event_type == EventType::Enter
+                        && subevent.index >= events[link_index + 1].index
                     {
-                        let link = index_opt.unwrap();
-                        link_to_info.insert(link, (index, last_start, subindex));
+                        slices.push((link_index, slice_start));
+                        slice_start = subindex;
+                        link_index = events[link_index].next.unwrap();
+                    }
 
-                        last_start = subindex;
-                        end_index += 1;
-                        index_opt = events[link].next;
+                    if subevent.content_type.is_some() {
+                        // Need to call `subtokenize` again.
+                        done = false;
                     }
 
                     // If there is a `next` link in the subevents, we have to change
@@ -163,8 +149,7 @@ pub fn subtokenize(mut events: Vec<Event>, parse_state: &ParseState) -> (Vec<Eve
                         // The `index` in `events` where the current link is,
                         // minus 2 events (the enter and exit) for each removed
                         // link.
-                        let shift = index_opt.unwrap() - (end_index * 2);
-
+                        let shift = link_index - (slices.len() * 2);
                         subevent.next = Some(next + shift);
                         let next_ev = &mut tokenizer.events[next];
                         let previous = next_ev.previous.unwrap();
@@ -174,36 +159,24 @@ pub fn subtokenize(mut events: Vec<Event>, parse_state: &ParseState) -> (Vec<Eve
                     subindex += 1;
                 }
 
-                link_to_info.insert(index_opt.unwrap(), (index, last_start, subindex));
-                head_to_tokenizer.insert(index, tokenizer);
-            }
-        }
+                slices.push((link_index, slice_start));
 
-        index += 1;
-    }
-
-    // Now that we fed everything into a tokenizer, and we know which parts
-    // belong where, the final task is to splice the events from each
-    // tokenizer into the current events.
-    // To do: instead of splicing, it might be possible to create a new `events`
-    // from each slice and slices from events?
-    let mut index = events.len() - 1;
-
-    while index > 0 {
-        let slice_opt = link_to_info.get(&index);
-
-        if let Some(slice) = slice_opt {
-            let (head, start, end) = *slice;
-            // If there’s a slice at this index, it must also point to a head,
-            // and that head must have a tokenizer.
-            let tokenizer = head_to_tokenizer.get(&head).unwrap();
+                // Finally, inject the subevents.
+                let mut index = slices.len();
 
-            // To do: figure out a way that moves instead of clones?
-            events.splice(index..(index + 2), tokenizer.events[start..end].to_vec());
+                while index > 0 {
+                    index -= 1;
+                    edit_map.add(
+                        slices[index].0,
+                        2,
+                        tokenizer.events.split_off(slices[index].1),
+                    );
+                }
+            }
         }
 
-        index -= 1;
+        index += 1;
     }
 
-    (events, done)
+    (edit_map.consume(&mut events), done)
 }
author	Titus Wormer <tituswormer@gmail.com>	2022-07-19 15:36:21 +0200
committer	Titus Wormer <tituswormer@gmail.com>	2022-07-19 15:36:21 +0200
commit	ae0f12e668cfd37728aad907c813431595e6cc1b (patch)
tree	3cdc7282643656633a11c992cd7d1d050924dadc /src/subtokenize.rs
parent	c4cd482fd5006cde338e49104f2abdbd20fd644d (diff)
download	markdown-rs-ae0f12e668cfd37728aad907c813431595e6cc1b.tar.gz markdown-rs-ae0f12e668cfd37728aad907c813431595e6cc1b.tar.bz2 markdown-rs-ae0f12e668cfd37728aad907c813431595e6cc1b.zip