From dfd11b1bc155ae1fba9975a90c2dc83dc07697b4 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Tue, 28 Jun 2022 14:18:17 +0200 Subject: Fix jumps in `edit_map` * Use resolve more often (e.g., heading (atx, setext)) * Fix to link whole phrasing (e.g., one big chunk of text in heading (atx, setext), titles, labels) * Replace `ChunkText`, `ChunkString`, with `event.content_type: Option` * Refactor to externalize `edit_map` from `label` --- src/util/edit_map.rs | 144 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/util/mod.rs | 1 + 2 files changed, 145 insertions(+) create mode 100644 src/util/edit_map.rs (limited to 'src/util') diff --git a/src/util/edit_map.rs b/src/util/edit_map.rs new file mode 100644 index 0000000..8136306 --- /dev/null +++ b/src/util/edit_map.rs @@ -0,0 +1,144 @@ +use crate::tokenizer::Event; + +/// To do: could we do without `HashMap`, so we don’t need `std`? +use std::collections::HashMap; + +pub fn shift_links(events: &mut [Event], jumps: &[(usize, isize)]) { + let map = |before| { + let mut jump_index = 0; + let mut jump = 0; + + while jump_index < jumps.len() { + if jumps[jump_index].0 > before { + break; + } + + jump = jumps[jump_index].1; + jump_index += 1; + } + + #[allow(clippy::pedantic)] + let next_i = (before as isize) + jump; + assert!(next_i >= 0, "cannot shift before `0`"); + #[allow(clippy::pedantic)] + let next = next_i as usize; + next + }; + + let mut index = 0; + + while index < events.len() { + let event = &mut events[index]; + event.previous = event.previous.map(map); + event.next = event.next.map(map); + index += 1; + } +} + +/// Make it easy to insert and remove things while being performant and keeping +/// links in check. +pub struct EditMap { + consumed: bool, + map: HashMap)>, +} + +impl EditMap { + /// Create a new edit map. + pub fn new() -> EditMap { + EditMap { + consumed: false, + map: HashMap::new(), + } + } + /// Create an edit: a remove and/or add at a certain place. + pub fn add(&mut self, index: usize, mut remove: usize, mut add: Vec) { + assert!(!self.consumed, "cannot add after consuming"); + + if let Some((curr_remove, mut curr_add)) = self.map.remove(&index) { + remove += curr_remove; + curr_add.append(&mut add); + add = curr_add; + } + + self.map.insert(index, (remove, add)); + } + /// Done, change the events. + pub fn consume(&mut self, events: &mut [Event]) -> Vec { + let mut indices: Vec<&usize> = self.map.keys().collect(); + let mut next_events: Vec = vec![]; + let mut start = 0; + + assert!(!self.consumed, "cannot consume after consuming"); + self.consumed = true; + + let mut index = 0; + + while index < events.len() { + let event = &events[index]; + println!( + "ev: {:?} {:?} {:?} {:?} {:?} {:?}", + index, + event.event_type, + event.token_type, + event.content_type, + event.previous, + event.next + ); + index += 1; + } + + indices.sort_unstable(); + + let mut jumps: Vec<(usize, isize)> = vec![]; + let mut index_into_indices = 0; + let mut shift: isize = 0; + while index_into_indices < indices.len() { + let index = *indices[index_into_indices]; + let edit = self.map.get(&index).unwrap(); + println!("?? {:?} {:?} {:?}", shift, edit.1.len(), edit.0); + + #[allow(clippy::pedantic)] + let next = shift + (edit.1.len() as isize) - (edit.0 as isize); + shift = next; + jumps.push((index, shift)); + index_into_indices += 1; + } + + let mut index_into_indices = 0; + + while index_into_indices < indices.len() { + let index = *indices[index_into_indices]; + + if start < index { + let append = &mut events[start..index].to_vec(); + shift_links(append, &jumps); + next_events.append(append); + } + + let (remove, add) = self.map.get(&index).unwrap(); + + if !add.is_empty() { + let append = &mut add.clone(); + let mut index = 0; + + while index < append.len() { + let event = &mut append[index]; + assert!(event.previous.is_none(), "to do?"); + assert!(event.next.is_none(), "to do?"); + index += 1; + } + + next_events.append(append); + } + + start = index + remove; + index_into_indices += 1; + } + + if start < events.len() { + next_events.append(&mut events[start..].to_vec()); + } + + next_events + } +} diff --git a/src/util/mod.rs b/src/util/mod.rs index ee58518..68ef275 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -1,6 +1,7 @@ //! Utilities used when compiling markdown. pub mod decode_character_reference; +pub mod edit_map; pub mod encode; pub mod normalize_identifier; pub mod sanitize_uri; -- cgit