From bac358ee5c341729e50630f2569a69b4d580ce47 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Fri, 22 Jul 2022 18:46:33 +0200 Subject: Refactor to use a single shared edit map --- src/construct/attention.rs | 14 ++++++-------- src/construct/heading_atx.rs | 12 ++++-------- src/construct/heading_setext.rs | 11 ++++------- src/construct/label_end.rs | 18 ++++++++---------- src/construct/list.rs | 10 +++------- src/construct/paragraph.rs | 11 +++++------ src/construct/partial_data.rs | 10 ++++------ src/content/document.rs | 8 +++----- src/tokenizer.rs | 23 +++++++---------------- src/util/edit_map.rs | 15 ++++++--------- 10 files changed, 50 insertions(+), 82 deletions(-) (limited to 'src') diff --git a/src/construct/attention.rs b/src/construct/attention.rs index b4265f0..1aa25c0 100644 --- a/src/construct/attention.rs +++ b/src/construct/attention.rs @@ -54,7 +54,6 @@ use crate::token::Token; use crate::tokenizer::{Code, Event, EventType, Point, State, Tokenizer}; use crate::unicode::PUNCTUATION; -use crate::util::edit_map::EditMap; /// Character code kinds. #[derive(Debug, PartialEq)] @@ -201,7 +200,7 @@ fn inside(tokenizer: &mut Tokenizer, code: Code, marker: MarkerKind) -> State { /// Resolve attention sequences. #[allow(clippy::too_many_lines)] -fn resolve_attention(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { +fn resolve_attention(tokenizer: &mut Tokenizer) { let codes = &tokenizer.parse_state.codes; let mut start = 0; let mut balance = 0; @@ -340,7 +339,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { // Remove closing sequence if fully used. if sequence_close.size == 0 { sequences.remove(close); - map.add(close_event_index, 2, vec![]); + tokenizer.map.add(close_event_index, 2, vec![]); } else { // Shift remaining closing sequence forward. // Do it here because a sequence can open and close different @@ -362,7 +361,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { // Remove opening sequence if fully used. if sequence_open.size == 0 { sequences.remove(open); - map.add(open_event_index, 2, vec![]); + tokenizer.map.add(open_event_index, 2, vec![]); next_index -= 1; } else { // Shift remaining opening sequence backwards. @@ -372,7 +371,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { } // Opening. - map.add_before( + tokenizer.map.add_before( // Add after the current sequence (it might remain). open_event_index + 2, 0, @@ -420,7 +419,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { ], ); // Closing. - map.add( + tokenizer.map.add( close_event_index, 0, vec![ @@ -484,8 +483,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { index += 1; } - // This resolver is needed. - true + tokenizer.map.consume(&mut tokenizer.events); } /// Classify whether a character code represents whitespace, punctuation, or diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs index 52eca54..1eabb56 100644 --- a/src/construct/heading_atx.rs +++ b/src/construct/heading_atx.rs @@ -58,7 +58,6 @@ use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}; use crate::constant::{HEADING_ATX_OPENING_FENCE_SIZE_MAX, TAB_SIZE}; use crate::token::Token; use crate::tokenizer::{Code, ContentType, Event, EventType, State, Tokenizer}; -use crate::util::edit_map::EditMap; /// Start of a heading (atx). /// @@ -190,7 +189,7 @@ fn data(tokenizer: &mut Tokenizer, code: Code) -> State { } /// Resolve heading (atx). -pub fn resolve(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { +pub fn resolve(tokenizer: &mut Tokenizer) { let mut index = 0; let mut heading_start: Option = None; let mut data_start: Option = None; @@ -206,7 +205,7 @@ pub fn resolve(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { // If `start` is some, `end` is too. let end = data_end.unwrap(); - map.add( + tokenizer.map.add( start, 0, vec![Event { @@ -218,9 +217,9 @@ pub fn resolve(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { ); // Remove everything between the start and the end. - map.add(start + 1, end - start - 1, vec![]); + tokenizer.map.add(start + 1, end - start - 1, vec![]); - map.add( + tokenizer.map.add( end + 1, 0, vec![Event { @@ -247,7 +246,4 @@ pub fn resolve(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { index += 1; } - - // This resolver improves events, but is not needed by other resolvers. - false } diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs index 828b7f4..7aa0054 100644 --- a/src/construct/heading_setext.rs +++ b/src/construct/heading_setext.rs @@ -61,7 +61,7 @@ use crate::constant::TAB_SIZE; use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}; use crate::token::Token; use crate::tokenizer::{Code, EventType, State, Tokenizer}; -use crate::util::{edit_map::EditMap, skip::opt_back as skip_opt_back}; +use crate::util::skip::opt_back as skip_opt_back; /// Kind of underline. #[derive(Debug, Clone, PartialEq)] @@ -196,7 +196,7 @@ fn after(tokenizer: &mut Tokenizer, code: Code) -> State { } /// Resolve heading (setext). -pub fn resolve(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { +pub fn resolve(tokenizer: &mut Tokenizer) { let mut index = 0; let mut paragraph_enter = None; let mut paragraph_exit = None; @@ -228,13 +228,10 @@ pub fn resolve(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { let mut heading_exit = tokenizer.events[index].clone(); heading_exit.token_type = Token::HeadingSetext; - map.add(enter, 0, vec![heading_enter]); - map.add(index + 1, 0, vec![heading_exit]); + tokenizer.map.add(enter, 0, vec![heading_enter]); + tokenizer.map.add(index + 1, 0, vec![heading_exit]); } index += 1; } - - // This resolver improves events, but is not needed by other resolvers. - false } diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs index 0b9654d..35dfcdf 100644 --- a/src/construct/label_end.rs +++ b/src/construct/label_end.rs @@ -156,7 +156,6 @@ use crate::construct::{ use crate::token::Token; use crate::tokenizer::{Code, Event, EventType, Media, State, Tokenizer}; use crate::util::{ - edit_map::EditMap, normalize_identifier::normalize_identifier, span::{serialize, Span}, }; @@ -612,7 +611,7 @@ fn collapsed_reference_open(tokenizer: &mut Tokenizer, code: Code) -> State { /// This turns correct label start (image, link) and label end into links and /// images, or turns them back into data. #[allow(clippy::too_many_lines)] -pub fn resolve_media(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { +pub fn resolve_media(tokenizer: &mut Tokenizer) { let mut left = tokenizer.label_start_list_loose.split_off(0); let mut left_2 = tokenizer.label_start_stack.split_off(0); let media = tokenizer.media_list.split_off(0); @@ -627,7 +626,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { let data_enter_index = label_start.start.0; let data_exit_index = label_start.start.1; - map.add( + tokenizer.map.add( data_enter_index, data_exit_index - data_enter_index + 1, vec![ @@ -671,7 +670,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { let group_end_index = media.end.1; // Insert a group enter and label enter. - map.add( + tokenizer.map.add( group_enter_index, 0, vec![ @@ -697,7 +696,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { // Empty events not allowed. if text_enter_index != text_exit_index { // Insert a text enter. - map.add( + tokenizer.map.add( text_enter_index, 0, vec![Event { @@ -709,7 +708,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { ); // Insert a text exit. - map.add( + tokenizer.map.add( text_exit_index, 0, vec![Event { @@ -722,7 +721,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { } // Insert a label exit. - map.add( + tokenizer.map.add( label_exit_index + 1, 0, vec![Event { @@ -734,7 +733,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { ); // Insert a group exit. - map.add( + tokenizer.map.add( group_end_index + 1, 0, vec![Event { @@ -748,6 +747,5 @@ pub fn resolve_media(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { index += 1; } - // This resolver is needed to figure out interleaving with attention. - true + tokenizer.map.consume(&mut tokenizer.events); } diff --git a/src/construct/list.rs b/src/construct/list.rs index 96113e6..7437d4a 100644 --- a/src/construct/list.rs +++ b/src/construct/list.rs @@ -52,7 +52,6 @@ use crate::construct::{ use crate::token::Token; use crate::tokenizer::{Code, EventType, State, Tokenizer}; use crate::util::{ - edit_map::EditMap, skip, span::{codes as codes_from_span, from_exit_event}, }; @@ -388,7 +387,7 @@ fn nok(_tokenizer: &mut Tokenizer, _code: Code) -> State { } /// Find adjacent list items with the same marker. -pub fn resolve_list_item(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { +pub fn resolve_list_item(tokenizer: &mut Tokenizer) { let mut index = 0; let mut balance = 0; let mut lists_wip: Vec<(Kind, usize, usize, usize)> = vec![]; @@ -483,12 +482,9 @@ pub fn resolve_list_item(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { list_start.token_type = token_type.clone(); list_end.token_type = token_type; - map.add(list_item.2, 0, vec![list_start]); - map.add(list_item.3 + 1, 0, vec![list_end]); + tokenizer.map.add(list_item.2, 0, vec![list_start]); + tokenizer.map.add(list_item.3 + 1, 0, vec![list_end]); index += 1; } - - // This resolver improves events, but is not needed by other resolvers. - false } diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs index 811bc75..5409532 100644 --- a/src/construct/paragraph.rs +++ b/src/construct/paragraph.rs @@ -34,7 +34,7 @@ use crate::token::Token; use crate::tokenizer::{Code, ContentType, EventType, State, Tokenizer}; -use crate::util::{edit_map::EditMap, skip::opt as skip_opt}; +use crate::util::skip::opt as skip_opt; /// Before a paragraph. /// @@ -80,7 +80,7 @@ fn inside(tokenizer: &mut Tokenizer, code: Code) -> State { /// Merge “`Paragraph`”s, which currently span a single line, into actual /// `Paragraph`s that span multiple lines. -pub fn resolve(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { +pub fn resolve(tokenizer: &mut Tokenizer) { let len = tokenizer.events.len(); let mut index = 0; @@ -104,10 +104,10 @@ pub fn resolve(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { && tokenizer.events[enter_next_index].token_type == Token::Paragraph { // Remove Exit:Paragraph, Enter:LineEnding, Exit:LineEnding, Enter:Paragraph. - map.add(exit_index, 3, vec![]); + tokenizer.map.add(exit_index, 3, vec![]); // Remove Enter:Paragraph. - map.add(enter_next_index, 1, vec![]); + tokenizer.map.add(enter_next_index, 1, vec![]); // Add Exit:LineEnding position info to Exit:Data. let line_ending_exit = &tokenizer.events[exit_index + 2]; @@ -141,6 +141,5 @@ pub fn resolve(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { index += 1; } - // This resolver is needed by setext headings. - true + tokenizer.map.consume(&mut tokenizer.events); } diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs index 11064e6..d60ef36 100644 --- a/src/construct/partial_data.rs +++ b/src/construct/partial_data.rs @@ -8,7 +8,6 @@ use crate::token::Token; use crate::tokenizer::{Code, EventType, State, Tokenizer}; -use crate::util::edit_map::EditMap; /// At the beginning of data. /// @@ -75,7 +74,7 @@ fn data(tokenizer: &mut Tokenizer, code: Code, stop: &'static [Code]) -> State { } /// Merge adjacent data events. -pub fn resolve_data(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { +pub fn resolve_data(tokenizer: &mut Tokenizer) { let len = tokenizer.events.len(); let mut index = 0; @@ -95,7 +94,9 @@ pub fn resolve_data(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { } if exit_far_index > exit_index { - map.add(exit_index, exit_far_index - exit_index, vec![]); + tokenizer + .map + .add(exit_index, exit_far_index - exit_index, vec![]); // Change positional info. let exit_far = &tokenizer.events[exit_far_index]; @@ -108,7 +109,4 @@ pub fn resolve_data(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { index += 1; } - - // This resolver helps, but is not required for other resolvers. - false } diff --git a/src/content/document.rs b/src/content/document.rs index f2c73e4..c1017a7 100644 --- a/src/content/document.rs +++ b/src/content/document.rs @@ -19,7 +19,6 @@ use crate::token::Token; use crate::tokenizer::{ Code, Container, ContainerState, Event, EventType, Point, State, StateFn, Tokenizer, }; -use crate::util::edit_map::EditMap; use crate::util::{ normalize_identifier::normalize_identifier, skip, @@ -454,7 +453,6 @@ fn exit_containers( // Inject the container events. fn resolve(tokenizer: &mut Tokenizer, info: &mut DocumentInfo) { - let mut map = EditMap::new(); let mut index = 0; let mut inject = info.inject.split_off(0); inject.reverse(); @@ -463,7 +461,7 @@ fn resolve(tokenizer: &mut Tokenizer, info: &mut DocumentInfo) { while let Some((before, mut after)) = inject.pop() { if !before.is_empty() { first_line_ending_in_run = None; - map.add(index, 0, before); + tokenizer.map.add(index, 0, before); } while index < tokenizer.events.len() { @@ -499,8 +497,8 @@ fn resolve(tokenizer: &mut Tokenizer, info: &mut DocumentInfo) { subevent_index += 1; } - map.add(close_index, 0, after); + tokenizer.map.add(close_index, 0, after); } - map.consume(&mut tokenizer.events); + tokenizer.map.consume(&mut tokenizer.events); } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 685198e..eb8bac4 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -95,7 +95,7 @@ pub type StateFn = dyn FnOnce(&mut Tokenizer, Code) -> State; /// Resolvers are supposed to change the list of events, because parsing is /// sometimes messy, and they help expose a cleaner interface of events to /// the compiler and other users. -pub type Resolver = dyn FnOnce(&mut Tokenizer, &mut EditMap) -> bool; +pub type Resolver = dyn FnOnce(&mut Tokenizer); /// The result of a state. pub enum State { @@ -191,6 +191,8 @@ pub struct Tokenizer<'a> { pub stack: Vec, /// Previous character code. pub previous: Code, + /// To do. + pub map: EditMap, /// Current character code. current: Code, /// Current relative and absolute place in the file. @@ -246,6 +248,7 @@ impl<'a> Tokenizer<'a> { stack: vec![], events: vec![], parse_state, + map: EditMap::new(), label_start_stack: vec![], label_start_list_loose: vec![], media_list: vec![], @@ -255,7 +258,7 @@ impl<'a> Tokenizer<'a> { container: None, // Assume about 10 resolvers. resolvers: Vec::with_capacity(10), - resolver_ids: Vec::with_capacity(10) + resolver_ids: Vec::with_capacity(10), } } @@ -635,25 +638,13 @@ impl<'a> Tokenizer<'a> { result = flush_impl(self, func); self.drained = true; - let mut map = EditMap::new(); - let mut consumed = false; while !self.resolvers.is_empty() { let resolver = self.resolvers.remove(0); - let consume = resolver(self, &mut map); - - if consume { - map.consume(&mut self.events); - consumed = true; - map = EditMap::new(); - } else { - consumed = false; - } + resolver(self); } - if !consumed { - map.consume(&mut self.events); - } + self.map.consume(&mut self.events); } result diff --git a/src/util/edit_map.rs b/src/util/edit_map.rs index 3bcef48..4d9f557 100644 --- a/src/util/edit_map.rs +++ b/src/util/edit_map.rs @@ -57,8 +57,6 @@ fn shift_links(events: &mut [Event], jumps: &[(usize, usize, usize)]) { /// links in check. #[derive(Debug)] pub struct EditMap { - /// Whether this map was consumed already. - consumed: bool, /// Record of changes. map: Vec<(usize, usize, Vec)>, } @@ -66,10 +64,7 @@ pub struct EditMap { impl EditMap { /// Create a new edit map. pub fn new() -> EditMap { - EditMap { - consumed: false, - map: vec![], - } + EditMap { map: vec![] } } /// Create an edit: a remove and/or add at a certain place. pub fn add(&mut self, index: usize, remove: usize, add: Vec) { @@ -84,8 +79,9 @@ impl EditMap { self.map .sort_unstable_by(|a, b| a.0.partial_cmp(&b.0).unwrap()); - assert!(!self.consumed, "cannot consume after consuming"); - self.consumed = true; + if self.map.is_empty() { + return; + } // Calculate jumps: where items in the current list move to. let mut jumps = Vec::with_capacity(self.map.len()); @@ -118,12 +114,13 @@ impl EditMap { while let Some(mut slice) = vecs.pop() { events.append(&mut slice); } + + self.map.truncate(0); } } /// Create an edit. fn add_impl(edit_map: &mut EditMap, at: usize, remove: usize, mut add: Vec, before: bool) { - assert!(!edit_map.consumed, "cannot add after consuming"); let mut index = 0; if remove == 0 && add.is_empty() { -- cgit