From 75c2109c6051009b220436bd823970a374f4f9fd Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Wed, 20 Jul 2022 18:00:52 +0200 Subject: Refactor to share edit map --- src/construct/attention.rs | 14 +++++++------- src/construct/heading_atx.rs | 12 ++++++------ src/construct/heading_setext.rs | 10 +++++----- src/construct/label_end.rs | 18 +++++++++--------- src/construct/list.rs | 10 +++++----- src/construct/paragraph.rs | 10 +++++----- src/construct/partial_data.rs | 8 ++++---- src/subtokenize.rs | 6 +++--- src/tokenizer.rs | 19 +++++++++++++++++-- 9 files changed, 61 insertions(+), 46 deletions(-) diff --git a/src/construct/attention.rs b/src/construct/attention.rs index 2cbc563..f0de63f 100644 --- a/src/construct/attention.rs +++ b/src/construct/attention.rs @@ -205,9 +205,8 @@ fn inside(tokenizer: &mut Tokenizer, code: Code, marker: MarkerKind) -> StateFnR /// Resolve attention sequences. #[allow(clippy::too_many_lines)] -fn resolve_attention(tokenizer: &mut Tokenizer) { +fn resolve_attention(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { let codes = &tokenizer.parse_state.codes; - let mut edit_map = EditMap::new(); let mut start = 0; let mut balance = 0; let mut sequences: Vec = vec![]; @@ -353,7 +352,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer) { // Remove closing sequence if fully used. if sequence_close.size == 0 { sequences.remove(close); - edit_map.add(close_event_index, 2, vec![]); + map.add(close_event_index, 2, vec![]); } else { // Shift remaining closing sequence forward. // Do it here because a sequence can open and close different @@ -376,7 +375,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer) { // Remove opening sequence if fully used. if sequence_open.size == 0 { sequences.remove(open); - edit_map.add(open_event_index, 2, vec![]); + map.add(open_event_index, 2, vec![]); next_index -= 1; } else { // Shift remaining opening sequence backwards. @@ -387,7 +386,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer) { } // Opening. - edit_map.add_before( + map.add_before( // Add after the current sequence (it might remain). open_event_index + 2, 0, @@ -447,7 +446,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer) { ], ); // Closing. - edit_map.add( + map.add( close_event_index, 0, vec![ @@ -523,7 +522,8 @@ fn resolve_attention(tokenizer: &mut Tokenizer) { index += 1; } - edit_map.consume(&mut tokenizer.events); + // This resolver is needed. + true } /// Classify whether a character code represents whitespace, punctuation, or diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs index feb1e9d..bf3de23 100644 --- a/src/construct/heading_atx.rs +++ b/src/construct/heading_atx.rs @@ -193,8 +193,7 @@ fn data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { } /// Resolve heading (atx). -pub fn resolve(tokenizer: &mut Tokenizer) { - let mut edit_map = EditMap::new(); +pub fn resolve(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { let mut index = 0; let mut heading_start: Option = None; let mut data_start: Option = None; @@ -210,7 +209,7 @@ pub fn resolve(tokenizer: &mut Tokenizer) { // If `start` is some, `end` is too. let end = data_end.unwrap(); - edit_map.add( + map.add( start, 0, vec![Event { @@ -225,9 +224,9 @@ pub fn resolve(tokenizer: &mut Tokenizer) { ); // Remove everything between the start and the end. - edit_map.add(start + 1, end - start - 1, vec![]); + map.add(start + 1, end - start - 1, vec![]); - edit_map.add( + map.add( end + 1, 0, vec![Event { @@ -258,5 +257,6 @@ pub fn resolve(tokenizer: &mut Tokenizer) { index += 1; } - edit_map.consume(&mut tokenizer.events); + // This resolver improves events, but is not needed by other resolvers. + false } diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs index d1e7d57..ee6c23c 100644 --- a/src/construct/heading_setext.rs +++ b/src/construct/heading_setext.rs @@ -196,8 +196,7 @@ fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { } /// Resolve heading (setext). -pub fn resolve(tokenizer: &mut Tokenizer) { - let mut edit_map = EditMap::new(); +pub fn resolve(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { let mut index = 0; let mut paragraph_enter: Option = None; let mut paragraph_exit: Option = None; @@ -229,12 +228,13 @@ pub fn resolve(tokenizer: &mut Tokenizer) { let mut heading_exit = tokenizer.events[index].clone(); heading_exit.token_type = Token::HeadingSetext; - edit_map.add(enter, 0, vec![heading_enter]); - edit_map.add(index + 1, 0, vec![heading_exit]); + map.add(enter, 0, vec![heading_enter]); + map.add(index + 1, 0, vec![heading_exit]); } index += 1; } - edit_map.consume(&mut tokenizer.events); + // This resolver improves events, but is not needed by other resolvers. + false } diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs index 6bd634f..bcfe343 100644 --- a/src/construct/label_end.rs +++ b/src/construct/label_end.rs @@ -612,13 +612,12 @@ fn collapsed_reference_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnRes /// This turns correct label start (image, link) and label end into links and /// images, or turns them back into data. #[allow(clippy::too_many_lines)] -pub fn resolve_media(tokenizer: &mut Tokenizer) { +pub fn resolve_media(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { let mut left = tokenizer.label_start_list_loose.split_off(0); let mut left_2 = tokenizer.label_start_stack.split_off(0); let media = tokenizer.media_list.split_off(0); left.append(&mut left_2); - let mut edit_map = EditMap::new(); let events = &tokenizer.events; // Remove loose label starts. @@ -628,7 +627,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) { let data_enter_index = label_start.start.0; let data_exit_index = label_start.start.1; - edit_map.add( + map.add( data_enter_index, data_exit_index - data_enter_index + 1, vec![ @@ -678,7 +677,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) { let group_end_index = media.end.1; // Insert a group enter and label enter. - edit_map.add( + map.add( group_enter_index, 0, vec![ @@ -710,7 +709,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) { // Empty events not allowed. if text_enter_index != text_exit_index { // Insert a text enter. - edit_map.add( + map.add( text_enter_index, 0, vec![Event { @@ -725,7 +724,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) { ); // Insert a text exit. - edit_map.add( + map.add( text_exit_index, 0, vec![Event { @@ -741,7 +740,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) { } // Insert a label exit. - edit_map.add( + map.add( label_exit_index + 1, 0, vec![Event { @@ -756,7 +755,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) { ); // Insert a group exit. - edit_map.add( + map.add( group_end_index + 1, 0, vec![Event { @@ -773,5 +772,6 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) { index += 1; } - edit_map.consume(&mut tokenizer.events); + // This resolver is needed to figure out interleaving with attention. + true } diff --git a/src/construct/list.rs b/src/construct/list.rs index 48ed291..12c666b 100644 --- a/src/construct/list.rs +++ b/src/construct/list.rs @@ -390,8 +390,7 @@ fn nok(_tokenizer: &mut Tokenizer, _code: Code) -> StateFnResult { } /// Find adjacent list items with the same marker. -pub fn resolve_list_item(tokenizer: &mut Tokenizer) { - let mut edit_map = EditMap::new(); +pub fn resolve_list_item(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { let mut index = 0; let mut balance = 0; let mut lists_wip: Vec<(Kind, usize, usize, usize)> = vec![]; @@ -486,11 +485,12 @@ pub fn resolve_list_item(tokenizer: &mut Tokenizer) { list_start.token_type = token_type.clone(); list_end.token_type = token_type; - edit_map.add(list_item.2, 0, vec![list_start]); - edit_map.add(list_item.3 + 1, 0, vec![list_end]); + map.add(list_item.2, 0, vec![list_start]); + map.add(list_item.3 + 1, 0, vec![list_end]); index += 1; } - edit_map.consume(&mut tokenizer.events); + // This resolver improves events, but is not needed by other resolvers. + false } diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs index 1b186e3..134a4b5 100644 --- a/src/construct/paragraph.rs +++ b/src/construct/paragraph.rs @@ -80,8 +80,7 @@ fn inside(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// Merge “`Paragraph`”s, which currently span a single line, into actual /// `Paragraph`s that span multiple lines. -pub fn resolve(tokenizer: &mut Tokenizer) { - let mut edit_map = EditMap::new(); +pub fn resolve(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { let len = tokenizer.events.len(); let mut index = 0; @@ -105,10 +104,10 @@ pub fn resolve(tokenizer: &mut Tokenizer) { && tokenizer.events[enter_next_index].token_type == Token::Paragraph { // Remove Exit:Paragraph, Enter:LineEnding, Exit:LineEnding, Enter:Paragraph. - edit_map.add(exit_index, 3, vec![]); + map.add(exit_index, 3, vec![]); // Remove Enter:Paragraph. - edit_map.add(enter_next_index, 1, vec![]); + map.add(enter_next_index, 1, vec![]); // Add Exit:LineEnding position info to Exit:Data. let line_ending_exit = &tokenizer.events[exit_index + 2]; @@ -142,5 +141,6 @@ pub fn resolve(tokenizer: &mut Tokenizer) { index += 1; } - edit_map.consume(&mut tokenizer.events); + // This resolver is needed by setext headings. + true } diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs index b56efd2..ea71bcf 100644 --- a/src/construct/partial_data.rs +++ b/src/construct/partial_data.rs @@ -75,8 +75,7 @@ fn data(tokenizer: &mut Tokenizer, code: Code, stop: Vec) -> StateFnResult } /// Merge adjacent data events. -pub fn resolve_data(tokenizer: &mut Tokenizer) { - let mut edit_map = EditMap::new(); +pub fn resolve_data(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { let len = tokenizer.events.len(); let mut index = 0; @@ -96,7 +95,7 @@ pub fn resolve_data(tokenizer: &mut Tokenizer) { } if exit_far_index > exit_index { - edit_map.add(exit_index, exit_far_index - exit_index, vec![]); + map.add(exit_index, exit_far_index - exit_index, vec![]); // Change positional info. let exit_far = &tokenizer.events[exit_far_index]; @@ -114,5 +113,6 @@ pub fn resolve_data(tokenizer: &mut Tokenizer) { index += 1; } - edit_map.consume(&mut tokenizer.events); + // This resolver helps, but is not required for other resolvers. + false } diff --git a/src/subtokenize.rs b/src/subtokenize.rs index 7b7d6bd..272978d 100644 --- a/src/subtokenize.rs +++ b/src/subtokenize.rs @@ -62,7 +62,7 @@ pub fn link_to(events: &mut [Event], pevious: usize, next: usize) { /// /// Supposed to be called repeatedly, returns `1: true` when done. pub fn subtokenize(events: &mut Vec, parse_state: &ParseState) -> bool { - let mut edit_map = EditMap::new(); + let mut map = EditMap::new(); let mut done = true; let mut index = 0; @@ -166,7 +166,7 @@ pub fn subtokenize(events: &mut Vec, parse_state: &ParseState) -> bool { while index > 0 { index -= 1; - edit_map.add( + map.add( slices[index].0, 2, tokenizer.events.split_off(slices[index].1), @@ -178,7 +178,7 @@ pub fn subtokenize(events: &mut Vec, parse_state: &ParseState) -> bool { index += 1; } - edit_map.consume(events); + map.consume(events); done } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index cba1752..5d03c92 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -13,6 +13,7 @@ use crate::parser::ParseState; use crate::token::{Token, VOID_TOKENS}; +use crate::util::edit_map::EditMap; /// Embedded content type. #[derive(Debug, Clone, Copy, PartialEq)] @@ -91,7 +92,7 @@ pub type StateFnResult = (State, Option>); /// Resolvers are supposed to change the list of events, because parsing is /// sometimes messy, and they help expose a cleaner interface of events to /// the compiler and other users. -pub type Resolver = dyn FnOnce(&mut Tokenizer); +pub type Resolver = dyn FnOnce(&mut Tokenizer, &mut EditMap) -> bool; /// The result of a state. pub enum State { @@ -624,10 +625,24 @@ impl<'a> Tokenizer<'a> { result = flush_impl(self, func); self.drained = true; + let mut map = EditMap::new(); + let mut consumed = false; while !self.resolvers.is_empty() { let resolver = self.resolvers.remove(0); - resolver(self); + let consume = resolver(self, &mut map); + + if consume { + map.consume(&mut self.events); + consumed = true; + map = EditMap::new(); + } else { + consumed = false; + } + } + + if !consumed { + map.consume(&mut self.events); } } -- cgit