diff options
-rw-r--r-- | src/construct/attention.rs | 4 | ||||
-rw-r--r-- | src/construct/heading_atx.rs | 4 | ||||
-rw-r--r-- | src/construct/heading_setext.rs | 6 | ||||
-rw-r--r-- | src/construct/label_end.rs | 4 | ||||
-rw-r--r-- | src/construct/list.rs | 6 | ||||
-rw-r--r-- | src/construct/paragraph.rs | 6 | ||||
-rw-r--r-- | src/construct/partial_data.rs | 6 | ||||
-rw-r--r-- | src/content/document.rs | 15 | ||||
-rw-r--r-- | src/subtokenize.rs | 6 | ||||
-rw-r--r-- | src/tokenizer.rs | 12 | ||||
-rw-r--r-- | src/util/edit_map.rs | 70 |
11 files changed, 63 insertions, 76 deletions
diff --git a/src/construct/attention.rs b/src/construct/attention.rs index 7e99600..2cbc563 100644 --- a/src/construct/attention.rs +++ b/src/construct/attention.rs @@ -205,7 +205,7 @@ fn inside(tokenizer: &mut Tokenizer, code: Code, marker: MarkerKind) -> StateFnR /// Resolve attention sequences. #[allow(clippy::too_many_lines)] -fn resolve_attention(tokenizer: &mut Tokenizer) -> Vec<Event> { +fn resolve_attention(tokenizer: &mut Tokenizer) { let codes = &tokenizer.parse_state.codes; let mut edit_map = EditMap::new(); let mut start = 0; @@ -523,7 +523,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer) -> Vec<Event> { index += 1; } - edit_map.consume(tokenizer.events.split_off(0)) + edit_map.consume(&mut tokenizer.events); } /// Classify whether a character code represents whitespace, punctuation, or diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs index 4546924..feb1e9d 100644 --- a/src/construct/heading_atx.rs +++ b/src/construct/heading_atx.rs @@ -193,7 +193,7 @@ fn data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { } /// Resolve heading (atx). -pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> { +pub fn resolve(tokenizer: &mut Tokenizer) { let mut edit_map = EditMap::new(); let mut index = 0; let mut heading_start: Option<usize> = None; @@ -258,5 +258,5 @@ pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> { index += 1; } - edit_map.consume(tokenizer.events.split_off(0)) + edit_map.consume(&mut tokenizer.events); } diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs index 841bf53..d1e7d57 100644 --- a/src/construct/heading_setext.rs +++ b/src/construct/heading_setext.rs @@ -60,7 +60,7 @@ use crate::constant::TAB_SIZE; use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}; use crate::token::Token; -use crate::tokenizer::{Code, Event, EventType, State, StateFnResult, Tokenizer}; +use crate::tokenizer::{Code, EventType, State, StateFnResult, Tokenizer}; use crate::util::{edit_map::EditMap, skip::opt_back as skip_opt_back}; /// Kind of underline. @@ -196,7 +196,7 @@ fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { } /// Resolve heading (setext). -pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> { +pub fn resolve(tokenizer: &mut Tokenizer) { let mut edit_map = EditMap::new(); let mut index = 0; let mut paragraph_enter: Option<usize> = None; @@ -236,5 +236,5 @@ pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> { index += 1; } - edit_map.consume(tokenizer.events.split_off(0)) + edit_map.consume(&mut tokenizer.events); } diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs index 2124681..6bd634f 100644 --- a/src/construct/label_end.rs +++ b/src/construct/label_end.rs @@ -612,7 +612,7 @@ fn collapsed_reference_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnRes /// This turns correct label start (image, link) and label end into links and /// images, or turns them back into data. #[allow(clippy::too_many_lines)] -pub fn resolve_media(tokenizer: &mut Tokenizer) -> Vec<Event> { +pub fn resolve_media(tokenizer: &mut Tokenizer) { let mut left = tokenizer.label_start_list_loose.split_off(0); let mut left_2 = tokenizer.label_start_stack.split_off(0); let media = tokenizer.media_list.split_off(0); @@ -773,5 +773,5 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) -> Vec<Event> { index += 1; } - edit_map.consume(tokenizer.events.split_off(0)) + edit_map.consume(&mut tokenizer.events); } diff --git a/src/construct/list.rs b/src/construct/list.rs index db8af36..48ed291 100644 --- a/src/construct/list.rs +++ b/src/construct/list.rs @@ -50,7 +50,7 @@ use crate::construct::{ thematic_break::start as thematic_break, }; use crate::token::Token; -use crate::tokenizer::{Code, Event, EventType, State, StateFnResult, Tokenizer}; +use crate::tokenizer::{Code, EventType, State, StateFnResult, Tokenizer}; use crate::util::{ edit_map::EditMap, skip, @@ -390,7 +390,7 @@ fn nok(_tokenizer: &mut Tokenizer, _code: Code) -> StateFnResult { } /// Find adjacent list items with the same marker. -pub fn resolve_list_item(tokenizer: &mut Tokenizer) -> Vec<Event> { +pub fn resolve_list_item(tokenizer: &mut Tokenizer) { let mut edit_map = EditMap::new(); let mut index = 0; let mut balance = 0; @@ -492,5 +492,5 @@ pub fn resolve_list_item(tokenizer: &mut Tokenizer) -> Vec<Event> { index += 1; } - edit_map.consume(tokenizer.events.split_off(0)) + edit_map.consume(&mut tokenizer.events); } diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs index 53030f4..1b186e3 100644 --- a/src/construct/paragraph.rs +++ b/src/construct/paragraph.rs @@ -33,7 +33,7 @@ //! [html]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-p-element use crate::token::Token; -use crate::tokenizer::{Code, ContentType, Event, EventType, State, StateFnResult, Tokenizer}; +use crate::tokenizer::{Code, ContentType, EventType, State, StateFnResult, Tokenizer}; use crate::util::{edit_map::EditMap, skip::opt as skip_opt}; /// Before a paragraph. @@ -80,7 +80,7 @@ fn inside(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// Merge “`Paragraph`”s, which currently span a single line, into actual /// `Paragraph`s that span multiple lines. -pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> { +pub fn resolve(tokenizer: &mut Tokenizer) { let mut edit_map = EditMap::new(); let len = tokenizer.events.len(); let mut index = 0; @@ -142,5 +142,5 @@ pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> { index += 1; } - edit_map.consume(tokenizer.events.split_off(0)) + edit_map.consume(&mut tokenizer.events); } diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs index b59bb76..b56efd2 100644 --- a/src/construct/partial_data.rs +++ b/src/construct/partial_data.rs @@ -7,7 +7,7 @@ //! [text]: crate::content::text use crate::token::Token; -use crate::tokenizer::{Code, Event, EventType, State, StateFnResult, Tokenizer}; +use crate::tokenizer::{Code, EventType, State, StateFnResult, Tokenizer}; use crate::util::edit_map::EditMap; /// At the beginning of data. @@ -75,7 +75,7 @@ fn data(tokenizer: &mut Tokenizer, code: Code, stop: Vec<Code>) -> StateFnResult } /// Merge adjacent data events. -pub fn resolve_data(tokenizer: &mut Tokenizer) -> Vec<Event> { +pub fn resolve_data(tokenizer: &mut Tokenizer) { let mut edit_map = EditMap::new(); let len = tokenizer.events.len(); let mut index = 0; @@ -114,5 +114,5 @@ pub fn resolve_data(tokenizer: &mut Tokenizer) -> Vec<Event> { index += 1; } - edit_map.consume(tokenizer.events.split_off(0)) + edit_map.consume(&mut tokenizer.events); } diff --git a/src/content/document.rs b/src/content/document.rs index 53e58c4..d35060c 100644 --- a/src/content/document.rs +++ b/src/content/document.rs @@ -106,15 +106,16 @@ pub fn document(parse_state: &mut ParseState, point: Point, index: usize) -> Vec index += 1; } - let mut result = (tokenizer.events, false); + let mut done = false; + let mut events = tokenizer.events; parse_state.definitions = next_definitions; - while !result.1 { - result = subtokenize(result.0, parse_state); + while !done { + done = subtokenize(&mut events, parse_state); } - result.0 + events } /// Before document. @@ -415,7 +416,7 @@ fn flow_end( info = exit_containers(tokenizer, info, &Phase::Eof); } - tokenizer.events = resolve(tokenizer, &info); + resolve(tokenizer, &info); (State::Ok, Some(vec![code])) } @@ -481,7 +482,7 @@ fn exit_containers( } // Inject the container events. -fn resolve(tokenizer: &mut Tokenizer, info: &DocumentInfo) -> Vec<Event> { +fn resolve(tokenizer: &mut Tokenizer, info: &DocumentInfo) { let mut map = EditMap::new(); let mut line_index = 0; let mut index = 0; @@ -537,5 +538,5 @@ fn resolve(tokenizer: &mut Tokenizer, info: &DocumentInfo) -> Vec<Event> { add, ); - map.consume(tokenizer.events.split_off(0)) + map.consume(&mut tokenizer.events); } diff --git a/src/subtokenize.rs b/src/subtokenize.rs index df7b015..7b7d6bd 100644 --- a/src/subtokenize.rs +++ b/src/subtokenize.rs @@ -61,7 +61,7 @@ pub fn link_to(events: &mut [Event], pevious: usize, next: usize) { /// Parse linked events. /// /// Supposed to be called repeatedly, returns `1: true` when done. -pub fn subtokenize(events: Vec<Event>, parse_state: &ParseState) -> (Vec<Event>, bool) { +pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool { let mut edit_map = EditMap::new(); let mut done = true; let mut index = 0; @@ -178,5 +178,7 @@ pub fn subtokenize(events: Vec<Event>, parse_state: &ParseState) -> (Vec<Event>, index += 1; } - (edit_map.consume(events), done) + edit_map.consume(events); + + done } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 8813bdc..cba1752 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -91,7 +91,7 @@ pub type StateFnResult = (State, Option<Vec<Code>>); /// Resolvers are supposed to change the list of events, because parsing is /// sometimes messy, and they help expose a cleaner interface of events to /// the compiler and other users. -pub type Resolver = dyn FnOnce(&mut Tokenizer) -> Vec<Event>; +pub type Resolver = dyn FnOnce(&mut Tokenizer); /// The result of a state. pub enum State { @@ -167,8 +167,6 @@ struct InternalState { point: Point, } -// #[derive(Debug)] - /// A tokenizer itself. #[allow(clippy::struct_excessive_bools)] pub struct Tokenizer<'a> { @@ -288,12 +286,12 @@ impl<'a> Tokenizer<'a> { /// Define a jump between two places. pub fn define_skip(&mut self, point: &Point, index: usize) { - define_skip_current_impl(self, point.line, (point.column, point.offset, index)); + define_skip_impl(self, point.line, (point.column, point.offset, index)); } /// Define the current place as a jump between two places. pub fn define_skip_current(&mut self) { - define_skip_current_impl( + define_skip_impl( self, self.point.line, (self.point.column, self.point.offset, self.index), @@ -629,7 +627,7 @@ impl<'a> Tokenizer<'a> { while !self.resolvers.is_empty() { let resolver = self.resolvers.remove(0); - self.events = resolver(self); + resolver(self); } } @@ -768,7 +766,7 @@ fn flush_impl( /// /// This defines how much columns, offsets, and the `index` are increased when /// consuming a line ending. -fn define_skip_current_impl(tokenizer: &mut Tokenizer, line: usize, info: (usize, usize, usize)) { +fn define_skip_impl(tokenizer: &mut Tokenizer, line: usize, info: (usize, usize, usize)) { log::debug!("position: define skip: {:?} -> ({:?})", line, info); let at = line - tokenizer.line_start; diff --git a/src/util/edit_map.rs b/src/util/edit_map.rs index 1f43a3a..b1b5064 100644 --- a/src/util/edit_map.rs +++ b/src/util/edit_map.rs @@ -13,26 +13,23 @@ use crate::tokenizer::Event; /// Shift `previous` and `next` links according to `jumps`. /// /// This fixes links in case there are events removed or added between them. -fn shift_links(events: &mut [Event], jumps: &[(usize, isize)]) { +fn shift_links(events: &mut [Event], jumps: &[(usize, usize, usize)]) { let map = |before| { + // To do: this theoretically gets slow, investigate how to improve it. let mut jump_index = 0; - let mut jump = 0; + let mut remove = 0; + let mut add = 0; while jump_index < jumps.len() { if jumps[jump_index].0 > before { break; } - jump = jumps[jump_index].1; + (_, remove, add) = jumps[jump_index]; jump_index += 1; } - #[allow(clippy::pedantic)] - let next_i = (before as isize) + jump; - assert!(next_i >= 0, "cannot shift before `0`"); - #[allow(clippy::pedantic)] - let next = next_i as usize; - next + before + add - remove }; let mut index = 0; @@ -72,59 +69,46 @@ impl EditMap { add_impl(self, index, remove, add, true); } /// Done, change the events. - pub fn consume(&mut self, mut events: Vec<Event>) -> Vec<Event> { + pub fn consume(&mut self, events: &mut Vec<Event>) { self.map .sort_unstable_by(|a, b| a.0.partial_cmp(&b.0).unwrap()); assert!(!self.consumed, "cannot consume after consuming"); self.consumed = true; - let mut jumps: Vec<(usize, isize)> = vec![]; + // Calculate jumps: where items in the current list move to. + let mut jumps = Vec::with_capacity(self.map.len()); let mut index = 0; - let mut shift = 0; + let mut add_acc = 0; + let mut remove_acc = 0; while index < self.map.len() { let (at, remove, add) = &self.map[index]; - - #[allow(clippy::pedantic)] - let next = shift + (add.len() as isize) - (*remove as isize); - shift = next; - jumps.push((*at, shift)); + add_acc += add.len(); + remove_acc += remove; + jumps.push((*at, remove_acc, add_acc)); index += 1; } + let len_before = events.len(); let mut index = self.map.len(); - let mut vecs: Vec<Vec<Event>> = vec![]; - let mut capacity = 0; - + let mut vecs: Vec<Vec<Event>> = Vec::with_capacity(index * 2 + 1); while index > 0 { index -= 1; - let at = self.map[index].0; - - let mut keep = events.split_off(at + self.map[index].1); + let (at, remove, _) = self.map[index]; + let mut keep = events.split_off(at + remove); shift_links(&mut keep, &jumps); - capacity += keep.len(); vecs.push(keep); - - let add = self.map[index].2.split_off(0); - capacity += add.len(); - vecs.push(add); - + vecs.push(self.map[index].2.split_off(0)); events.truncate(at); } + shift_links(events, &jumps); + vecs.push(events.split_off(0)); - shift_links(&mut events, &jumps); - capacity += events.len(); - vecs.push(events); + events.reserve(len_before + add_acc - remove_acc); - let mut next_events: Vec<Event> = Vec::with_capacity(capacity); - let mut slice = vecs.pop(); - - while let Some(mut x) = slice { - next_events.append(&mut x); - slice = vecs.pop(); + while let Some(mut slice) = vecs.pop() { + events.append(&mut slice); } - - next_events } } @@ -133,12 +117,14 @@ fn add_impl(edit_map: &mut EditMap, at: usize, remove: usize, mut add: Vec<Event assert!(!edit_map.consumed, "cannot add after consuming"); let mut index = 0; + if remove == 0 && add.is_empty() { + return; + } + while index < edit_map.map.len() { if edit_map.map[index].0 == at { edit_map.map[index].1 += remove; - // To do: these might have to be split into several chunks instead - // of one, if links in `curr_add` are supported. if before { add.append(&mut edit_map.map[index].2); edit_map.map[index].2 = add; |