From ae0f12e668cfd37728aad907c813431595e6cc1b Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Tue, 19 Jul 2022 15:36:21 +0200 Subject: Use `edit_map` in `subtokenize` --- readme.md | 7 ++-- src/subtokenize.rs | 107 +++++++++++++++++++-------------------------------- src/tokenizer.rs | 2 - src/util/edit_map.rs | 15 +------- 4 files changed, 45 insertions(+), 86 deletions(-) diff --git a/readme.md b/readme.md index f446535..4ab2730 100644 --- a/readme.md +++ b/readme.md @@ -57,7 +57,6 @@ cargo doc --document-private-items #### Refactor -- [ ] (1) Use `edit_map` in `subtokenize` (needs to support links in edits) - [ ] (1) Improve `interrupt`, `concrete`, `lazy` fields somehow? #### Parse @@ -71,8 +70,7 @@ cargo doc --document-private-items #### Misc -- [ ] (3) `no_std`: remove all `HashMap` to use vecs, vecs w/ tuples? -- [ ] (3) Remove splicing and cloning in subtokenizer +- [ ] (3) `no_std`? - [ ] (3) Pass more references around - [ ] (1) Get markers from constructs (`string`, `text`) - [ ] (3) Read through rust docs to figure out what useful functions there are, @@ -208,3 +206,6 @@ important. - [x] (1) Add list of void tokens, check that they’re void - [x] (3) Use `commonmark` tests - [x] (3) Add support for turning off constructs +- [x] (1) Use `edit_map` in `subtokenize` +- [x] (3) Remove all `HashMap`s +- [x] (3) Remove splicing and cloning in subtokenizer diff --git a/src/subtokenize.rs b/src/subtokenize.rs index ce4f788..174ddfe 100644 --- a/src/subtokenize.rs +++ b/src/subtokenize.rs @@ -21,11 +21,10 @@ //! thus the whole document needs to be parsed up to the level of definitions, //! before any level that can include references can be parsed. -use crate::content::{flow::start as flow, string::start as string, text::start as text}; +use crate::content::{string::start as string, text::start as text}; use crate::parser::ParseState; use crate::tokenizer::{ContentType, Event, EventType, State, StateFn, StateFnResult, Tokenizer}; -use crate::util::span; -use std::collections::HashMap; +use crate::util::{edit_map::EditMap, span}; /// Create a link between two [`Event`][]s. /// @@ -63,16 +62,9 @@ pub fn link_to(events: &mut [Event], pevious: usize, next: usize) { /// /// Supposed to be called repeatedly, returns `1: true` when done. pub fn subtokenize(mut events: Vec, parse_state: &ParseState) -> (Vec, bool) { - let mut index = 0; - // Map of first chunks to their tokenizer. - let mut head_to_tokenizer: HashMap = HashMap::new(); - // Map of chunks to their head and corresponding range of events. - let mut link_to_info: HashMap = HashMap::new(); + let mut edit_map = EditMap::new(); let mut done = true; - - if events.is_empty() { - return (events, true); - } + let mut index = 0; while index < events.len() { let event = &events[index]; @@ -83,34 +75,28 @@ pub fn subtokenize(mut events: Vec, parse_state: &ParseState) -> (Vec = Some(index); + let mut link_index: Option = Some(index); // Subtokenizer. let mut tokenizer = Tokenizer::new(event.point.clone(), event.index, parse_state); // Substate. let mut result: StateFnResult = ( - State::Fn(Box::new(if *content_type == ContentType::Flow { - flow - } else if *content_type == ContentType::String { + State::Fn(Box::new(if *content_type == ContentType::String { string } else { text })), None, ); - // Indices into `codes` of each end of chunk. - let mut ends: Vec = vec![]; - // Loop through chunks to pass them in order to the subtokenizer. - while let Some(index_ptr) = index_opt { - let enter = &events[index_ptr]; + // Loop through links to pass them in order to the subtokenizer. + while let Some(index) = link_index { + let enter = &events[index]; assert_eq!(enter.event_type, EventType::Enter); let span = span::Span { start_index: enter.index, - end_index: events[index_ptr + 1].index, + end_index: events[index + 1].index, }; - ends.push(span.end_index); if enter.previous != None { tokenizer.define_skip(&enter.point, enter.index); @@ -127,32 +113,32 @@ pub fn subtokenize(mut events: Vec, parse_state: &ParseState) -> (Vec = Some(index); + let mut link_index = index; + let mut slices = vec![]; + let mut slice_start = 0; while subindex < tokenizer.events.len() { let subevent = &mut tokenizer.events[subindex]; // Find the first event that starts after the end we’re looking // for. - if subevent.event_type == EventType::Enter && subevent.index >= ends[end_index] + if subevent.event_type == EventType::Enter + && subevent.index >= events[link_index + 1].index { - let link = index_opt.unwrap(); - link_to_info.insert(link, (index, last_start, subindex)); + slices.push((link_index, slice_start)); + slice_start = subindex; + link_index = events[link_index].next.unwrap(); + } - last_start = subindex; - end_index += 1; - index_opt = events[link].next; + if subevent.content_type.is_some() { + // Need to call `subtokenize` again. + done = false; } // If there is a `next` link in the subevents, we have to change @@ -163,8 +149,7 @@ pub fn subtokenize(mut events: Vec, parse_state: &ParseState) -> (Vec, parse_state: &ParseState) -> (Vec 0 { - let slice_opt = link_to_info.get(&index); - - if let Some(slice) = slice_opt { - let (head, start, end) = *slice; - // If there’s a slice at this index, it must also point to a head, - // and that head must have a tokenizer. - let tokenizer = head_to_tokenizer.get(&head).unwrap(); + // Finally, inject the subevents. + let mut index = slices.len(); - // To do: figure out a way that moves instead of clones? - events.splice(index..(index + 2), tokenizer.events[start..end].to_vec()); + while index > 0 { + index -= 1; + edit_map.add( + slices[index].0, + 2, + tokenizer.events.split_off(slices[index].1), + ); + } + } } - index -= 1; + index += 1; } - (events, done) + (edit_map.consume(&mut events), done) } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 92a9e1a..8f85af0 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -17,8 +17,6 @@ use crate::token::{Token, VOID_TOKENS}; /// Embedded content type. #[derive(Debug, Clone, Copy, PartialEq)] pub enum ContentType { - /// Represents [flow content][crate::content::flow]. - Flow, /// Represents [text content][crate::content::text]. Text, /// Represents [string content][crate::content::string]. diff --git a/src/util/edit_map.rs b/src/util/edit_map.rs index eda767a..90ff483 100644 --- a/src/util/edit_map.rs +++ b/src/util/edit_map.rs @@ -107,20 +107,7 @@ impl EditMap { next_events.append(append); } - if !add.is_empty() { - let append = &mut add; - let mut index = 0; - - while index < append.len() { - let event = &mut append[index]; - assert!(event.previous.is_none(), "to do?"); - assert!(event.next.is_none(), "to do?"); - index += 1; - } - - next_events.append(append); - } - + next_events.append(&mut add); start = at + remove; index += 1; } -- cgit