diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-07-19 15:36:21 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-07-19 15:36:21 +0200 |
commit | ae0f12e668cfd37728aad907c813431595e6cc1b (patch) | |
tree | 3cdc7282643656633a11c992cd7d1d050924dadc | |
parent | c4cd482fd5006cde338e49104f2abdbd20fd644d (diff) | |
download | markdown-rs-ae0f12e668cfd37728aad907c813431595e6cc1b.tar.gz markdown-rs-ae0f12e668cfd37728aad907c813431595e6cc1b.tar.bz2 markdown-rs-ae0f12e668cfd37728aad907c813431595e6cc1b.zip |
Use `edit_map` in `subtokenize`
-rw-r--r-- | readme.md | 7 | ||||
-rw-r--r-- | src/subtokenize.rs | 107 | ||||
-rw-r--r-- | src/tokenizer.rs | 2 | ||||
-rw-r--r-- | src/util/edit_map.rs | 15 |
4 files changed, 45 insertions, 86 deletions
@@ -57,7 +57,6 @@ cargo doc --document-private-items #### Refactor -- [ ] (1) Use `edit_map` in `subtokenize` (needs to support links in edits) - [ ] (1) Improve `interrupt`, `concrete`, `lazy` fields somehow? #### Parse @@ -71,8 +70,7 @@ cargo doc --document-private-items #### Misc -- [ ] (3) `no_std`: remove all `HashMap` to use vecs, vecs w/ tuples? -- [ ] (3) Remove splicing and cloning in subtokenizer +- [ ] (3) `no_std`? - [ ] (3) Pass more references around - [ ] (1) Get markers from constructs (`string`, `text`) - [ ] (3) Read through rust docs to figure out what useful functions there are, @@ -208,3 +206,6 @@ important. - [x] (1) Add list of void tokens, check that they’re void - [x] (3) Use `commonmark` tests - [x] (3) Add support for turning off constructs +- [x] (1) Use `edit_map` in `subtokenize` +- [x] (3) Remove all `HashMap`s +- [x] (3) Remove splicing and cloning in subtokenizer diff --git a/src/subtokenize.rs b/src/subtokenize.rs index ce4f788..174ddfe 100644 --- a/src/subtokenize.rs +++ b/src/subtokenize.rs @@ -21,11 +21,10 @@ //! thus the whole document needs to be parsed up to the level of definitions, //! before any level that can include references can be parsed. -use crate::content::{flow::start as flow, string::start as string, text::start as text}; +use crate::content::{string::start as string, text::start as text}; use crate::parser::ParseState; use crate::tokenizer::{ContentType, Event, EventType, State, StateFn, StateFnResult, Tokenizer}; -use crate::util::span; -use std::collections::HashMap; +use crate::util::{edit_map::EditMap, span}; /// Create a link between two [`Event`][]s. /// @@ -63,16 +62,9 @@ pub fn link_to(events: &mut [Event], pevious: usize, next: usize) { /// /// Supposed to be called repeatedly, returns `1: true` when done. pub fn subtokenize(mut events: Vec<Event>, parse_state: &ParseState) -> (Vec<Event>, bool) { - let mut index = 0; - // Map of first chunks to their tokenizer. - let mut head_to_tokenizer: HashMap<usize, Tokenizer> = HashMap::new(); - // Map of chunks to their head and corresponding range of events. - let mut link_to_info: HashMap<usize, (usize, usize, usize)> = HashMap::new(); + let mut edit_map = EditMap::new(); let mut done = true; - - if events.is_empty() { - return (events, true); - } + let mut index = 0; while index < events.len() { let event = &events[index]; @@ -83,34 +75,28 @@ pub fn subtokenize(mut events: Vec<Event>, parse_state: &ParseState) -> (Vec<Eve // No need to enter linked events again. if event.previous == None { - done = false; // Index into `events` pointing to a chunk. - let mut index_opt: Option<usize> = Some(index); + let mut link_index: Option<usize> = Some(index); // Subtokenizer. let mut tokenizer = Tokenizer::new(event.point.clone(), event.index, parse_state); // Substate. let mut result: StateFnResult = ( - State::Fn(Box::new(if *content_type == ContentType::Flow { - flow - } else if *content_type == ContentType::String { + State::Fn(Box::new(if *content_type == ContentType::String { string } else { text })), None, ); - // Indices into `codes` of each end of chunk. - let mut ends: Vec<usize> = vec![]; - // Loop through chunks to pass them in order to the subtokenizer. - while let Some(index_ptr) = index_opt { - let enter = &events[index_ptr]; + // Loop through links to pass them in order to the subtokenizer. + while let Some(index) = link_index { + let enter = &events[index]; assert_eq!(enter.event_type, EventType::Enter); let span = span::Span { start_index: enter.index, - end_index: events[index_ptr + 1].index, + end_index: events[index + 1].index, }; - ends.push(span.end_index); if enter.previous != None { tokenizer.define_skip(&enter.point, enter.index); @@ -127,32 +113,32 @@ pub fn subtokenize(mut events: Vec<Event>, parse_state: &ParseState) -> (Vec<Eve enter.next == None, ); assert!(result.1.is_none(), "expected no remainder"); - index_opt = enter.next; + link_index = enter.next; } - // Now, loop through all subevents (and `ends`), to figure out - // which parts belong where. - // Current index. + // Now, loop through all subevents to figure out which parts + // belong where and fix deep links. let mut subindex = 0; - // Index into subevents that starts the current slice. - let mut last_start = 0; - // Counter into `ends`: the linked token we are at. - let mut end_index = 0; - let mut index_opt: Option<usize> = Some(index); + let mut link_index = index; + let mut slices = vec![]; + let mut slice_start = 0; while subindex < tokenizer.events.len() { let subevent = &mut tokenizer.events[subindex]; // Find the first event that starts after the end we’re looking // for. - if subevent.event_type == EventType::Enter && subevent.index >= ends[end_index] + if subevent.event_type == EventType::Enter + && subevent.index >= events[link_index + 1].index { - let link = index_opt.unwrap(); - link_to_info.insert(link, (index, last_start, subindex)); + slices.push((link_index, slice_start)); + slice_start = subindex; + link_index = events[link_index].next.unwrap(); + } - last_start = subindex; - end_index += 1; - index_opt = events[link].next; + if subevent.content_type.is_some() { + // Need to call `subtokenize` again. + done = false; } // If there is a `next` link in the subevents, we have to change @@ -163,8 +149,7 @@ pub fn subtokenize(mut events: Vec<Event>, parse_state: &ParseState) -> (Vec<Eve // The `index` in `events` where the current link is, // minus 2 events (the enter and exit) for each removed // link. - let shift = index_opt.unwrap() - (end_index * 2); - + let shift = link_index - (slices.len() * 2); subevent.next = Some(next + shift); let next_ev = &mut tokenizer.events[next]; let previous = next_ev.previous.unwrap(); @@ -174,36 +159,24 @@ pub fn subtokenize(mut events: Vec<Event>, parse_state: &ParseState) -> (Vec<Eve subindex += 1; } - link_to_info.insert(index_opt.unwrap(), (index, last_start, subindex)); - head_to_tokenizer.insert(index, tokenizer); - } - } + slices.push((link_index, slice_start)); - index += 1; - } - - // Now that we fed everything into a tokenizer, and we know which parts - // belong where, the final task is to splice the events from each - // tokenizer into the current events. - // To do: instead of splicing, it might be possible to create a new `events` - // from each slice and slices from events? - let mut index = events.len() - 1; - - while index > 0 { - let slice_opt = link_to_info.get(&index); - - if let Some(slice) = slice_opt { - let (head, start, end) = *slice; - // If there’s a slice at this index, it must also point to a head, - // and that head must have a tokenizer. - let tokenizer = head_to_tokenizer.get(&head).unwrap(); + // Finally, inject the subevents. + let mut index = slices.len(); - // To do: figure out a way that moves instead of clones? - events.splice(index..(index + 2), tokenizer.events[start..end].to_vec()); + while index > 0 { + index -= 1; + edit_map.add( + slices[index].0, + 2, + tokenizer.events.split_off(slices[index].1), + ); + } + } } - index -= 1; + index += 1; } - (events, done) + (edit_map.consume(&mut events), done) } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 92a9e1a..8f85af0 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -17,8 +17,6 @@ use crate::token::{Token, VOID_TOKENS}; /// Embedded content type. #[derive(Debug, Clone, Copy, PartialEq)] pub enum ContentType { - /// Represents [flow content][crate::content::flow]. - Flow, /// Represents [text content][crate::content::text]. Text, /// Represents [string content][crate::content::string]. diff --git a/src/util/edit_map.rs b/src/util/edit_map.rs index eda767a..90ff483 100644 --- a/src/util/edit_map.rs +++ b/src/util/edit_map.rs @@ -107,20 +107,7 @@ impl EditMap { next_events.append(append); } - if !add.is_empty() { - let append = &mut add; - let mut index = 0; - - while index < append.len() { - let event = &mut append[index]; - assert!(event.previous.is_none(), "to do?"); - assert!(event.next.is_none(), "to do?"); - index += 1; - } - - next_events.append(append); - } - + next_events.append(&mut add); start = at + remove; index += 1; } |