aboutsummaryrefslogtreecommitdiffstats
path: root/src/subtokenize.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/subtokenize.rs')
-rw-r--r--src/subtokenize.rs142
1 files changed, 86 insertions, 56 deletions
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index b080b46..10f34d0 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -108,72 +108,102 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool {
tokenizer.flush(state, true);
- // Now, loop through all subevents to figure out which parts
- // belong where and fix deep links.
- let mut subindex = 0;
- let mut link_index = index;
- let mut slices = vec![];
- let mut slice_start = 0;
-
- while subindex < tokenizer.events.len() {
- let subevent = &mut tokenizer.events[subindex];
-
- // Find the first event that starts after the end we’re looking
- // for.
- if subevent.event_type == EventType::Enter
- && subevent.point.index >= events[link_index + 1].point.index
- {
- slices.push((link_index, slice_start));
- slice_start = subindex;
- link_index = events[link_index].link.as_ref().unwrap().next.unwrap();
- }
+ divide_events(&mut map, events, index, &mut tokenizer.events);
- if subevent.link.is_some() {
- // Need to call `subtokenize` again.
- done = false;
- }
+ // To do: check `tokenizer.events` if there is a deep content type?
+ done = false;
+ }
+ }
- // If there is a `next` link in the subevents, we have to change
- // its index to account for the shifted events.
- // If it points to a next event, we also change the next event’s
- // reference back to *this* event.
- if let Some(sublink_curr) = &mut subevent.link {
- if let Some(next) = sublink_curr.next {
- // The `index` in `events` where the current link is,
- // minus 2 events (the enter and exit) for each removed
- // link.
- let shift = link_index - (slices.len() * 2);
- sublink_curr.next = sublink_curr.next.map(|next| next + shift);
- let next_ev = &mut tokenizer.events[next];
- let sublink_next = next_ev.link.as_mut().unwrap();
- sublink_next.previous =
- sublink_next.previous.map(|previous| previous + shift);
- }
- }
+ index += 1;
+ }
- subindex += 1;
- }
+ map.consume(events);
+
+ done
+}
- slices.push((link_index, slice_start));
+/// Divide `child_events` over the chain of linked events starting at `link_index`.
+///
+/// Returns nothing: each resulting slice is passed to `map.add`, keyed by its link's index in `events`.
+pub fn divide_events(
+ map: &mut EditMap,
+ events: &[Event],
+ mut link_index: usize,
+ child_events: &mut Vec<Event>,
+) {
+ // Walk all child events to figure out which linked event each one
+ // belongs to, fixing nested links along the way.
+ let mut subindex = 0;
+ let mut slices = vec![];
+ let mut slice_start = 0;
+ let mut old_prev: Option<usize> = None;
+
+ while subindex < child_events.len() {
+ // An enter at or past the point of `events[link_index + 1]` starts a
+ // new slice: close the current one and follow the link to `next`.
+ if child_events[subindex].event_type == EventType::Enter
+ && child_events[subindex].point.index >= events[link_index + 1].point.index
+ {
+ slices.push((link_index, slice_start));
+ slice_start = subindex;
+ link_index = events[link_index].link.as_ref().unwrap().next.unwrap();
+ }
- // Finally, inject the subevents.
- let mut index = slices.len();
+ // Fix sublinks: rewrite `next` on the child event at `old_prev` (the unwrap panics if `old_prev` was never set).
+ if let Some(sublink_curr) = &child_events[subindex].link {
+ if sublink_curr.previous.is_some() {
+ let old_prev = old_prev.unwrap();
+ let prev_event = &mut child_events[old_prev];
+ // The `index` in `events` where the current link is,
+ // minus one to get the previous link,
+ // minus 2 events (the enter and exit) for each removed
+ // link.
+ let new_link = if slices.is_empty() {
+ old_prev + link_index + 2
+ } else {
+ old_prev + link_index - (slices.len() - 1) * 2
+ };
+ prev_event.link.as_mut().unwrap().next = Some(new_link);
+ }
+ }
- while index > 0 {
- index -= 1;
- map.add(
- slices[index].0,
- 2,
- tokenizer.events.split_off(slices[index].1),
- );
- }
+ // If this child event links to a `next` child event, remember the
+ // original `previous` of that event in `old_prev` (read by the sublink
+ // fix above on a later iteration), then shift its `previous` to
+ // account for the moved events.
+ if let Some(sublink_curr) = &child_events[subindex].link {
+ if let Some(next) = sublink_curr.next {
+ let sublink_next = child_events[next].link.as_mut().unwrap();
+
+ old_prev = sublink_next.previous;
+
+ sublink_next.previous = sublink_next
+ .previous
+ // The `index` in `events` where the current link is,
+ // minus 2 events (the enter and exit) for each removed
+ // link.
+ .map(|previous| previous + link_index - (slices.len() * 2));
}
}
- index += 1;
+ subindex += 1;
}
- map.consume(events);
+ if !child_events.is_empty() {
+ slices.push((link_index, slice_start));
+ }
- done
+ // Finally, inject the subevents, last slice first so each `split_off` takes its slice from the tail.
+ let mut index = slices.len();
+
+ while index > 0 {
+ index -= 1;
+ let start = slices[index].0;
+ map.add(
+ start,
+ if start == events.len() { 0 } else { 2 },
+ child_events.split_off(slices[index].1),
+ );
+ }
}