aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/content/document.rs105
-rw-r--r--src/subtokenize.rs142
2 files changed, 99 insertions, 148 deletions
diff --git a/src/content/document.rs b/src/content/document.rs
index cc83415..2c340f2 100644
--- a/src/content/document.rs
+++ b/src/content/document.rs
@@ -9,7 +9,7 @@
//! * [List][crate::construct::list]
use crate::parser::ParseState;
-use crate::subtokenize::subtokenize;
+use crate::subtokenize::{divide_events, subtokenize};
use crate::token::Token;
use crate::tokenizer::{
Container, ContainerState, ContentType, Event, EventType, Link, Point, State, StateName,
@@ -550,101 +550,22 @@ fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) {
// Inject the container events.
fn resolve(tokenizer: &mut Tokenizer) {
let mut child = tokenizer.tokenize_state.child_tokenizer.take().unwrap();
- child.map.consume(&mut child.events);
// To do: see if we can do this less.
tokenizer.map.consume(&mut tokenizer.events);
- let mut link_index = skip::to(&tokenizer.events, 0, &[Token::Data]);
- // To do: share this code with `subtokenize`.
- // Now, loop through all subevents to figure out which parts
- // belong where and fix deep links.
- let mut subindex = 0;
- let mut slices = vec![];
- let mut slice_start = 0;
- let mut old_prev: Option<usize> = None;
-
- while subindex < child.events.len() {
- // Find the first event that starts after the end we’re looking
- // for.
- if child.events[subindex].event_type == EventType::Enter
- && child.events[subindex].point.index >= tokenizer.events[link_index + 1].point.index
- {
- slices.push((link_index, slice_start));
- slice_start = subindex;
- link_index = tokenizer.events[link_index]
- .link
- .as_ref()
- .unwrap()
- .next
- .unwrap();
- }
-
- // Fix sublinks.
- if let Some(sublink_curr) = &child.events[subindex].link {
- if sublink_curr.previous.is_some() {
- let old_prev = old_prev.unwrap();
- let prev_event = &mut child.events[old_prev];
- // The `index` in `events` where the current link is,
- // minus one to get the previous link,
- // minus 2 events (the enter and exit) for each removed
- // link.
- let new_link = if slices.is_empty() {
- old_prev + link_index + 2
- } else {
- old_prev + link_index - (slices.len() - 1) * 2
- };
- prev_event.link.as_mut().unwrap().next = Some(new_link);
- }
- }
-
- // If there is a `next` link in the subevents, we have to change
- // its `previous` index to account for the shifted events.
- // If it points to a next event, we also change the next event’s
- // reference back to *this* event.
- if let Some(sublink_curr) = &child.events[subindex].link {
- if let Some(next) = sublink_curr.next {
- let sublink_next = child.events[next].link.as_mut().unwrap();
-
- old_prev = sublink_next.previous;
-
- sublink_next.previous = sublink_next
- .previous
- // The `index` in `events` where the current link is,
- // minus 2 events (the enter and exit) for each removed
- // link.
- .map(|previous| previous + link_index - (slices.len() * 2));
- }
- }
+ divide_events(
+ &mut tokenizer.map,
+ &tokenizer.events,
+ skip::to(&tokenizer.events, 0, &[Token::Data]),
+ &mut child.events,
+ );
- subindex += 1;
- }
-
- if !child.events.is_empty() {
- slices.push((link_index, slice_start));
- }
-
- // Finally, inject the subevents.
- let mut index = slices.len();
-
- while index > 0 {
- index -= 1;
- let start = slices[index].0;
- tokenizer.map.add(
- start,
- if start == tokenizer.events.len() {
- 0
- } else {
- 2
- },
- child.events.split_off(slices[index].1),
- );
- }
- // To do: share the above code with `subtokenize`.
-
- let mut resolvers = child.resolvers.split_off(0);
- let mut resolver_ids = child.resolver_ids.split_off(0);
- tokenizer.resolvers.append(&mut resolvers);
- tokenizer.resolver_ids.append(&mut resolver_ids);
+ tokenizer
+ .resolvers
+ .append(&mut child.resolvers.split_off(0));
+ tokenizer
+ .resolver_ids
+ .append(&mut child.resolver_ids.split_off(0));
// To do: see if we can do this less.
tokenizer.map.consume(&mut tokenizer.events);
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index b080b46..10f34d0 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -108,72 +108,102 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool {
tokenizer.flush(state, true);
- // Now, loop through all subevents to figure out which parts
- // belong where and fix deep links.
- let mut subindex = 0;
- let mut link_index = index;
- let mut slices = vec![];
- let mut slice_start = 0;
-
- while subindex < tokenizer.events.len() {
- let subevent = &mut tokenizer.events[subindex];
-
- // Find the first event that starts after the end we’re looking
- // for.
- if subevent.event_type == EventType::Enter
- && subevent.point.index >= events[link_index + 1].point.index
- {
- slices.push((link_index, slice_start));
- slice_start = subindex;
- link_index = events[link_index].link.as_ref().unwrap().next.unwrap();
- }
+ divide_events(&mut map, events, index, &mut tokenizer.events);
- if subevent.link.is_some() {
- // Need to call `subtokenize` again.
- done = false;
- }
+ // To do: check `tokenizer.events` if there is a deep content type?
+ done = false;
+ }
+ }
- // If there is a `next` link in the subevents, we have to change
- // its index to account for the shifted events.
- // If it points to a next event, we also change the next event’s
- // reference back to *this* event.
- if let Some(sublink_curr) = &mut subevent.link {
- if let Some(next) = sublink_curr.next {
- // The `index` in `events` where the current link is,
- // minus 2 events (the enter and exit) for each removed
- // link.
- let shift = link_index - (slices.len() * 2);
- sublink_curr.next = sublink_curr.next.map(|next| next + shift);
- let next_ev = &mut tokenizer.events[next];
- let sublink_next = next_ev.link.as_mut().unwrap();
- sublink_next.previous =
- sublink_next.previous.map(|previous| previous + shift);
- }
- }
+ index += 1;
+ }
- subindex += 1;
- }
+ map.consume(events);
+
+ done
+}
- slices.push((link_index, slice_start));
+/// Parse linked events.
+///
+/// Supposed to be called repeatedly, returns `1: true` when done.
+pub fn divide_events(
+ map: &mut EditMap,
+ events: &[Event],
+ mut link_index: usize,
+ child_events: &mut Vec<Event>,
+) {
+ // Now, loop through all subevents to figure out which parts
+ // belong where and fix deep links.
+ let mut subindex = 0;
+ let mut slices = vec![];
+ let mut slice_start = 0;
+ let mut old_prev: Option<usize> = None;
+
+ while subindex < child_events.len() {
+ // Find the first event that starts after the end we’re looking
+ // for.
+ if child_events[subindex].event_type == EventType::Enter
+ && child_events[subindex].point.index >= events[link_index + 1].point.index
+ {
+ slices.push((link_index, slice_start));
+ slice_start = subindex;
+ link_index = events[link_index].link.as_ref().unwrap().next.unwrap();
+ }
- // Finally, inject the subevents.
- let mut index = slices.len();
+ // Fix sublinks.
+ if let Some(sublink_curr) = &child_events[subindex].link {
+ if sublink_curr.previous.is_some() {
+ let old_prev = old_prev.unwrap();
+ let prev_event = &mut child_events[old_prev];
+ // The `index` in `events` where the current link is,
+ // minus one to get the previous link,
+ // minus 2 events (the enter and exit) for each removed
+ // link.
+ let new_link = if slices.is_empty() {
+ old_prev + link_index + 2
+ } else {
+ old_prev + link_index - (slices.len() - 1) * 2
+ };
+ prev_event.link.as_mut().unwrap().next = Some(new_link);
+ }
+ }
- while index > 0 {
- index -= 1;
- map.add(
- slices[index].0,
- 2,
- tokenizer.events.split_off(slices[index].1),
- );
- }
+ // If there is a `next` link in the subevents, we have to change
+ // its `previous` index to account for the shifted events.
+ // If it points to a next event, we also change the next event’s
+ // reference back to *this* event.
+ if let Some(sublink_curr) = &child_events[subindex].link {
+ if let Some(next) = sublink_curr.next {
+ let sublink_next = child_events[next].link.as_mut().unwrap();
+
+ old_prev = sublink_next.previous;
+
+ sublink_next.previous = sublink_next
+ .previous
+ // The `index` in `events` where the current link is,
+ // minus 2 events (the enter and exit) for each removed
+ // link.
+ .map(|previous| previous + link_index - (slices.len() * 2));
}
}
- index += 1;
+ subindex += 1;
}
- map.consume(events);
+ if !child_events.is_empty() {
+ slices.push((link_index, slice_start));
+ }
- done
+ // Finally, inject the subevents.
+ let mut index = slices.len();
+
+ while index > 0 {
+ index -= 1;
+ let start = slices[index].0;
+ map.add(
+ start,
+ if start == events.len() { 0 } else { 2 },
+ child_events.split_off(slices[index].1),
+ );
+ }
}