diff options
Diffstat (limited to '')
| -rw-r--r-- | src/content/document.rs | 105 | ||||
| -rw-r--r-- | src/subtokenize.rs | 142 | 
2 files changed, 99 insertions, 148 deletions
| diff --git a/src/content/document.rs b/src/content/document.rs index cc83415..2c340f2 100644 --- a/src/content/document.rs +++ b/src/content/document.rs @@ -9,7 +9,7 @@  //! *   [List][crate::construct::list]  use crate::parser::ParseState; -use crate::subtokenize::subtokenize; +use crate::subtokenize::{divide_events, subtokenize};  use crate::token::Token;  use crate::tokenizer::{      Container, ContainerState, ContentType, Event, EventType, Link, Point, State, StateName, @@ -550,101 +550,22 @@ fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) {  // Inject the container events.  fn resolve(tokenizer: &mut Tokenizer) {      let mut child = tokenizer.tokenize_state.child_tokenizer.take().unwrap(); -    child.map.consume(&mut child.events);      // To do: see if we can do this less.      tokenizer.map.consume(&mut tokenizer.events); -    let mut link_index = skip::to(&tokenizer.events, 0, &[Token::Data]); -    // To do: share this code with `subtokenize`. -    // Now, loop through all subevents to figure out which parts -    // belong where and fix deep links. -    let mut subindex = 0; -    let mut slices = vec![]; -    let mut slice_start = 0; -    let mut old_prev: Option<usize> = None; - -    while subindex < child.events.len() { -        // Find the first event that starts after the end we’re looking -        // for. -        if child.events[subindex].event_type == EventType::Enter -            && child.events[subindex].point.index >= tokenizer.events[link_index + 1].point.index -        { -            slices.push((link_index, slice_start)); -            slice_start = subindex; -            link_index = tokenizer.events[link_index] -                .link -                .as_ref() -                .unwrap() -                .next -                .unwrap(); -        } - -        // Fix sublinks. -        if let Some(sublink_curr) = &child.events[subindex].link { -            if sublink_curr.previous.is_some() { -                let old_prev = old_prev.unwrap(); -                let prev_event = &mut child.events[old_prev]; -                // The `index` in `events` where the current link is, -                // minus one to get the previous link, -                // minus 2 events (the enter and exit) for each removed -                // link. -                let new_link = if slices.is_empty() { -                    old_prev + link_index + 2 -                } else { -                    old_prev + link_index - (slices.len() - 1) * 2 -                }; -                prev_event.link.as_mut().unwrap().next = Some(new_link); -            } -        } - -        // If there is a `next` link in the subevents, we have to change -        // its `previous` index to account for the shifted events. -        // If it points to a next event, we also change the next event’s -        // reference back to *this* event. -        if let Some(sublink_curr) = &child.events[subindex].link { -            if let Some(next) = sublink_curr.next { -                let sublink_next = child.events[next].link.as_mut().unwrap(); - -                old_prev = sublink_next.previous; - -                sublink_next.previous = sublink_next -                    .previous -                    // The `index` in `events` where the current link is, -                    // minus 2 events (the enter and exit) for each removed -                    // link. -                    .map(|previous| previous + link_index - (slices.len() * 2)); -            } -        } +    divide_events( +        &mut tokenizer.map, +        &tokenizer.events, +        skip::to(&tokenizer.events, 0, &[Token::Data]), +        &mut child.events, +    ); -        subindex += 1; -    } - -    if !child.events.is_empty() { -        slices.push((link_index, slice_start)); -    } - -    // Finally, inject the subevents. -    let mut index = slices.len(); - -    while index > 0 { -        index -= 1; -        let start = slices[index].0; -        tokenizer.map.add( -            start, -            if start == tokenizer.events.len() { -                0 -            } else { -                2 -            }, -            child.events.split_off(slices[index].1), -        ); -    } -    // To do: share the above code with `subtokenize`. - -    let mut resolvers = child.resolvers.split_off(0); -    let mut resolver_ids = child.resolver_ids.split_off(0); -    tokenizer.resolvers.append(&mut resolvers); -    tokenizer.resolver_ids.append(&mut resolver_ids); +    tokenizer +        .resolvers +        .append(&mut child.resolvers.split_off(0)); +    tokenizer +        .resolver_ids +        .append(&mut child.resolver_ids.split_off(0));      // To do: see if we can do this less.      tokenizer.map.consume(&mut tokenizer.events); diff --git a/src/subtokenize.rs b/src/subtokenize.rs index b080b46..10f34d0 100644 --- a/src/subtokenize.rs +++ b/src/subtokenize.rs @@ -108,72 +108,102 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool {                  tokenizer.flush(state, true); -                // Now, loop through all subevents to figure out which parts -                // belong where and fix deep links. -                let mut subindex = 0; -                let mut link_index = index; -                let mut slices = vec![]; -                let mut slice_start = 0; - -                while subindex < tokenizer.events.len() { -                    let subevent = &mut tokenizer.events[subindex]; - -                    // Find the first event that starts after the end we’re looking -                    // for. -                    if subevent.event_type == EventType::Enter -                        && subevent.point.index >= events[link_index + 1].point.index -                    { -                        slices.push((link_index, slice_start)); -                        slice_start = subindex; -                        link_index = events[link_index].link.as_ref().unwrap().next.unwrap(); -                    } +                divide_events(&mut map, events, index, &mut tokenizer.events); -                    if subevent.link.is_some() { -                        // Need to call `subtokenize` again. -                        done = false; -                    } +                // To do: check `tokenizer.events` if there is a deep content type? +                done = false; +            } +        } -                    // If there is a `next` link in the subevents, we have to change -                    // its index to account for the shifted events. -                    // If it points to a next event, we also change the next event’s -                    // reference back to *this* event. -                    if let Some(sublink_curr) = &mut subevent.link { -                        if let Some(next) = sublink_curr.next { -                            // The `index` in `events` where the current link is, -                            // minus 2 events (the enter and exit) for each removed -                            // link. -                            let shift = link_index - (slices.len() * 2); -                            sublink_curr.next = sublink_curr.next.map(|next| next + shift); -                            let next_ev = &mut tokenizer.events[next]; -                            let sublink_next = next_ev.link.as_mut().unwrap(); -                            sublink_next.previous = -                                sublink_next.previous.map(|previous| previous + shift); -                        } -                    } +        index += 1; +    } -                    subindex += 1; -                } +    map.consume(events); + +    done +} -                slices.push((link_index, slice_start)); +/// Parse linked events. +/// +/// Supposed to be called repeatedly, returns `1: true` when done. +pub fn divide_events( +    map: &mut EditMap, +    events: &[Event], +    mut link_index: usize, +    child_events: &mut Vec<Event>, +) { +    // Now, loop through all subevents to figure out which parts +    // belong where and fix deep links. +    let mut subindex = 0; +    let mut slices = vec![]; +    let mut slice_start = 0; +    let mut old_prev: Option<usize> = None; + +    while subindex < child_events.len() { +        // Find the first event that starts after the end we’re looking +        // for. +        if child_events[subindex].event_type == EventType::Enter +            && child_events[subindex].point.index >= events[link_index + 1].point.index +        { +            slices.push((link_index, slice_start)); +            slice_start = subindex; +            link_index = events[link_index].link.as_ref().unwrap().next.unwrap(); +        } -                // Finally, inject the subevents. -                let mut index = slices.len(); +        // Fix sublinks. +        if let Some(sublink_curr) = &child_events[subindex].link { +            if sublink_curr.previous.is_some() { +                let old_prev = old_prev.unwrap(); +                let prev_event = &mut child_events[old_prev]; +                // The `index` in `events` where the current link is, +                // minus one to get the previous link, +                // minus 2 events (the enter and exit) for each removed +                // link. +                let new_link = if slices.is_empty() { +                    old_prev + link_index + 2 +                } else { +                    old_prev + link_index - (slices.len() - 1) * 2 +                }; +                prev_event.link.as_mut().unwrap().next = Some(new_link); +            } +        } -                while index > 0 { -                    index -= 1; -                    map.add( -                        slices[index].0, -                        2, -                        tokenizer.events.split_off(slices[index].1), -                    ); -                } +        // If there is a `next` link in the subevents, we have to change +        // its `previous` index to account for the shifted events. +        // If it points to a next event, we also change the next event’s +        // reference back to *this* event. +        if let Some(sublink_curr) = &child_events[subindex].link { +            if let Some(next) = sublink_curr.next { +                let sublink_next = child_events[next].link.as_mut().unwrap(); + +                old_prev = sublink_next.previous; + +                sublink_next.previous = sublink_next +                    .previous +                    // The `index` in `events` where the current link is, +                    // minus 2 events (the enter and exit) for each removed +                    // link. +                    .map(|previous| previous + link_index - (slices.len() * 2));              }          } -        index += 1; +        subindex += 1;      } -    map.consume(events); +    if !child_events.is_empty() { +        slices.push((link_index, slice_start)); +    } -    done +    // Finally, inject the subevents. +    let mut index = slices.len(); + +    while index > 0 { +        index -= 1; +        let start = slices[index].0; +        map.add( +            start, +            if start == events.len() { 0 } else { 2 }, +            child_events.split_off(slices[index].1), +        ); +    }  } | 
