//! Deal with content in other content. //! //! To deal with content in content, *you* (a `micromark-rs` contributor) add //! information on events. //! Events are a flat list, but they can be connected to each other by setting //! `previous` and `next` links. //! These links: //! //! * …must occur on [`Enter`][EventType::Enter] events only //! * …must occur on void events (they are followed by their corresponding //! [`Exit`][EventType::Exit] event) //! * …must have `content_type` field to define the kind of subcontent //! //! Links will then be passed through a tokenizer for the corresponding content //! type by `subtokenize`. //! The subevents they result in are split up into slots for each linked token //! and replace those links. //! //! Subevents are not immediately subtokenized again because markdown prevents //! us from doing so due to definitions, which can occur after references, and //! thus the whole document needs to be parsed up to the level of definitions, //! before any level that can include references can be parsed. use crate::content::{string::start as string, text::start as text}; use crate::parser::ParseState; use crate::tokenizer::{ContentType, Event, EventType, State, StateFn, StateFnResult, Tokenizer}; use crate::util::{edit_map::EditMap, span}; /// Create a link between two [`Event`][]s. /// /// Arbitrary (void) events can be linked together. /// This optimizes for the common case where the token at `index` is connected /// to the previous void token. pub fn link(events: &mut [Event], index: usize) { link_to(events, index - 2, index); } /// Link two arbitrary [`Event`][]s together. pub fn link_to(events: &mut [Event], pevious: usize, next: usize) { let prev = &mut events[pevious]; assert!( prev.content_type.is_some(), "expected `content_type` on previous" ); assert_eq!(prev.event_type, EventType::Enter); prev.next = Some(next); let prev_ref = &events[pevious]; let prev_exit_ref = &events[pevious + 1]; let curr_ref = &events[next]; assert_eq!(prev_exit_ref.event_type, EventType::Exit); assert_eq!(prev_exit_ref.token_type, prev_ref.token_type); assert_eq!(curr_ref.content_type, prev_ref.content_type); let curr = &mut events[next]; assert_eq!(curr.event_type, EventType::Enter); curr.previous = Some(pevious); // Note: the exit of this event may not exist, so don’t check for that. } /// Parse linked events. /// /// Supposed to be called repeatedly, returns `1: true` when done. pub fn subtokenize(mut events: Vec, parse_state: &ParseState) -> (Vec, bool) { let mut edit_map = EditMap::new(); let mut done = true; let mut index = 0; while index < events.len() { let event = &events[index]; // Find each first opening chunk. if let Some(ref content_type) = event.content_type { assert_eq!(event.event_type, EventType::Enter); // No need to enter linked events again. if event.previous == None { // Index into `events` pointing to a chunk. let mut link_index: Option = Some(index); // Subtokenizer. let mut tokenizer = Tokenizer::new(event.point.clone(), event.index, parse_state); // Substate. let mut result: StateFnResult = ( State::Fn(Box::new(if *content_type == ContentType::String { string } else { text })), None, ); // Loop through links to pass them in order to the subtokenizer. while let Some(index) = link_index { let enter = &events[index]; assert_eq!(enter.event_type, EventType::Enter); let span = span::Span { start_index: enter.index, end_index: events[index + 1].index, }; if enter.previous != None { tokenizer.define_skip(&enter.point, enter.index); } let func: Box = match result.0 { State::Fn(func) => func, _ => unreachable!("cannot be ok/nok"), }; result = tokenizer.push( span::codes(&parse_state.codes, &span), func, enter.next == None, ); assert!(result.1.is_none(), "expected no remainder"); link_index = enter.next; } // Now, loop through all subevents to figure out which parts // belong where and fix deep links. let mut subindex = 0; let mut link_index = index; let mut slices = vec![]; let mut slice_start = 0; while subindex < tokenizer.events.len() { let subevent = &mut tokenizer.events[subindex]; // Find the first event that starts after the end we’re looking // for. if subevent.event_type == EventType::Enter && subevent.index >= events[link_index + 1].index { slices.push((link_index, slice_start)); slice_start = subindex; link_index = events[link_index].next.unwrap(); } if subevent.content_type.is_some() { // Need to call `subtokenize` again. done = false; } // If there is a `next` link in the subevents, we have to change // its index to account for the shifted events. // If it points to a next event, we also change the next event’s // reference back to *this* event. if let Some(next) = subevent.next { // The `index` in `events` where the current link is, // minus 2 events (the enter and exit) for each removed // link. let shift = link_index - (slices.len() * 2); subevent.next = Some(next + shift); let next_ev = &mut tokenizer.events[next]; let previous = next_ev.previous.unwrap(); next_ev.previous = Some(previous + shift); } subindex += 1; } slices.push((link_index, slice_start)); // Finally, inject the subevents. let mut index = slices.len(); while index > 0 { index -= 1; edit_map.add( slices[index].0, 2, tokenizer.events.split_off(slices[index].1), ); } } } index += 1; } (edit_map.consume(&mut events), done) }