//! The document content type.
//!
//! **Document** represents the containers, such as block quotes, list items,
//! or GFM footnotes, which structure the document and contain other sections.
//!
//! The constructs found in document are:
//!
//! * [Block quote][crate::construct::block_quote]
//! * [List item][crate::construct::list_item]
//! * [GFM: Footnote definition][crate::construct::gfm_footnote_definition]

use crate::event::{Content, Event, Kind, Link, Name};
use crate::state::{Name as StateName, State};
use crate::subtokenize::divide_events;
use crate::tokenizer::{Container, ContainerState, Tokenizer};
use crate::util::skip;
use alloc::{boxed::Box, string::String, vec::Vec};

/// Phases where we can exit containers.
#[derive(Debug, PartialEq)]
enum Phase {
    /// After parsing a line of lazy flow which resulted in something that
    /// exits containers before the line.
    ///
    /// ```markdown
    ///   | * a
    /// > | ```js
    ///     ^
    ///   | b
    ///   | ```
    /// ```
    After,
    /// When a new container replaces an existing container.
    ///
    /// ```markdown
    ///   | * a
    /// > | > b
    ///     ^
    /// ```
    Prefix,
    /// After everything.
    ///
    /// ```markdown
    /// > | * a
    ///        ^
    /// ```
    Eof,
}

/// Start of document, at an optional BOM.
///
/// ```markdown
/// > | a
///     ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
    tokenizer.tokenize_state.document_child = Some(Box::new(Tokenizer::new(
        tokenizer.point.clone(),
        tokenizer.parse_state,
    )));

    tokenizer.attempt(
        State::Next(StateName::DocumentBeforeFrontmatter),
        State::Next(StateName::DocumentBeforeFrontmatter),
    );

    State::Retry(StateName::BomStart)
}

/// At optional frontmatter.
///
/// ```markdown
/// > | ---
///     ^
///   | title: Venus
///   | ---
/// ```
pub fn before_frontmatter(tokenizer: &mut Tokenizer) -> State {
    tokenizer.attempt(
        State::Next(StateName::DocumentContainerNewBefore),
        State::Next(StateName::DocumentContainerNewBefore),
    );
    State::Retry(StateName::FrontmatterStart)
}

/// At optional existing containers.
///
/// ```markdown
///   | * a
/// > | > b
///     ^
/// ```
pub fn container_existing_before(tokenizer: &mut Tokenizer) -> State {
    // If there are more existing containers, check whether the next one continues.
    if tokenizer.tokenize_state.document_continued
        < tokenizer.tokenize_state.document_container_stack.len()
    {
        let container = &tokenizer.tokenize_state.document_container_stack
            [tokenizer.tokenize_state.document_continued];
        let name = match container.kind {
            Container::BlockQuote => StateName::BlockQuoteContStart,
            Container::GfmFootnoteDefinition => StateName::GfmFootnoteDefinitionContStart,
            Container::ListItem => StateName::ListItemContStart,
        };

        tokenizer.attempt(
            State::Next(StateName::DocumentContainerExistingAfter),
            State::Next(StateName::DocumentContainerNewBefore),
        );

        State::Retry(name)
    }
    // Otherwise, check new containers.
    else {
        State::Retry(StateName::DocumentContainerNewBefore)
    }
}

/// After continued existing container.
///
/// ```markdown
///   | * a
/// > |   b
///       ^
/// ```
pub fn container_existing_after(tokenizer: &mut Tokenizer) -> State {
    tokenizer.tokenize_state.document_continued += 1;
    State::Retry(StateName::DocumentContainerExistingBefore)
}

/// At new containers.
///
/// ```markdown
/// > | * a
///     ^
/// > | > b
///     ^
/// ```
pub fn container_new_before(tokenizer: &mut Tokenizer) -> State {
    // If we have completely continued, restore the flow’s past `interrupt`
    // status.
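    //
    // (An illustrative case for the concrete check below: given `> ```js` on
    // one line, a following `> * b` line continues the block quote, but since
    // the fenced code is concrete, `* b` is code text, not a new list item.)
    //
    // ```markdown
    //   | > ```js
    // > | > * b
    //     ^
    //   | > ```
    // ```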
    if tokenizer.tokenize_state.document_continued
        == tokenizer.tokenize_state.document_container_stack.len()
    {
        let child = tokenizer.tokenize_state.document_child.as_ref().unwrap();

        tokenizer.interrupt = child.interrupt;

        // …and if we’re in a concrete construct, new containers can’t “pierce”
        // into them.
        if child.concrete {
            return State::Retry(StateName::DocumentContainersAfter);
        }
    }

    // Check for a new container.
    // Block quote?
    // Add a new container at the end of the stack.
    let tail = tokenizer.tokenize_state.document_container_stack.len();
    tokenizer
        .tokenize_state
        .document_container_stack
        .push(ContainerState {
            kind: Container::BlockQuote,
            blank_initial: false,
            size: 0,
        });
    // Swap the existing container with the new one.
    tokenizer
        .tokenize_state
        .document_container_stack
        .swap(tokenizer.tokenize_state.document_continued, tail);

    tokenizer.attempt(
        State::Next(StateName::DocumentContainerNewAfter),
        State::Next(StateName::DocumentContainerNewBeforeNotBlockQuote),
    );
    State::Retry(StateName::BlockQuoteStart)
}

/// At new container, but not a block quote.
///
/// ```markdown
/// > | * a
///     ^
/// ```
pub fn container_new_before_not_block_quote(tokenizer: &mut Tokenizer) -> State {
    // List item?
    // We replace the empty block quote container with this new list item one.
    tokenizer.tokenize_state.document_container_stack
        [tokenizer.tokenize_state.document_continued] = ContainerState {
        kind: Container::ListItem,
        blank_initial: false,
        size: 0,
    };

    tokenizer.attempt(
        State::Next(StateName::DocumentContainerNewAfter),
        State::Next(StateName::DocumentContainerNewBeforeNotList),
    );
    State::Retry(StateName::ListItemStart)
}

/// At new container, but not a block quote or list item.
///
/// ```markdown
/// > | a
///     ^
/// ```
pub fn container_new_before_not_list(tokenizer: &mut Tokenizer) -> State {
    // Footnote definition?
    // We replace the empty list item container with this new footnote
    // definition one.
    tokenizer.tokenize_state.document_container_stack
        [tokenizer.tokenize_state.document_continued] = ContainerState {
        kind: Container::GfmFootnoteDefinition,
        blank_initial: false,
        size: 0,
    };

    tokenizer.attempt(
        State::Next(StateName::DocumentContainerNewAfter),
        State::Next(StateName::DocumentContainerNewBeforeNotGfmFootnoteDefinition),
    );
    State::Retry(StateName::GfmFootnoteDefinitionStart)
}

/// At new container, but not a block quote, list item, or footnote definition.
///
/// ```markdown
/// > | a
///     ^
/// ```
pub fn container_new_before_not_footnote_definition(tokenizer: &mut Tokenizer) -> State {
    // It wasn’t a new block quote, list item, or footnote definition.
    // Swap the new container (in the middle) with the existing one (at the end).
    // Drop what was in the middle.
    tokenizer
        .tokenize_state
        .document_container_stack
        .swap_remove(tokenizer.tokenize_state.document_continued);

    State::Retry(StateName::DocumentContainersAfter)
}

/// After new container.
///
/// ```markdown
/// > | * a
///     ^
/// > | > b
///     ^
/// ```
pub fn container_new_after(tokenizer: &mut Tokenizer) -> State {
    // It was a new block quote, list item, or footnote definition.
    // Swap the new container (in the middle) with the existing one (at the end).
    // Take the new container.
    let container = tokenizer
        .tokenize_state
        .document_container_stack
        .swap_remove(tokenizer.tokenize_state.document_continued);

    // If we did not continue all existing containers, and there is a new one,
    // close the flow and those containers.
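    //
    // For example (the same case as `Phase::Prefix` above):
    //
    // ```markdown
    //   | * a
    // > | > b
    //     ^
    // ```
    //
    // The list item is not continued and a block quote starts, so the list
    // item is exited before the block quote is entered.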
    if tokenizer.tokenize_state.document_continued
        != tokenizer.tokenize_state.document_container_stack.len()
    {
        if let Err(message) = exit_containers(tokenizer, &Phase::Prefix) {
            return State::Error(message);
        }
    }

    // We are “piercing” into the flow with a new container.
    tokenizer
        .tokenize_state
        .document_child
        .as_mut()
        .unwrap()
        .pierce = true;

    tokenizer
        .tokenize_state
        .document_container_stack
        .push(container);
    tokenizer.tokenize_state.document_continued += 1;
    tokenizer.interrupt = false;
    State::Retry(StateName::DocumentContainerNewBefore)
}

/// After containers, at flow.
///
/// ```markdown
/// > | * a
///       ^
/// > | > b
///       ^
/// ```
pub fn containers_after(tokenizer: &mut Tokenizer) -> State {
    let child = tokenizer.tokenize_state.document_child.as_mut().unwrap();

    child.lazy = tokenizer.tokenize_state.document_continued
        != tokenizer.tokenize_state.document_container_stack.len();
    child.define_skip(tokenizer.point.clone());

    match tokenizer.current {
        // Note: EOL is part of data.
        None => State::Retry(StateName::DocumentFlowEnd),
        Some(_) => {
            let current = tokenizer.events.len();
            let previous = tokenizer.tokenize_state.document_data_index;
            if let Some(previous) = previous {
                tokenizer.events[previous].link.as_mut().unwrap().next = Some(current);
            }
            tokenizer.tokenize_state.document_data_index = Some(current);
            tokenizer.enter_link(
                Name::Data,
                Link {
                    previous,
                    next: None,
                    content: Content::Flow,
                },
            );
            State::Retry(StateName::DocumentFlowInside)
        }
    }
}

/// In flow.
///
/// ```markdown
/// > | * ab
///       ^
/// ```
pub fn flow_inside(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        None => {
            tokenizer.exit(Name::Data);
            State::Retry(StateName::DocumentFlowEnd)
        }
        // Note: EOL is part of data.
        Some(b'\n') => {
            tokenizer.consume();
            tokenizer.exit(Name::Data);
            State::Next(StateName::DocumentFlowEnd)
        }
        Some(_) => {
            tokenizer.consume();
            State::Next(StateName::DocumentFlowInside)
        }
    }
}

/// After flow (after an EOL or at EOF).
///
/// ```markdown
///   | * a
/// > | > b
///     ^  ^
/// ```
pub fn flow_end(tokenizer: &mut Tokenizer) -> State {
    let child = tokenizer.tokenize_state.document_child.as_mut().unwrap();
    let state = tokenizer
        .tokenize_state
        .document_child_state
        .take()
        .unwrap_or(State::Next(StateName::FlowStart));

    tokenizer.tokenize_state.document_exits.push(None);

    let state = child.push(
        (child.point.index, child.point.vs),
        (tokenizer.point.index, tokenizer.point.vs),
        state,
    );

    tokenizer.tokenize_state.document_child_state = Some(state);

    // If we’re in a lazy line, and the previous (lazy or not) line is something
    // that can be lazy, and this line is that too, allow it.
    //
    // Accept:
    //
    // ```markdown
    //   | * a
    // > | b
    //     ^
    //   | ```
    // ```
    //
    // Do not accept:
    //
    // ```markdown
    //   | * # a
    // > | b
    //     ^
    //   | ```
    // ```
    //
    // Do not accept:
    //
    // ```markdown
    //   | * a
    // > | # b
    //     ^
    //   | ```
    // ```
    let mut document_lazy_continuation_current = false;
    let mut stack_index = child.stack.len();

    // Use two algorithms: one for when we’re suspended or in multiline things
    // like definitions, another for when we fed the line ending and closed.
    while !document_lazy_continuation_current && stack_index > 0 {
        stack_index -= 1;
        let name = &child.stack[stack_index];
        if name == &Name::Content || name == &Name::GfmTableHead {
            document_lazy_continuation_current = true;
        }
    }

    // …another because we parse each “rest” line as a paragraph, and we passed
    // an EOL already.
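    //
    // (An illustrative case: with `* a` followed by a lazy `b`, the content of
    // `b` has already been parsed and closed by this point, so we look back,
    // past the final line ending, at the just-closed events.)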
    if !document_lazy_continuation_current && !child.events.is_empty() {
        let before = skip::opt_back(&child.events, child.events.len() - 1, &[Name::LineEnding]);
        let name = &child.events[before].name;
        if name == &Name::Content || name == &Name::HeadingSetextUnderline {
            document_lazy_continuation_current = true;
        }
    }

    // Reset “piercing”.
    child.pierce = false;

    if child.lazy
        && tokenizer.tokenize_state.document_lazy_accepting_before
        && document_lazy_continuation_current
    {
        tokenizer.tokenize_state.document_continued =
            tokenizer.tokenize_state.document_container_stack.len();
    }

    if tokenizer.tokenize_state.document_continued
        != tokenizer.tokenize_state.document_container_stack.len()
    {
        let result = exit_containers(tokenizer, &Phase::After);
        // `Phase::After` doesn’t deal with flow: it only generates exits for
        // containers.
        // And that never errors.
        debug_assert!(result.is_ok(), "did not expect error when exiting");
    }

    match tokenizer.current {
        None => {
            tokenizer.tokenize_state.document_continued = 0;
            if let Err(message) = exit_containers(tokenizer, &Phase::Eof) {
                return State::Error(message);
            }
            resolve(tokenizer);
            State::Ok
        }
        Some(_) => {
            tokenizer.tokenize_state.document_continued = 0;
            tokenizer.tokenize_state.document_lazy_accepting_before =
                document_lazy_continuation_current;
            // Containers would only be interrupting if we’ve continued.
            tokenizer.interrupt = false;
            State::Retry(StateName::DocumentContainerExistingBefore)
        }
    }
}

/// Close containers (and flow if needed).
fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) -> Result<(), String> {
    let mut stack_close = tokenizer
        .tokenize_state
        .document_container_stack
        .split_off(tokenizer.tokenize_state.document_continued);

    let child = tokenizer.tokenize_state.document_child.as_mut().unwrap();

    // Flush if needed.
    if *phase != Phase::After {
        let state = tokenizer
            .tokenize_state
            .document_child_state
            .take()
            .unwrap_or(State::Next(StateName::FlowStart));

        child.flush(state, false)?;
    }

    if !stack_close.is_empty() {
        let index = tokenizer.tokenize_state.document_exits.len()
            - (if *phase == Phase::After { 2 } else { 1 });
        let mut exits = Vec::with_capacity(stack_close.len());

        while !stack_close.is_empty() {
            let container = stack_close.pop().unwrap();
            let name = match container.kind {
                Container::BlockQuote => Name::BlockQuote,
                Container::GfmFootnoteDefinition => Name::GfmFootnoteDefinition,
                Container::ListItem => Name::ListItem,
            };

            exits.push(Event {
                kind: Kind::Exit,
                name: name.clone(),
                point: tokenizer.point.clone(),
                link: None,
            });

            let mut stack_index = tokenizer.stack.len();
            let mut found = false;

            while stack_index > 0 {
                stack_index -= 1;

                if tokenizer.stack[stack_index] == name {
                    tokenizer.stack.remove(stack_index);
                    found = true;
                    break;
                }
            }

            debug_assert!(found, "expected to find container event to exit");
        }

        debug_assert!(
            tokenizer.tokenize_state.document_exits[index].is_none(),
            "expected no exits yet"
        );
        tokenizer.tokenize_state.document_exits[index] = Some(exits);
    }

    child.interrupt = false;

    Ok(())
}

/// Inject everything together.
fn resolve(tokenizer: &mut Tokenizer) {
    let child = tokenizer.tokenize_state.document_child.as_mut().unwrap();

    // First, add the container exits into `child`.
    let mut child_index = 0;
    let mut line = 0;

    while child_index < child.events.len() {
        if child.events[child_index].kind == Kind::Exit
            && matches!(
                child.events[child_index].name,
                Name::LineEnding | Name::BlankLineEnding
            )
        {
            // Inject before `Enter:LineEnding`.
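            // (`child_index` is at the `Exit` of the line ending here, so
            // `child_index - 1` is its matching `Enter`.)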
            let mut inject_index = child_index - 1;
            let mut point = &child.events[inject_index].point;

            while child_index + 1 < child.events.len()
                && child.events[child_index + 1].kind == Kind::Exit
            {
                child_index += 1;
                point = &child.events[child_index].point;
                // Inject after `Exit:*`.
                inject_index = child_index + 1;
            }

            if let Some(mut exits) = tokenizer.tokenize_state.document_exits[line].take() {
                let mut exit_index = 0;
                while exit_index < exits.len() {
                    exits[exit_index].point = point.clone();
                    exit_index += 1;
                }

                child.map.add(inject_index, 0, exits);
            }

            line += 1;
        }

        child_index += 1;
    }

    child.map.consume(&mut child.events);

    let mut flow_index = skip::to(&tokenizer.events, 0, &[Name::Data]);

    // To do: use `!is_some_and()` when that’s stable.
    while flow_index < tokenizer.events.len()
        && (tokenizer.events[flow_index].link.is_none()
            || tokenizer.events[flow_index].link.as_ref().unwrap().content != Content::Flow)
    {
        flow_index = skip::to(&tokenizer.events, flow_index + 1, &[Name::Data]);
    }

    // Now, add all child events into our parent document tokenizer.
    divide_events(
        &mut tokenizer.map,
        &tokenizer.events,
        flow_index,
        &mut child.events,
        (0, 0),
    );

    // Replace the flow data with actual events.
    tokenizer.map.consume(&mut tokenizer.events);

    // Now, add some final container exits due to the EOF.
    // We can’t inject them into the child earlier, as they are “outside” its
    // linked data.
    if line < tokenizer.tokenize_state.document_exits.len() {
        if let Some(mut exits) = tokenizer.tokenize_state.document_exits[line].take() {
            let mut exit_index = 0;
            while exit_index < exits.len() {
                exits[exit_index].point = tokenizer.point.clone();
                exit_index += 1;
            }

            tokenizer.events.append(&mut exits);
        }
    }

    // Add the resolvers from child.
    tokenizer
        .resolvers
        .append(&mut child.resolvers.split_off(0));
    tokenizer
        .tokenize_state
        .definitions
        .append(&mut child.tokenize_state.definitions.split_off(0));
}