//! The document content type. //! //! **Document** represents the containers, such as block quotes and lists, //! which structure the document and contain other sections. //! //! The constructs found in flow are: //! //! * [Block quote][crate::construct::block_quote] //! * List use crate::construct::block_quote::{ cont as block_quote_cont, end as block_quote_end, start as block_quote, }; use crate::content::flow::start as flow; use crate::parser::ParseState; use crate::subtokenize::subtokenize; use crate::token::Token; use crate::tokenizer::{Code, Event, EventType, Point, State, StateFn, StateFnResult, Tokenizer}; use crate::util::edit_map::EditMap; use crate::util::{ normalize_identifier::normalize_identifier, span::{from_exit_event, serialize}, }; use std::collections::HashSet; struct DocumentInfo { continued: usize, stack: Vec, next: Box, last_line_ending_index: Option, map: EditMap, } /// Turn `codes` as the document content type into events. pub fn document(parse_state: &mut ParseState, point: Point, index: usize) -> Vec { let mut tokenizer = Tokenizer::new(point, index, parse_state); tokenizer.push(&parse_state.codes, Box::new(start), true); let mut index = 0; let mut next_definitions: HashSet = HashSet::new(); while index < tokenizer.events.len() { let event = &tokenizer.events[index]; if event.event_type == EventType::Exit && event.token_type == Token::DefinitionLabelString { next_definitions.insert(normalize_identifier( serialize( &parse_state.codes, &from_exit_event(&tokenizer.events, index), false, ) .as_str(), )); } index += 1; } let mut result = (tokenizer.events, false); parse_state.definitions = next_definitions; while !result.1 { result = subtokenize(result.0, parse_state); } result.0 } fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { let info = DocumentInfo { continued: 0, stack: vec![], next: Box::new(flow), last_line_ending_index: None, map: EditMap::new(), }; before(tokenizer, code, info) } fn before(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnResult { println!("before: check existing open containers"); // First we iterate through the open blocks, starting with the root // document, and descending through last children down to the last open // block. // Each block imposes a condition that the line must satisfy if the block // is to remain open. // For example, a block quote requires a `>` character. // A paragraph requires a non-blank line. // In this phase we may match all or just some of the open blocks. // But we cannot close unmatched blocks yet, because we may have a lazy // continuation line. if info.continued < info.stack.len() { let name = &info.stack[info.continued]; // To do: list. let cont = if name == "blockquote" { block_quote_cont } else { unreachable!("todo: cont construct {:?}", name) }; // To do: state? tokenizer.attempt(cont, move |ok| { if ok { Box::new(|t, c| document_continue(t, c, info)) } else { Box::new(|t, c| check_new_containers(t, c, info)) } })(tokenizer, code) } else { // Done. check_new_containers(tokenizer, code, info) } } fn document_continue( tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo, ) -> StateFnResult { println!("document_continue"); info.continued += 1; println!(" to do: close flow sometimes?"); // // Note: this field is called `_closeFlow` but it also closes containers. // // Perhaps a good idea to rename it but it’s already used in the wild by // // extensions. // if (self.containerState._closeFlow) { // self.containerState._closeFlow = undefined // if (childFlow) { // closeFlow() // } // // Note: this algorithm for moving events around is similar to the // // algorithm when dealing with lazy lines in `writeToChild`. // const indexBeforeExits = self.events.length // let indexBeforeFlow = indexBeforeExits // /** @type {Point|undefined} */ // let point // // Find the flow chunk. // while (indexBeforeFlow--) { // if ( // self.events[indexBeforeFlow][0] === 'exit' && // self.events[indexBeforeFlow][1].type === types.chunkFlow // ) { // point = self.events[indexBeforeFlow][1].end // break // } // } // assert(point, 'could not find previous flow chunk') let size = info.continued; exit_containers(tokenizer, &mut info, size); // // Fix positions. // let index = indexBeforeExits // while (index < self.events.length) { // self.events[index][1].end = Object.assign({}, point) // index++ // } // // Inject the exits earlier (they’re still also at the end). // splice( // self.events, // indexBeforeFlow + 1, // 0, // self.events.slice(indexBeforeExits) // ) // // Discard the duplicate exits. // self.events.length = index // return checkNewContainers(code) // } before(tokenizer, code, info) } // documentContinue fn check_new_containers( tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo, ) -> StateFnResult { println!("check_new_containers"); // Next, after consuming the continuation markers for existing blocks, we // look for new block starts (e.g. `>` for a block quote). // If we encounter a new block start, we close any blocks unmatched in // step 1 before creating the new block as a child of the last matched // block. if info.continued == info.stack.len() { println!(" to do: interrupt ({:?})?", tokenizer.interrupt); // // No need to `check` whether there’s a container, of `exitContainers` // // would be moot. // // We can instead immediately `attempt` to parse one. // if (!childFlow) { // return documentContinued(code) // } // If we have concrete content, such as block HTML or fenced code, // we can’t have containers “pierce” into them, so we can immediately // start. if tokenizer.concrete { println!(" concrete!"); return flow_start(tokenizer, code, info); } // // If we do have flow, it could still be a blank line, // // but we’d be interrupting it w/ a new container if there’s a current // // construct. // self.interrupt = Boolean( // childFlow.currentConstruct && !childFlow._gfmTableDynamicInterruptHack // ) } // Check if there is a new container. // To do: list. tokenizer.attempt(block_quote, move |ok| { if ok { Box::new(|t, c| there_is_a_new_container(t, c, info, "blockquote".to_string())) } else { Box::new(|t, c| there_is_no_new_container(t, c, info)) } })(tokenizer, code) } fn there_is_a_new_container( tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo, name: String, ) -> StateFnResult { println!("there_is_a_new_container"); println!(" todo: close_flow"); // if (childFlow) closeFlow() let size = info.continued; exit_containers(tokenizer, &mut info, size); info.stack.push(name); info.continued += 1; document_continued(tokenizer, code, info) } /// Exit open containers. fn exit_containers(tokenizer: &mut Tokenizer, info: &mut DocumentInfo, size: usize) { while info.stack.len() > size { let name = info.stack.pop().unwrap(); // To do: list. let end = if name == "blockquote" { block_quote_end } else { unreachable!("todo: cont {:?}", name) }; // To do: improve below code. let insert_index = if let Some(index) = info.last_line_ending_index { index } else { tokenizer.events.len() }; let eol_point = if let Some(index) = info.last_line_ending_index { tokenizer.events[index].point.clone() } else { tokenizer.point.clone() }; let eol_index = if let Some(index) = info.last_line_ending_index { tokenizer.events[index].index } else { tokenizer.index }; let token_types = end(); let mut index = 0; while index < token_types.len() { let token_type = &token_types[index]; info.map.add( insert_index, 0, vec![Event { event_type: EventType::Exit, token_type: token_type.clone(), point: eol_point.clone(), index: eol_index, previous: None, next: None, content_type: None, }], ); let mut stack_index = tokenizer.stack.len(); while stack_index > 0 { stack_index -= 1; if tokenizer.stack[stack_index] == *token_type { break; } } assert_eq!( tokenizer.stack[stack_index], *token_type, "expected token type" ); tokenizer.stack.remove(stack_index); index += 1; } } } fn there_is_no_new_container( tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo, ) -> StateFnResult { let lazy = info.continued != info.stack.len(); tokenizer.lazy = lazy; println!("there is no new container"); if lazy { println!( " This line will be lazy. Depending on what is parsed now, we need to close containers before?" ); } // lineStartOffset = self.now().offset flow_start(tokenizer, code, info) } fn document_continued(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnResult { println!("document_continued"); // Try new containers. // To do: list. tokenizer.attempt(block_quote, |ok| { if ok { Box::new(|t, c| container_continue(t, c, info)) } else { Box::new(|t, c| { // To do: this looks like a bug? t.lazy = false; flow_start(t, c, info) }) } })(tokenizer, code) } fn container_continue( tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo, ) -> StateFnResult { println!("container_continue"); // assert( // self.currentConstruct, // 'expected `currentConstruct` to be defined on tokenizer' // ) // assert( // self.containerState, // 'expected `containerState` to be defined on tokenizer' // ) info.continued += 1; // To do: add to stack? // stack.push([self.currentConstruct, self.containerState]) // Try another. document_continued(tokenizer, code, info) } fn flow_start(tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo) -> StateFnResult { println!("flow_start"); let next = info.next; info.next = Box::new(flow); // This is weird but Rust needs a function there. let size = info.continued; exit_containers(tokenizer, &mut info, size); tokenizer.go_until(next, eof_eol, move |(state, remainder)| { ( State::Fn(Box::new(move |t, c| flow_end(t, c, info, state))), remainder, ) })(tokenizer, code) } fn flow_end( tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo, result: State, ) -> StateFnResult { println!("flow_end"); let was_lazy = tokenizer.lazy; if was_lazy { println!( "this line was lazy. Depeding on what was parsed, we need to exit containers after it?" ); } info.continued = 0; // To do: blank lines? Other things? if tokenizer.events.len() > 2 && tokenizer.events[tokenizer.events.len() - 1].token_type == Token::LineEnding { info.last_line_ending_index = Some(tokenizer.events.len() - 2); } else { info.last_line_ending_index = None; } match result { State::Ok => { println!("State::Ok"); exit_containers(tokenizer, &mut info, 0); tokenizer.events = info.map.consume(&mut tokenizer.events); (State::Ok, Some(vec![code])) } State::Nok => unreachable!("handle nok in `flow`?"), State::Fn(func) => { info.next = func; before(tokenizer, code, info) } } } fn eof_eol(code: Code) -> bool { matches!( code, Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') ) }