diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-07-07 17:21:38 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-07-07 17:36:35 +0200 |
commit | 4806864e5377a5fef937b3fa02542e620c547969 (patch) | |
tree | c91ae2bbd1dc2037f425efd24d62d05e706e3e60 /src/content | |
parent | c2b4402223e53498078fc33dd55aabc0a48cdb56 (diff) | |
download | markdown-rs-4806864e5377a5fef937b3fa02542e620c547969.tar.gz markdown-rs-4806864e5377a5fef937b3fa02542e620c547969.tar.bz2 markdown-rs-4806864e5377a5fef937b3fa02542e620c547969.zip |
Add basic support for block quotes
Diffstat (limited to 'src/content')
-rw-r--r-- | src/content/document.rs | 439 | ||||
-rw-r--r-- | src/content/flow.rs | 49 | ||||
-rw-r--r-- | src/content/mod.rs | 6 |
3 files changed, 447 insertions, 47 deletions
diff --git a/src/content/document.rs b/src/content/document.rs new file mode 100644 index 0000000..dd5038f --- /dev/null +++ b/src/content/document.rs @@ -0,0 +1,439 @@ +//! The document content type. +//! +//! **Document** represents the containers, such as block quotes and lists, +//! which structure the document and contain other sections. +//! +//! The constructs found in document are: +//! +//! * [Block quote][crate::construct::block_quote] +//! * List + +use crate::construct::block_quote::{ + cont as block_quote_cont, end as block_quote_end, start as block_quote, +}; +use crate::content::flow::start as flow; +use crate::parser::ParseState; +use crate::subtokenize::subtokenize; +use crate::tokenizer::{ + Code, Event, EventType, Point, State, StateFn, StateFnResult, TokenType, Tokenizer, +}; +use crate::util::edit_map::EditMap; +use crate::util::{ + normalize_identifier::normalize_identifier, + span::{from_exit_event, serialize}, +}; +use std::collections::HashSet; + +struct DocumentInfo { + continued: usize, + stack: Vec<String>, + next: Box<StateFn>, + last_line_ending_index: Option<usize>, + map: EditMap, +} + +/// Turn `codes` as the document content type into events. 
+pub fn document(parse_state: &mut ParseState, point: Point, index: usize) -> Vec<Event> { + let mut tokenizer = Tokenizer::new(point, index, parse_state); + + tokenizer.push(&parse_state.codes, Box::new(start), true); + + let mut index = 0; + let mut next_definitions: HashSet<String> = HashSet::new(); + + while index < tokenizer.events.len() { + let event = &tokenizer.events[index]; + + if event.event_type == EventType::Exit + && event.token_type == TokenType::DefinitionLabelString + { + next_definitions.insert(normalize_identifier( + serialize( + &parse_state.codes, + &from_exit_event(&tokenizer.events, index), + false, + ) + .as_str(), + )); + } + + index += 1; + } + + let mut result = (tokenizer.events, false); + + parse_state.definitions = next_definitions; + + while !result.1 { + result = subtokenize(result.0, parse_state); + } + + result.0 +} + +fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + let info = DocumentInfo { + continued: 0, + stack: vec![], + next: Box::new(flow), + last_line_ending_index: None, + map: EditMap::new(), + }; + before(tokenizer, code, info) +} + +fn before(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnResult { + println!("before: check existing open containers"); + // First we iterate through the open blocks, starting with the root + // document, and descending through last children down to the last open + // block. + // Each block imposes a condition that the line must satisfy if the block + // is to remain open. + // For example, a block quote requires a `>` character. + // A paragraph requires a non-blank line. + // In this phase we may match all or just some of the open blocks. + // But we cannot close unmatched blocks yet, because we may have a lazy + // continuation line. + if info.continued < info.stack.len() { + let name = &info.stack[info.continued]; + // To do: list. 
+ let cont = if name == "blockquote" { + block_quote_cont + } else { + unreachable!("todo: cont construct {:?}", name) + }; + + // To do: state? + + tokenizer.attempt(cont, move |ok| { + if ok { + Box::new(|t, c| document_continue(t, c, info)) + } else { + Box::new(|t, c| check_new_containers(t, c, info)) + } + })(tokenizer, code) + } else { + // Done. + check_new_containers(tokenizer, code, info) + } +} + +fn document_continue( + tokenizer: &mut Tokenizer, + code: Code, + mut info: DocumentInfo, +) -> StateFnResult { + println!("document_continue"); + info.continued += 1; + + println!(" to do: close flow sometimes?"); + // // Note: this field is called `_closeFlow` but it also closes containers. + // // Perhaps a good idea to rename it but it’s already used in the wild by + // // extensions. + // if (self.containerState._closeFlow) { + // self.containerState._closeFlow = undefined + + // if (childFlow) { + // closeFlow() + // } + + // // Note: this algorithm for moving events around is similar to the + // // algorithm when dealing with lazy lines in `writeToChild`. + // const indexBeforeExits = self.events.length + // let indexBeforeFlow = indexBeforeExits + // /** @type {Point|undefined} */ + // let point + + // // Find the flow chunk. + // while (indexBeforeFlow--) { + // if ( + // self.events[indexBeforeFlow][0] === 'exit' && + // self.events[indexBeforeFlow][1].type === types.chunkFlow + // ) { + // point = self.events[indexBeforeFlow][1].end + // break + // } + // } + + // assert(point, 'could not find previous flow chunk') + + let size = info.continued; + exit_containers(tokenizer, &mut info, size); + + // // Fix positions. + // let index = indexBeforeExits + + // while (index < self.events.length) { + // self.events[index][1].end = Object.assign({}, point) + // index++ + // } + + // // Inject the exits earlier (they’re still also at the end). 
+ // splice( + // self.events, + // indexBeforeFlow + 1, + // 0, + // self.events.slice(indexBeforeExits) + // ) + + // // Discard the duplicate exits. + // self.events.length = index + + // return checkNewContainers(code) + // } + + before(tokenizer, code, info) +} +// documentContinue + +fn check_new_containers( + tokenizer: &mut Tokenizer, + code: Code, + info: DocumentInfo, +) -> StateFnResult { + println!("check_new_containers"); + // Next, after consuming the continuation markers for existing blocks, we + // look for new block starts (e.g. `>` for a block quote). + // If we encounter a new block start, we close any blocks unmatched in + // step 1 before creating the new block as a child of the last matched + // block. + if info.continued == info.stack.len() { + println!(" to do: concrete? interrupt?"); + // // No need to `check` whether there’s a container, of `exitContainers` + // // would be moot. + // // We can instead immediately `attempt` to parse one. + // if (!childFlow) { + // return documentContinued(code) + // } + + // // If we have concrete content, such as block HTML or fenced code, + // // we can’t have containers “pierce” into them, so we can immediately + // // start. + // if (childFlow.currentConstruct && childFlow.currentConstruct.concrete) { + // return flowStart(code) + // } + + // // If we do have flow, it could still be a blank line, + // // but we’d be interrupting it w/ a new container if there’s a current + // // construct. + // self.interrupt = Boolean( + // childFlow.currentConstruct && !childFlow._gfmTableDynamicInterruptHack + // ) + } + + // Check if there is a new container. + // To do: list. 
+ tokenizer.attempt(block_quote, move |ok| { + if ok { + Box::new(|t, c| there_is_a_new_container(t, c, info, "blockquote".to_string())) + } else { + Box::new(|t, c| there_is_no_new_container(t, c, info)) + } + })(tokenizer, code) +} + +fn there_is_a_new_container( + tokenizer: &mut Tokenizer, + code: Code, + mut info: DocumentInfo, + name: String, +) -> StateFnResult { + println!("there_is_a_new_container"); + println!(" todo: close_flow"); + // if (childFlow) closeFlow() + let size = info.continued; + exit_containers(tokenizer, &mut info, size); + info.stack.push(name); + info.continued += 1; + document_continued(tokenizer, code, info) +} + +/// Exit open containers. +fn exit_containers(tokenizer: &mut Tokenizer, info: &mut DocumentInfo, size: usize) { + while info.stack.len() > size { + let name = info.stack.pop().unwrap(); + + // To do: list. + let end = if name == "blockquote" { + block_quote_end + } else { + unreachable!("todo: cont {:?}", name) + }; + + // To do: improve below code. 
+ let insert_index = if let Some(index) = info.last_line_ending_index { + index + } else { + tokenizer.events.len() + }; + let eol_point = if let Some(index) = info.last_line_ending_index { + tokenizer.events[index].point.clone() + } else { + tokenizer.point.clone() + }; + let eol_index = if let Some(index) = info.last_line_ending_index { + tokenizer.events[index].index + } else { + tokenizer.index + }; + + let token_types = end(); + + let mut index = 0; + while index < token_types.len() { + let token_type = &token_types[index]; + + info.map.add( + insert_index, + 0, + vec![Event { + event_type: EventType::Exit, + token_type: token_type.clone(), + point: eol_point.clone(), + index: eol_index, + previous: None, + next: None, + content_type: None, + }], + ); + + let mut stack_index = tokenizer.stack.len(); + + while stack_index > 0 { + stack_index -= 1; + + if tokenizer.stack[stack_index] == *token_type { + break; + } + } + + assert_eq!( + tokenizer.stack[stack_index], *token_type, + "expected token type" + ); + tokenizer.stack.remove(stack_index); + + index += 1; + } + } +} + +fn there_is_no_new_container( + tokenizer: &mut Tokenizer, + code: Code, + info: DocumentInfo, +) -> StateFnResult { + let lazy = info.continued != info.stack.len(); + tokenizer.lazy = lazy; + println!("there is no new container"); + if lazy { + println!( + " This line will be lazy. Depending on what is parsed now, we need to close containers before?" + ); + } + // lineStartOffset = self.now().offset + flow_start(tokenizer, code, info) +} + +fn document_continued(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnResult { + println!("document_continued"); + + // Try new containers. + // To do: list. + tokenizer.attempt(block_quote, |ok| { + if ok { + Box::new(|t, c| container_continue(t, c, info)) + } else { + Box::new(|t, c| { + // To do: this looks like a bug? 
+ t.lazy = false; + flow_start(t, c, info) + }) + } + })(tokenizer, code) +} + +fn container_continue( + tokenizer: &mut Tokenizer, + code: Code, + mut info: DocumentInfo, +) -> StateFnResult { + println!("container_continue"); + // assert( + // self.currentConstruct, + // 'expected `currentConstruct` to be defined on tokenizer' + // ) + // assert( + // self.containerState, + // 'expected `containerState` to be defined on tokenizer' + // ) + info.continued += 1; + // To do: add to stack? + // stack.push([self.currentConstruct, self.containerState]) + // Try another. + document_continued(tokenizer, code, info) +} + +fn flow_start(tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo) -> StateFnResult { + println!("flow_start"); + let next = info.next; + info.next = Box::new(flow); // This is weird but Rust needs a function there. + + let size = info.continued; + exit_containers(tokenizer, &mut info, size); + + tokenizer.go_until(next, eof_eol, move |(state, remainder)| { + ( + State::Fn(Box::new(move |t, c| flow_end(t, c, info, state))), + remainder, + ) + })(tokenizer, code) +} + +fn flow_end( + tokenizer: &mut Tokenizer, + code: Code, + mut info: DocumentInfo, + result: State, +) -> StateFnResult { + println!("flow_end"); + let was_lazy = tokenizer.lazy; + + if was_lazy { + println!( + "this line was lazy. Depeding on what was parsed, we need to exit containers after it?" + ); + } + + info.continued = 0; + + // To do: blank lines? Other things? 
+ if tokenizer.events.len() > 2 + && tokenizer.events[tokenizer.events.len() - 1].token_type == TokenType::LineEnding + { + info.last_line_ending_index = Some(tokenizer.events.len() - 2); + } else { + info.last_line_ending_index = None; + } + + match result { + State::Ok => { + println!("State::Ok"); + exit_containers(tokenizer, &mut info, 0); + tokenizer.events = info.map.consume(&mut tokenizer.events); + (State::Ok, Some(vec![code])) + } + State::Nok => unreachable!("handle nok in `flow`?"), + State::Fn(func) => { + info.next = func; + before(tokenizer, code, info) + } + } +} + +fn eof_eol(code: Code) -> bool { + matches!( + code, + Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') + ) +} diff --git a/src/content/flow.rs b/src/content/flow.rs index 74c6a62..f406685 100644 --- a/src/content/flow.rs +++ b/src/content/flow.rs @@ -26,52 +26,7 @@ use crate::construct::{ html_flow::start as html_flow, paragraph::start as paragraph, thematic_break::start as thematic_break, }; -use crate::parser::ParseState; -use crate::subtokenize::subtokenize; -use crate::tokenizer::{Code, Event, EventType, Point, State, StateFnResult, TokenType, Tokenizer}; -use crate::util::{ - normalize_identifier::normalize_identifier, - span::{from_exit_event, serialize}, -}; -use std::collections::HashSet; - -/// Turn `codes` as the flow content type into events. 
-pub fn flow(parse_state: &mut ParseState, point: Point, index: usize) -> Vec<Event> { - let mut tokenizer = Tokenizer::new(point, index, parse_state); - tokenizer.push(&parse_state.codes, Box::new(start), true); - let mut next_definitions: HashSet<String> = HashSet::new(); - - let mut index = 0; - - while index < tokenizer.events.len() { - let event = &tokenizer.events[index]; - - if event.event_type == EventType::Exit - && event.token_type == TokenType::DefinitionLabelString - { - next_definitions.insert(normalize_identifier( - serialize( - &parse_state.codes, - &from_exit_event(&tokenizer.events, index), - false, - ) - .as_str(), - )); - } - - index += 1; - } - - let mut result = (tokenizer.events, false); - - parse_state.definitions = next_definitions; - - while !result.1 { - result = subtokenize(result.0, parse_state); - } - - result.0 -} +use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; /// Before flow. /// @@ -83,7 +38,7 @@ pub fn flow(parse_state: &mut ParseState, point: Point, index: usize) -> Vec<Eve /// | bravo /// |*** /// ``` -fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::None => (State::Ok, None), _ => tokenizer.attempt(blank_line, |ok| { diff --git a/src/content/mod.rs b/src/content/mod.rs index ae8ad83..af40cc0 100644 --- a/src/content/mod.rs +++ b/src/content/mod.rs @@ -1,5 +1,11 @@ //! Content types found in markdown. +//! +//! * [document][document] +//! * [flow][flow] +//! * [string][string] +//! * [text][text] +pub mod document; pub mod flow; pub mod string; pub mod text; |