author    Titus Wormer <tituswormer@gmail.com>  2022-07-07 17:21:38 +0200
committer Titus Wormer <tituswormer@gmail.com>  2022-07-07 17:36:35 +0200
commit    4806864e5377a5fef937b3fa02542e620c547969 (patch)
tree      c91ae2bbd1dc2037f425efd24d62d05e706e3e60 /src/content
parent    c2b4402223e53498078fc33dd55aabc0a48cdb56 (diff)
Add basic support for block quotes
Diffstat (limited to 'src/content')
 src/content/document.rs | 439
 src/content/flow.rs     |  49
 src/content/mod.rs      |   6
 3 files changed, 447 insertions(+), 47 deletions(-)
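Before the file-by-file diff, a note on what "basic support" means here: per CommonMark, a `>` marker opens a block quote container whose contents are parsed as flow again. A minimal sketch of the expected input/output mapping (the HTML shown is standard CommonMark output; the crate's public entry point is not part of this diff, so it is not invoked here):

```rust
fn main() {
    // A `>` marker opens a block quote container; the lines it prefixes are
    // parsed as flow content inside it (CommonMark semantics).
    let input = "> a\n> b";
    // Standard CommonMark HTML for that input:
    let expected = "<blockquote>\n<p>a\nb</p>\n</blockquote>";
    println!("{input:?} => {expected:?}");
}
```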
diff --git a/src/content/document.rs b/src/content/document.rs
new file mode 100644
index 0000000..dd5038f
--- /dev/null
+++ b/src/content/document.rs
@@ -0,0 +1,439 @@
+//! The document content type.
+//!
+//! **Document** represents the containers, such as block quotes and lists,
+//! which structure the document and contain other sections.
+//!
+//! The constructs found in a document are:
+//!
+//! * [Block quote][crate::construct::block_quote]
+//! * List
+
+use crate::construct::block_quote::{
+ cont as block_quote_cont, end as block_quote_end, start as block_quote,
+};
+use crate::content::flow::start as flow;
+use crate::parser::ParseState;
+use crate::subtokenize::subtokenize;
+use crate::tokenizer::{
+ Code, Event, EventType, Point, State, StateFn, StateFnResult, TokenType, Tokenizer,
+};
+use crate::util::edit_map::EditMap;
+use crate::util::{
+ normalize_identifier::normalize_identifier,
+ span::{from_exit_event, serialize},
+};
+use std::collections::HashSet;
+
+struct DocumentInfo {
+ continued: usize,
+ stack: Vec<String>,
+ next: Box<StateFn>,
+ last_line_ending_index: Option<usize>,
+ map: EditMap,
+}
+
+/// Turn `codes` as the document content type into events.
+pub fn document(parse_state: &mut ParseState, point: Point, index: usize) -> Vec<Event> {
+ let mut tokenizer = Tokenizer::new(point, index, parse_state);
+
+ tokenizer.push(&parse_state.codes, Box::new(start), true);
+
+ let mut index = 0;
+ let mut next_definitions: HashSet<String> = HashSet::new();
+
+ while index < tokenizer.events.len() {
+ let event = &tokenizer.events[index];
+
+ if event.event_type == EventType::Exit
+ && event.token_type == TokenType::DefinitionLabelString
+ {
+ next_definitions.insert(normalize_identifier(
+ serialize(
+ &parse_state.codes,
+ &from_exit_event(&tokenizer.events, index),
+ false,
+ )
+ .as_str(),
+ ));
+ }
+
+ index += 1;
+ }
+
+ let mut result = (tokenizer.events, false);
+
+ parse_state.definitions = next_definitions;
+
+ while !result.1 {
+ result = subtokenize(result.0, parse_state);
+ }
+
+ result.0
+}
+
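The loop above collects definition identifiers so later references can be resolved against them. As a standalone illustration of the normalization it depends on (a sketch, not the crate's `normalize_identifier`, and with simplified case folding):

```rust
/// Sketch of CommonMark identifier normalization: collapse whitespace runs,
/// trim, and case-fold, so `[ Foo\nbar ]` and `[foo bar]` match.
fn normalize_sketch(value: &str) -> String {
    value
        .split_whitespace() // trims and collapses tabs, newlines, spaces
        .collect::<Vec<_>>()
        .join(" ")
        .to_lowercase() // simplified: full Unicode case folding is more involved
}

fn main() {
    assert_eq!(normalize_sketch(" Foo\n\tBAR "), normalize_sketch("foo bar"));
}
```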
+fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ let info = DocumentInfo {
+ continued: 0,
+ stack: vec![],
+ next: Box::new(flow),
+ last_line_ending_index: None,
+ map: EditMap::new(),
+ };
+ before(tokenizer, code, info)
+}
+
+fn before(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnResult {
+ println!("before: check existing open containers");
+ // First we iterate through the open blocks, starting with the root
+ // document, and descending through last children down to the last open
+ // block.
+ // Each block imposes a condition that the line must satisfy if the block
+ // is to remain open.
+ // For example, a block quote requires a `>` character.
+ // A paragraph requires a non-blank line.
+ // In this phase we may match all or just some of the open blocks.
+ // But we cannot close unmatched blocks yet, because we may have a lazy
+ // continuation line.
+ if info.continued < info.stack.len() {
+ let name = &info.stack[info.continued];
+ // To do: list.
+ let cont = if name == "blockquote" {
+ block_quote_cont
+ } else {
+ unreachable!("todo: cont construct {:?}", name)
+ };
+
+ // To do: state?
+
+ tokenizer.attempt(cont, move |ok| {
+ if ok {
+ Box::new(|t, c| document_continue(t, c, info))
+ } else {
+ Box::new(|t, c| check_new_containers(t, c, info))
+ }
+ })(tokenizer, code)
+ } else {
+ // Done.
+ check_new_containers(tokenizer, code, info)
+ }
+}
+
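As a standalone sketch of the continuation check described in the comment above (independent of the tokenizer's `attempt` machinery, with indentation handling simplified):

```rust
// Walk the open container stack from the outside in and count how many
// containers the current line still matches; unmatched containers are not
// closed yet, since the line may be a lazy continuation.
fn count_continued(stack: &[&str], line: &str) -> usize {
    let mut rest = line;
    let mut continued = 0;
    for name in stack {
        match *name {
            // A block quote stays open when the line carries a `>` marker.
            "blockquote" => match rest.trim_start().strip_prefix('>') {
                Some(after) => {
                    rest = after.strip_prefix(' ').unwrap_or(after);
                    continued += 1;
                }
                None => break,
            },
            _ => break,
        }
    }
    continued
}

fn main() {
    assert_eq!(count_continued(&["blockquote", "blockquote"], "> plain"), 1);
    assert_eq!(count_continued(&["blockquote", "blockquote"], "> > deep"), 2);
}
```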
+fn document_continue(
+ tokenizer: &mut Tokenizer,
+ code: Code,
+ mut info: DocumentInfo,
+) -> StateFnResult {
+ println!("document_continue");
+ info.continued += 1;
+
+ println!(" to do: close flow sometimes?");
+ // // Note: this field is called `_closeFlow` but it also closes containers.
+ // // Perhaps a good idea to rename it but it’s already used in the wild by
+ // // extensions.
+ // if (self.containerState._closeFlow) {
+ // self.containerState._closeFlow = undefined
+
+ // if (childFlow) {
+ // closeFlow()
+ // }
+
+ // // Note: this algorithm for moving events around is similar to the
+ // // algorithm when dealing with lazy lines in `writeToChild`.
+ // const indexBeforeExits = self.events.length
+ // let indexBeforeFlow = indexBeforeExits
+ // /** @type {Point|undefined} */
+ // let point
+
+ // // Find the flow chunk.
+ // while (indexBeforeFlow--) {
+ // if (
+ // self.events[indexBeforeFlow][0] === 'exit' &&
+ // self.events[indexBeforeFlow][1].type === types.chunkFlow
+ // ) {
+ // point = self.events[indexBeforeFlow][1].end
+ // break
+ // }
+ // }
+
+ // assert(point, 'could not find previous flow chunk')
+
+ let size = info.continued;
+ exit_containers(tokenizer, &mut info, size);
+
+ // // Fix positions.
+ // let index = indexBeforeExits
+
+ // while (index < self.events.length) {
+ // self.events[index][1].end = Object.assign({}, point)
+ // index++
+ // }
+
+ // // Inject the exits earlier (they’re still also at the end).
+ // splice(
+ // self.events,
+ // indexBeforeFlow + 1,
+ // 0,
+ // self.events.slice(indexBeforeExits)
+ // )
+
+ // // Discard the duplicate exits.
+ // self.events.length = index
+
+ // return checkNewContainers(code)
+ // }
+
+ before(tokenizer, code, info)
+}
+// documentContinue
+
+fn check_new_containers(
+ tokenizer: &mut Tokenizer,
+ code: Code,
+ info: DocumentInfo,
+) -> StateFnResult {
+ println!("check_new_containers");
+ // Next, after consuming the continuation markers for existing blocks, we
+ // look for new block starts (e.g. `>` for a block quote).
+ // If we encounter a new block start, we close any blocks unmatched in
+ // step 1 before creating the new block as a child of the last matched
+ // block.
+ if info.continued == info.stack.len() {
+ println!(" to do: concrete? interrupt?");
+ // // No need to `check` whether there’s a container, or `exitContainers`
+ // // would be moot.
+ // // We can instead immediately `attempt` to parse one.
+ // if (!childFlow) {
+ // return documentContinued(code)
+ // }
+
+ // // If we have concrete content, such as block HTML or fenced code,
+ // // we can’t have containers “pierce” into them, so we can immediately
+ // // start.
+ // if (childFlow.currentConstruct && childFlow.currentConstruct.concrete) {
+ // return flowStart(code)
+ // }
+
+ // // If we do have flow, it could still be a blank line,
+ // // but we’d be interrupting it w/ a new container if there’s a current
+ // // construct.
+ // self.interrupt = Boolean(
+ // childFlow.currentConstruct && !childFlow._gfmTableDynamicInterruptHack
+ // )
+ }
+
+ // Check if there is a new container.
+ // To do: list.
+ tokenizer.attempt(block_quote, move |ok| {
+ if ok {
+ Box::new(|t, c| there_is_a_new_container(t, c, info, "blockquote".to_string()))
+ } else {
+ Box::new(|t, c| there_is_no_new_container(t, c, info))
+ }
+ })(tokenizer, code)
+}
+
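Stripped of the tokenizer machinery, the decision this function feeds into can be sketched as follows (function and variable names hypothetical):

```rust
// Given how many open containers matched (`continued`) and whether the line
// starts a new container, close everything unmatched and open the new one as
// a child of the last matched container. Without a new start, unmatched
// containers are left open: the line may still be a lazy continuation.
fn resolve_new_start(stack: &mut Vec<&'static str>, continued: usize, new: Option<&'static str>) {
    if let Some(name) = new {
        stack.truncate(continued);
        stack.push(name);
    }
}

fn main() {
    // An open quote sees `> > deeper`: the outer quote matched (continued = 1)
    // and a fresh `>` opens a nested quote inside it.
    let mut stack = vec!["blockquote"];
    resolve_new_start(&mut stack, 1, Some("blockquote"));
    assert_eq!(stack, vec!["blockquote", "blockquote"]);
}
```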
+fn there_is_a_new_container(
+ tokenizer: &mut Tokenizer,
+ code: Code,
+ mut info: DocumentInfo,
+ name: String,
+) -> StateFnResult {
+ println!("there_is_a_new_container");
+ println!(" todo: close_flow");
+ // if (childFlow) closeFlow()
+ let size = info.continued;
+ exit_containers(tokenizer, &mut info, size);
+ info.stack.push(name);
+ info.continued += 1;
+ document_continued(tokenizer, code, info)
+}
+
+/// Exit open containers.
+fn exit_containers(tokenizer: &mut Tokenizer, info: &mut DocumentInfo, size: usize) {
+ while info.stack.len() > size {
+ let name = info.stack.pop().unwrap();
+
+ // To do: list.
+ let end = if name == "blockquote" {
+ block_quote_end
+ } else {
+ unreachable!("todo: cont {:?}", name)
+ };
+
+ // To do: improve below code.
+ let insert_index = if let Some(index) = info.last_line_ending_index {
+ index
+ } else {
+ tokenizer.events.len()
+ };
+ let eol_point = if let Some(index) = info.last_line_ending_index {
+ tokenizer.events[index].point.clone()
+ } else {
+ tokenizer.point.clone()
+ };
+ let eol_index = if let Some(index) = info.last_line_ending_index {
+ tokenizer.events[index].index
+ } else {
+ tokenizer.index
+ };
+
+ let token_types = end();
+
+ let mut index = 0;
+ while index < token_types.len() {
+ let token_type = &token_types[index];
+
+ info.map.add(
+ insert_index,
+ 0,
+ vec![Event {
+ event_type: EventType::Exit,
+ token_type: token_type.clone(),
+ point: eol_point.clone(),
+ index: eol_index,
+ previous: None,
+ next: None,
+ content_type: None,
+ }],
+ );
+
+ let mut stack_index = tokenizer.stack.len();
+
+ while stack_index > 0 {
+ stack_index -= 1;
+
+ if tokenizer.stack[stack_index] == *token_type {
+ break;
+ }
+ }
+
+ assert_eq!(
+ tokenizer.stack[stack_index], *token_type,
+ "expected token type"
+ );
+ tokenizer.stack.remove(stack_index);
+
+ index += 1;
+ }
+ }
+}
+
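The `EditMap` used above defers splices into the event list rather than mutating it while iterating. A minimal sketch of that idea with a made-up, insert-only API (the real type also supports removals):

```rust
// Record (index, events-to-insert) pairs, then apply them back to front so
// earlier indices are not invalidated by later insertions.
struct EditMapSketch {
    edits: Vec<(usize, Vec<&'static str>)>,
}

impl EditMapSketch {
    fn new() -> Self {
        Self { edits: Vec::new() }
    }
    fn add(&mut self, index: usize, add: Vec<&'static str>) {
        self.edits.push((index, add));
    }
    fn consume(mut self, events: &mut Vec<&'static str>) {
        // Highest index first, so pending indices stay valid.
        self.edits.sort_by(|a, b| b.0.cmp(&a.0));
        for (index, add) in self.edits {
            for event in add.into_iter().rev() {
                events.insert(index, event);
            }
        }
    }
}

fn main() {
    let mut events = vec!["enter:quote", "enter:para", "exit:para"];
    let mut map = EditMapSketch::new();
    // Inject the container exit at the position of the last line ending.
    map.add(3, vec!["exit:quote"]);
    map.consume(&mut events);
    assert_eq!(events, ["enter:quote", "enter:para", "exit:para", "exit:quote"]);
}
```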
+fn there_is_no_new_container(
+ tokenizer: &mut Tokenizer,
+ code: Code,
+ info: DocumentInfo,
+) -> StateFnResult {
+ let lazy = info.continued != info.stack.len();
+ tokenizer.lazy = lazy;
+ println!("there is no new container");
+ if lazy {
+ println!(
+ " This line will be lazy. Depending on what is parsed now, we need to close containers before?"
+ );
+ }
+ // lineStartOffset = self.now().offset
+ flow_start(tokenizer, code, info)
+}
+
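For context on the lazy-line question raised above, CommonMark's laziness rule in two examples (the strings are input; the comments describe the expected parse):

```rust
fn main() {
    // A line is "lazy" when not every open container matched but the line
    // continues paragraph content, so containers must not be closed yet:
    let lazy = "> a paragraph\nstill that same paragraph";
    // A blank line, by contrast, is not a lazy continuation; the quote is
    // closed before the following paragraph:
    let not_lazy = "> a paragraph\n\na new paragraph outside the quote";
    // The check itself reduces to comparing matched containers to the stack:
    let (continued, stack_len) = (0, 1);
    assert!(continued != stack_len); // this line is lazy
    let _ = (lazy, not_lazy);
}
```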
+fn document_continued(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnResult {
+ println!("document_continued");
+
+ // Try new containers.
+ // To do: list.
+ tokenizer.attempt(block_quote, |ok| {
+ if ok {
+ Box::new(|t, c| container_continue(t, c, info))
+ } else {
+ Box::new(|t, c| {
+ // To do: this looks like a bug?
+ t.lazy = false;
+ flow_start(t, c, info)
+ })
+ }
+ })(tokenizer, code)
+}
+
+fn container_continue(
+ tokenizer: &mut Tokenizer,
+ code: Code,
+ mut info: DocumentInfo,
+) -> StateFnResult {
+ println!("container_continue");
+ // assert(
+ // self.currentConstruct,
+ // 'expected `currentConstruct` to be defined on tokenizer'
+ // )
+ // assert(
+ // self.containerState,
+ // 'expected `containerState` to be defined on tokenizer'
+ // )
+ info.continued += 1;
+ // To do: add to stack?
+ // stack.push([self.currentConstruct, self.containerState])
+ // Try another.
+ document_continued(tokenizer, code, info)
+}
+
+fn flow_start(tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo) -> StateFnResult {
+ println!("flow_start");
+ let next = info.next;
+ info.next = Box::new(flow); // This is weird but Rust needs a function there.
+
+ let size = info.continued;
+ exit_containers(tokenizer, &mut info, size);
+
+ tokenizer.go_until(next, eof_eol, move |(state, remainder)| {
+ (
+ State::Fn(Box::new(move |t, c| flow_end(t, c, info, state))),
+ remainder,
+ )
+ })(tokenizer, code)
+}
+
+fn flow_end(
+ tokenizer: &mut Tokenizer,
+ code: Code,
+ mut info: DocumentInfo,
+ result: State,
+) -> StateFnResult {
+ println!("flow_end");
+ let was_lazy = tokenizer.lazy;
+
+ if was_lazy {
+ println!(
+ "this line was lazy. Depeding on what was parsed, we need to exit containers after it?"
+ );
+ }
+
+ info.continued = 0;
+
+ // To do: blank lines? Other things?
+ if tokenizer.events.len() > 2
+ && tokenizer.events[tokenizer.events.len() - 1].token_type == TokenType::LineEnding
+ {
+ info.last_line_ending_index = Some(tokenizer.events.len() - 2);
+ } else {
+ info.last_line_ending_index = None;
+ }
+
+ match result {
+ State::Ok => {
+ println!("State::Ok");
+ exit_containers(tokenizer, &mut info, 0);
+ tokenizer.events = info.map.consume(&mut tokenizer.events);
+ (State::Ok, Some(vec![code]))
+ }
+ State::Nok => unreachable!("handle nok in `flow`?"),
+ State::Fn(func) => {
+ info.next = func;
+ before(tokenizer, code, info)
+ }
+ }
+}
+
+fn eof_eol(code: Code) -> bool {
+ matches!(
+ code,
+ Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')
+ )
+}
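`eof_eol` is the predicate handed to `go_until` in `flow_start`, so flow parsing yields control back to the document parser at every line boundary. A standalone sketch over plain bytes:

```rust
// Stop conditions: end of input, or a CR / LF byte. (The tokenizer's `Code`
// type also models CRLF as a single unit; that is collapsed here.)
fn is_eol(byte: Option<u8>) -> bool {
    matches!(byte, None | Some(b'\n' | b'\r'))
}

fn main() {
    assert!(is_eol(None)); // end of input
    assert!(is_eol(Some(b'\n')));
    assert!(!is_eol(Some(b'>')));
}
```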
diff --git a/src/content/flow.rs b/src/content/flow.rs
index 74c6a62..f406685 100644
--- a/src/content/flow.rs
+++ b/src/content/flow.rs
@@ -26,52 +26,7 @@ use crate::construct::{
html_flow::start as html_flow, paragraph::start as paragraph,
thematic_break::start as thematic_break,
};
-use crate::parser::ParseState;
-use crate::subtokenize::subtokenize;
-use crate::tokenizer::{Code, Event, EventType, Point, State, StateFnResult, TokenType, Tokenizer};
-use crate::util::{
- normalize_identifier::normalize_identifier,
- span::{from_exit_event, serialize},
-};
-use std::collections::HashSet;
-
-/// Turn `codes` as the flow content type into events.
-pub fn flow(parse_state: &mut ParseState, point: Point, index: usize) -> Vec<Event> {
- let mut tokenizer = Tokenizer::new(point, index, parse_state);
- tokenizer.push(&parse_state.codes, Box::new(start), true);
- let mut next_definitions: HashSet<String> = HashSet::new();
-
- let mut index = 0;
-
- while index < tokenizer.events.len() {
- let event = &tokenizer.events[index];
-
- if event.event_type == EventType::Exit
- && event.token_type == TokenType::DefinitionLabelString
- {
- next_definitions.insert(normalize_identifier(
- serialize(
- &parse_state.codes,
- &from_exit_event(&tokenizer.events, index),
- false,
- )
- .as_str(),
- ));
- }
-
- index += 1;
- }
-
- let mut result = (tokenizer.events, false);
-
- parse_state.definitions = next_definitions;
-
- while !result.1 {
- result = subtokenize(result.0, parse_state);
- }
-
- result.0
-}
+use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};

/// Before flow.
///
@@ -83,7 +38,7 @@ pub fn flow(parse_state: &mut ParseState, point: Point, index: usize) -> Vec<Eve
/// | bravo
/// |***
/// ```
-fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::None => (State::Ok, None),
_ => tokenizer.attempt(blank_line, |ok| {
diff --git a/src/content/mod.rs b/src/content/mod.rs
index ae8ad83..af40cc0 100644
--- a/src/content/mod.rs
+++ b/src/content/mod.rs
@@ -1,5 +1,11 @@
//! Content types found in markdown.
+//!
+//! * [document][document]
+//! * [flow][flow]
+//! * [string][string]
+//! * [text][text]
+pub mod document;
pub mod flow;
pub mod string;
pub mod text;
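As a closing note on how these four content types relate (an illustration consistent with the doc comments in this commit, not an API demonstration):

```rust
fn main() {
    // `document` parses containers and feeds each line to `flow`; `flow`
    // parses leaf blocks; `text` is the full inline grammar inside those
    // blocks, and `string` is its restricted variant.
    for (layer, handles) in [
        ("document", "block quotes, lists"),
        ("flow", "paragraphs, code, headings"),
        ("text", "emphasis, links, escapes"),
        ("string", "character escapes and references only"),
    ] {
        println!("{layer}: {handles}");
    }
}
```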