aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-07-15 14:45:50 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-07-15 14:45:50 +0200
commit337d3b3d8fca29ff90cf79c18690e14fb7a17ae2 (patch)
tree6ebf499875bd98aa72eccc7d9edf963142a08609 /src
parentd19934cf351168bd1a3a285b35af047f0fb3655b (diff)
downloadmarkdown-rs-337d3b3d8fca29ff90cf79c18690e14fb7a17ae2.tar.gz
markdown-rs-337d3b3d8fca29ff90cf79c18690e14fb7a17ae2.tar.bz2
markdown-rs-337d3b3d8fca29ff90cf79c18690e14fb7a17ae2.zip
Add docs and refactor document content type
Diffstat (limited to 'src')
-rw-r--r--src/content/document.rs527
-rw-r--r--src/tokenizer.rs11
2 files changed, 278 insertions, 260 deletions
diff --git a/src/content/document.rs b/src/content/document.rs
index 2006021..82ccd65 100644
--- a/src/content/document.rs
+++ b/src/content/document.rs
@@ -6,7 +6,7 @@
//! The constructs found in flow are:
//!
//! * [Block quote][crate::construct::block_quote]
-//! * List
+//! * [List][crate::construct::list]
use crate::construct::{
block_quote::{cont as block_quote_cont, end as block_quote_end, start as block_quote},
@@ -17,7 +17,8 @@ use crate::parser::ParseState;
use crate::subtokenize::subtokenize;
use crate::token::Token;
use crate::tokenizer::{
- Code, ContainerState, Event, EventType, Point, State, StateFn, StateFnResult, Tokenizer,
+ Code, Container, ContainerState, Event, EventType, Point, State, StateFn, StateFnResult,
+ Tokenizer,
};
use crate::util::edit_map::EditMap;
use crate::util::{
@@ -27,21 +28,52 @@ use crate::util::{
};
use std::collections::HashSet;
+/// Phases where we can exit containers.
#[derive(Debug, PartialEq)]
-enum Container {
- BlockQuote,
- ListItem,
+enum Phase {
+ /// After parsing a line of lazy flow which resulted in something that
+ /// exits containers before the line.
+ ///
+ /// ```markdown
+ /// | * a
+ /// > | ```js
+ /// ^
+ /// | b
+ /// | ```
+ /// ```
+ After,
+ /// When a new container replaces an existing container.
+ ///
+ /// ```markdown
+ /// | * a
+ /// > | > b
+ /// ^
+ /// ```
+ Prefix,
+ /// After everything.
+ ///
+ /// ```markdown
+ /// > | * a
+ /// ^
+ /// ```
+ Eof,
}
+/// State needed to parse document.
struct DocumentInfo {
+ /// Number of containers that have continued.
continued: usize,
+ /// Index into `tokenizer.events` we need to track.
index: usize,
- paragraph_before: bool,
- interrupt_before: bool,
+ /// Events of containers added back later.
inject: Vec<(Vec<Event>, Vec<Event>)>,
- stack: Vec<Container>,
- states: Vec<ContainerState>,
- stack_close: Vec<Container>,
+ /// The value of the previous line of flow’s `interrupt`.
+ interrupt_before: bool,
+ /// Whether the previous line of flow was a paragraph.
+ paragraph_before: bool,
+ /// Current containers.
+ stack: Vec<ContainerState>,
+ /// Current flow state function.
next: Box<StateFn>,
}
@@ -82,6 +114,13 @@ pub fn document(parse_state: &mut ParseState, point: Point, index: usize) -> Vec
result.0
}
+/// Before document.
+//
+/// ```markdown
+/// > | * a
+/// ^
+/// | > b
+/// ```
fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
let info = DocumentInfo {
index: 0,
@@ -91,15 +130,19 @@ fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
paragraph_before: false,
interrupt_before: false,
stack: vec![],
- states: vec![],
- stack_close: vec![],
};
line_start(tokenizer, code, info)
}
-/// Start of a new line.
+/// Start of a line.
+//
+/// ```markdown
+/// > | * a
+/// ^
+/// > | > b
+/// ^
+/// ```
fn line_start(tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo) -> StateFnResult {
- println!("line_start");
info.index = tokenizer.events.len();
info.inject.push((vec![], vec![]));
info.continued = 0;
@@ -109,34 +152,26 @@ fn line_start(tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo) ->
}
/// Before existing containers.
+//
+/// ```markdown
+/// | * a
+/// > | > b
+/// ^
+/// ```
fn container_existing_before(
tokenizer: &mut Tokenizer,
code: Code,
mut info: DocumentInfo,
) -> StateFnResult {
- println!("container_existing_before");
-
- // First we iterate through the open blocks, starting with the root
- // document, and descending through last children down to the last open
- // block.
- // Each block imposes a condition that the line must satisfy if the block
- // is to remain open.
- // For example, a block quote requires a `>` character.
- // A paragraph requires a non-blank line.
- // In this phase we may match all or just some of the open blocks.
- // But we cannot close unmatched blocks yet, because we may have a lazy
- // continuation line.
+ // If there are more existing containers, check whether the next one continues.
if info.continued < info.stack.len() {
- let kind = &info.stack[info.continued];
- let container = info.states.remove(info.continued);
- tokenizer.container = Some(container);
- let cont = match kind {
+ let container = info.stack.remove(info.continued);
+ let cont = match container.kind {
Container::BlockQuote => block_quote_cont,
Container::ListItem => list_item_const,
};
- // tokenizer.container = Some(&mut info.states[info.continued]);
- // To do: state?
+ tokenizer.container = Some(container);
tokenizer.attempt(cont, move |ok| {
if ok {
Box::new(|t, c| container_existing_after(t, c, info))
@@ -144,97 +179,124 @@ fn container_existing_before(
Box::new(|t, c| container_existing_missing(t, c, info))
}
})(tokenizer, code)
- } else {
- // Done.
+ }
+ // Otherwise, check new containers.
+ else {
container_new_before(tokenizer, code, info)
}
}
+/// At a missing, existing containers.
+//
+/// ```markdown
+/// | * a
+/// > | > b
+/// ^
+/// ```
fn container_existing_missing(
tokenizer: &mut Tokenizer,
code: Code,
mut info: DocumentInfo,
) -> StateFnResult {
let container = tokenizer.container.take().unwrap();
- info.states.insert(info.continued, container);
+ info.stack.insert(info.continued, container);
container_new_before(tokenizer, code, info)
}
+/// After an existing container.
+//
+/// ```markdown
+/// | * a
+/// > | b
+/// ^
+/// ```
fn container_existing_after(
tokenizer: &mut Tokenizer,
code: Code,
mut info: DocumentInfo,
) -> StateFnResult {
- println!("container_existing_after");
let container = tokenizer.container.take().unwrap();
- info.states.insert(info.continued, container);
+ info.stack.insert(info.continued, container);
info.continued += 1;
container_existing_before(tokenizer, code, info)
}
+/// Before a new container.
+//
+/// ```markdown
+/// > | * a
+/// ^
+/// > | > b
+/// ^
+/// ```
fn container_new_before(
tokenizer: &mut Tokenizer,
code: Code,
info: DocumentInfo,
) -> StateFnResult {
- println!("container_new_before");
- // Next, after consuming the continuation markers for existing blocks, we
- // look for new block starts (e.g. `>` for a block quote).
- // If we encounter a new block start, we close any blocks unmatched in
- // step 1 before creating the new block as a child of the last matched
- // block.
+ // If we have completely continued, restore the flow’s past `interrupt`
+ // status.
if info.continued == info.stack.len() {
- // If we have concrete content, such as block HTML or fenced code,
- // we can’t have containers “pierce” into them, so we can immediately
- // start.
+ tokenizer.interrupt = info.interrupt_before;
+
+ // …and if we’re in a concrete construct, new containers can’t “pierce”
+ // into them.
if tokenizer.concrete {
- println!(" concrete");
return containers_after(tokenizer, code, info);
}
-
- println!(
- " set interrupt to {:?} because we have continued (was: {:?})",
- info.interrupt_before, tokenizer.interrupt
- );
- tokenizer.interrupt = info.interrupt_before;
-
- // // If we do have flow, it could still be a blank line,
- // // but we’d be interrupting it w/ a new container if there’s a current
- // // construct.
- // self.interrupt = Boolean(
- // childFlow.currentConstruct && !childFlow._gfmTableDynamicInterruptHack
- // )
}
- tokenizer.container = Some(ContainerState::default());
- // Check if there is a new container.
+ // Check for a new container.
+ // Block quote?
+ tokenizer.container = Some(ContainerState {
+ kind: Container::BlockQuote,
+ blank_initial: false,
+ size: 0,
+ });
+
tokenizer.attempt(block_quote, move |ok| {
if ok {
- Box::new(|t, c| container_new_after(t, c, info, Container::BlockQuote))
+ Box::new(|t, c| container_new_after(t, c, info))
} else {
Box::new(|tokenizer, code| {
- tokenizer.container = Some(ContainerState::default());
- tokenizer.attempt(list_item, move |ok| {
- if ok {
- Box::new(|t, c| container_new_after(t, c, info, Container::ListItem))
+ // List item?
+ tokenizer.container = Some(ContainerState {
+ kind: Container::ListItem,
+ blank_initial: false,
+ size: 0,
+ });
+
+ tokenizer.attempt(list_item, |ok| {
+ let func = if ok {
+ container_new_after
} else {
- Box::new(|t, c| containers_after(t, c, info))
- }
+ containers_after
+ };
+ Box::new(move |t, c| func(t, c, info))
})(tokenizer, code)
})
}
})(tokenizer, code)
}
+/// After a new container.
+//
+/// ```markdown
+/// > | * a
+/// ^
+/// > | > b
+/// ^
+/// ```
fn container_new_after(
tokenizer: &mut Tokenizer,
code: Code,
mut info: DocumentInfo,
- kind: Container,
) -> StateFnResult {
+ let container = tokenizer.container.take().unwrap();
+
// Remove from the event stack.
// We’ll properly add exits at different points manually.
- let end = match kind {
+ let end = match container.kind {
Container::BlockQuote => block_quote_end,
Container::ListItem => list_item_end,
};
@@ -261,212 +323,114 @@ fn container_new_after(
index += 1;
}
- if info.continued < info.stack.len() {
- info.stack_close
- .append(&mut info.stack.drain(info.continued..).collect::<Vec<_>>());
- info.states.truncate(info.continued);
- info = line_end(tokenizer, info, false, true);
+ // If we did not continue all existing containers, and there is a new one,
+ // close the flow and those containers.
+ if info.continued != info.stack.len() {
+ info = exit_containers(tokenizer, info, &Phase::Prefix);
tokenizer.expect(code, true);
}
- let container = tokenizer.container.take().unwrap();
- info.states.push(container);
- info.stack.push(kind);
- info.continued = info.stack.len(); // To do: `+= 1`?
- println!(
- " set `interrupt`, `info.interrupt_before: false` because we have new containers (before: {:?}, {:?})",
- info.interrupt_before,
- tokenizer.interrupt
- );
+ // Try another new container.
+ info.stack.push(container);
+ info.continued += 1;
info.interrupt_before = false;
- tokenizer.interrupt = info.interrupt_before;
+ tokenizer.interrupt = false;
container_new_before(tokenizer, code, info)
}
+/// After containers, before flow.
+//
+/// ```markdown
+/// > | * a
+/// ^
+/// > | > b
+/// ^
+/// ```
fn containers_after(
tokenizer: &mut Tokenizer,
code: Code,
mut info: DocumentInfo,
) -> StateFnResult {
- println!("containers_after");
-
- // Add all container events we parsed.
- let mut containers = tokenizer.events.drain(info.index..).collect::<Vec<_>>();
- info.inject.last_mut().unwrap().0.append(&mut containers);
+ // Store the container events we parsed.
+ info.inject
+ .last_mut()
+ .unwrap()
+ .0
+ .append(&mut tokenizer.events.drain(info.index..).collect::<Vec<_>>());
tokenizer.lazy = info.continued != info.stack.len();
- println!(
- " restoring interrupt: {:?} (was: {:?})",
- info.interrupt_before, tokenizer.interrupt
- );
tokenizer.interrupt = info.interrupt_before;
-
- // Define start.
tokenizer.define_skip(tokenizer.point.clone(), tokenizer.index);
- flow_start(tokenizer, code, info)
-}
-
-fn flow_start(tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo) -> StateFnResult {
- println!("flow_start");
-
let state = info.next;
- info.next = Box::new(flow); // This is weird but Rust needs a function there.
-
- tokenizer.go_until(state, eol, move |(state, remainder)| {
- (
- State::Fn(Box::new(move |t, c| flow_end(t, c, info, state))),
- remainder,
- )
- })(tokenizer, code)
+ info.next = Box::new(flow);
+
+ // Parse flow, pausing after eols.
+ tokenizer.go_until(
+ state,
+ |code| matches!(code, Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')),
+ move |(state, remainder)| {
+ (
+ State::Fn(Box::new(move |t, c| flow_end(t, c, info, state))),
+ remainder,
+ )
+ },
+ )(tokenizer, code)
}
+/// After flow (after eol or at eof).
+//
+/// ```markdown
+/// | * a
+/// > | > b
+/// ^ ^
+/// ```
fn flow_end(
tokenizer: &mut Tokenizer,
code: Code,
mut info: DocumentInfo,
result: State,
) -> StateFnResult {
- println!("flow_end: lazy? {:?}", tokenizer.lazy);
-
- // To do: clean this!
- let index = tokenizer.events.len();
- let index = if index > 0 {
- skip::opt_back(&tokenizer.events, index - 1, &[Token::LineEnding])
- } else {
- 0
- };
-
- let paragraph = if index > 0 {
- let ev = &tokenizer.events[index];
- ev.point.offset + 1 >= tokenizer.point.offset
- && ev.token_type == Token::Paragraph
- && !(matches!(
- tokenizer.previous,
- Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')
- ) && matches!(code, Code::None))
- } else {
- false
- };
-
- let mut lazy = false;
-
- if tokenizer.lazy {
- println!("this line was lazy.");
-
- if info.paragraph_before && paragraph {
- println!("it was another paragraph, which is allowed.");
- lazy = true;
- } else {
- println!(
- "it was something else (prev: {:?}, cur: {:?}), which is not allowed.",
- info.paragraph_before, paragraph
- );
- }
+ let paragraph = !tokenizer.events.is_empty()
+ && tokenizer.events[skip::opt_back(
+ &tokenizer.events,
+ tokenizer.events.len() - 1,
+ &[Token::LineEnding],
+ )]
+ .token_type
+ == Token::Paragraph;
+
+ if tokenizer.lazy
+ && info.paragraph_before
+ && paragraph
+ && !(matches!(
+ tokenizer.previous,
+ Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')
+ ) && matches!(code, Code::None))
+ {
+ info.continued = info.stack.len();
}
- if !lazy && info.continued < info.stack.len() {
- info.stack_close
- .append(&mut info.stack.drain(info.continued..).collect::<Vec<_>>());
- info.states.truncate(info.continued);
+ if info.continued != info.stack.len() {
+ info = exit_containers(tokenizer, info, &Phase::After);
+ tokenizer.expect(code, true);
}
- info = line_end(tokenizer, info, false, false);
- tokenizer.expect(code, true);
-
info.paragraph_before = paragraph;
info.interrupt_before = tokenizer.interrupt;
match result {
State::Ok => {
- info.stack_close
- .append(&mut info.stack.drain(..).collect::<Vec<_>>());
- info = line_end(tokenizer, info, true, false);
-
- let mut map = EditMap::new();
- let mut line_index = 0;
- let mut index = 0;
-
- println!("injections: {:#?}", info.inject);
-
- let add = info.inject[line_index].0.clone();
- let mut first_line_ending_in_run: Option<usize> = None;
- println!("inject:enters:0: {:?}", add.len());
- map.add(0, 0, add);
-
- while index < tokenizer.events.len() {
- let event = &tokenizer.events[index];
-
- if event.token_type == Token::LineEnding
- || event.token_type == Token::BlankLineEnding
- {
- if event.event_type == EventType::Enter {
- first_line_ending_in_run = first_line_ending_in_run.or(Some(index));
- let mut add = info.inject[line_index].1.clone();
- let mut index = 0;
- while index < add.len() {
- add[index].point = event.point.clone();
- add[index].index = event.index;
- index += 1;
- }
- if !add.is_empty() {
- println!(
- "inject:exits:at-{:?}: {:?}",
- first_line_ending_in_run,
- add.len()
- );
- map.add(first_line_ending_in_run.unwrap(), 0, add);
- }
- } else {
- line_index += 1;
- let add = info.inject[line_index].0.clone();
- if !add.is_empty() {
- // No longer empty.
- first_line_ending_in_run = None;
- println!("inject:enters:at-{:?}: {:?}", index + 1, add.len());
- map.add(index + 1, 0, add);
- }
- }
- } else if event.token_type == Token::SpaceOrTab {
- // Empty to allow whitespace in blank lines.
- } else {
- first_line_ending_in_run = None;
- }
-
- index += 1;
+ if !info.stack.is_empty() {
+ info.continued = 0;
+ info = exit_containers(tokenizer, info, &Phase::Eof);
}
- let mut add = info.inject[line_index].1.clone();
- println!("inject:exits:tail-{:?}: {:?}", index, add.len());
- let mut deep_index = 0;
- while deep_index < add.len() {
- add[deep_index].point = tokenizer.point.clone();
- add[deep_index].index = tokenizer.index;
- deep_index += 1;
- }
- map.add(index, 0, add);
-
- tokenizer.events = map.consume(&mut tokenizer.events);
- let mut index = 0;
-
- println!("document:after: {:?}", tokenizer.events.len());
- while index < tokenizer.events.len() {
- let event = &tokenizer.events[index];
- println!(
- "ev: {:?} {:?} {:?} {:?} {:?} {:?}",
- index,
- event.event_type,
- event.token_type,
- event.content_type,
- event.previous,
- event.next
- );
- index += 1;
- }
+ tokenizer.events = resolve(tokenizer, &info);
(State::Ok, Some(vec![code]))
}
- State::Nok => unreachable!("handle nok in `flow`?"),
+ State::Nok => unreachable!("unexpected `nok` from flow"),
State::Fn(func) => {
info.next = func;
line_start(tokenizer, code, info)
@@ -474,22 +438,16 @@ fn flow_end(
}
}
-fn line_end(
+/// Close containers (and flow if needed).
+fn exit_containers(
tokenizer: &mut Tokenizer,
mut info: DocumentInfo,
- eof: bool,
- containers_before: bool,
+ phase: &Phase,
) -> DocumentInfo {
- let mut stack_close = info.stack_close.drain(..).collect::<Vec<_>>();
- println!("line_end: {:?}", stack_close);
-
- if stack_close.is_empty() {
- return info;
- }
+ let mut stack_close = info.stack.drain(info.continued..).collect::<Vec<_>>();
// So, we’re at the end of a line, but we need to close the *previous* line.
- if !eof {
- println!("closing previous flow");
+ if *phase != Phase::Eof {
tokenizer.define_skip(tokenizer.point.clone(), tokenizer.index);
let mut current_events = tokenizer.events.drain(info.index..).collect::<Vec<_>>();
let next = info.next;
@@ -498,7 +456,7 @@ fn line_end(
assert!(matches!(result.0, State::Ok));
assert!(result.1.is_none());
- if containers_before {
+ if *phase == Phase::Prefix {
info.index = tokenizer.events.len();
}
@@ -508,8 +466,8 @@ fn line_end(
let mut exits: Vec<Event> = vec![];
while !stack_close.is_empty() {
- let kind = stack_close.pop().unwrap();
- let end = match kind {
+ let container = stack_close.pop().unwrap();
+ let end = match container.kind {
Container::BlockQuote => block_quote_end,
Container::ListItem => list_item_end,
};
@@ -535,18 +493,69 @@ fn line_end(
}
}
- let index = info.inject.len() - (if eof { 1 } else { 2 });
+ let index = info.inject.len() - (if *phase == Phase::Eof { 1 } else { 2 });
info.inject[index].1.append(&mut exits);
-
- println!(
- " setting `info.interrupt_before: false` (before: {:?})",
- info.interrupt_before
- );
info.interrupt_before = false;
info
}
-fn eol(code: Code) -> bool {
- matches!(code, Code::CarriageReturnLineFeed | Code::Char('\n' | '\r'))
+// Inject the container events.
+fn resolve(tokenizer: &mut Tokenizer, info: &DocumentInfo) -> Vec<Event> {
+ let mut map = EditMap::new();
+ let mut line_index = 0;
+ let mut index = 0;
+
+ let add = info.inject[line_index].0.clone();
+ let mut first_line_ending_in_run: Option<usize> = None;
+ map.add(0, 0, add);
+
+ while index < tokenizer.events.len() {
+ let event = &tokenizer.events[index];
+
+ if event.token_type == Token::LineEnding || event.token_type == Token::BlankLineEnding {
+ if event.event_type == EventType::Enter {
+ first_line_ending_in_run = first_line_ending_in_run.or(Some(index));
+ let mut add = info.inject[line_index].1.clone();
+ let mut index = 0;
+ while index < add.len() {
+ add[index].point = event.point.clone();
+ add[index].index = event.index;
+ index += 1;
+ }
+ if !add.is_empty() {
+ map.add(first_line_ending_in_run.unwrap(), 0, add);
+ }
+ } else {
+ line_index += 1;
+ let add = info.inject[line_index].0.clone();
+ if !add.is_empty() {
+ // No longer empty.
+ first_line_ending_in_run = None;
+ map.add(index + 1, 0, add);
+ }
+ }
+ } else if event.token_type == Token::SpaceOrTab {
+ // Empty to allow whitespace in blank lines.
+ } else {
+ first_line_ending_in_run = None;
+ }
+
+ index += 1;
+ }
+
+ let mut add = info.inject[line_index].1.clone();
+ let mut index = 0;
+ while index < add.len() {
+ add[index].point = tokenizer.point.clone();
+ add[index].index = tokenizer.index;
+ index += 1;
+ }
+ map.add(
+ first_line_ending_in_run.unwrap_or(tokenizer.events.len()),
+ 0,
+ add,
+ );
+
+ map.consume(&mut tokenizer.events)
}
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 25efaac..17622e7 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -130,12 +130,21 @@ pub struct Media {
pub id: String,
}
+/// Supported containers.
+#[derive(Debug, PartialEq)]
+pub enum Container {
+ BlockQuote,
+ ListItem,
+}
+
/// Info used to tokenize the current container.
///
/// This info is shared between the initial construct and its continuation.
/// It’s only used for list items.
-#[derive(Default, Debug)]
+#[derive(Debug)]
pub struct ContainerState {
+ /// Kind.
+ pub kind: Container,
/// Whether the first (and all future) lines were blank.
pub blank_initial: bool,
/// The size of the initial construct.