aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author: Titus Wormer <tituswormer@gmail.com> 2022-07-14 15:01:03 +0200
committer: Titus Wormer <tituswormer@gmail.com> 2022-07-14 15:01:03 +0200
commit: e2c9664b0d63ec686f9e4625ac11bb21720f74dc (patch)
tree: 351b09f44bad5dcf9e077407495cd455c67b99c2 /src
parent: fb185e801e7d2002948d0b4e91ee06767e13ed00 (diff)
download: markdown-rs-e2c9664b0d63ec686f9e4625ac11bb21720f74dc.tar.gz
markdown-rs-e2c9664b0d63ec686f9e4625ac11bb21720f74dc.tar.bz2
markdown-rs-e2c9664b0d63ec686f9e4625ac11bb21720f74dc.zip
Fix a bunch of container things
* Fix a lot of container code
* Fix to prevent code (indented) from interrupting a lazy container
* Fix compiling when combining tight paragraphs, empty list items, and HTML
* Fix list items starting with blank lines causing loose lists
* Fix crash when looking for a previous blank line
Diffstat (limited to '')
-rw-r--r-- src/compiler.rs 55
-rw-r--r-- src/construct/html_flow.rs 7
-rw-r--r-- src/construct/list.rs 4
-rw-r--r-- src/content/document.rs 332
4 files changed, 221 insertions, 177 deletions
diff --git a/src/compiler.rs b/src/compiler.rs
index 4b38c8d..37229a4 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -8,6 +8,7 @@ use crate::util::{
decode_character_reference::{decode_named, decode_numeric},
encode::encode,
sanitize_uri::sanitize_uri,
+ skip,
span::{codes as codes_from_span, from_exit_event, serialize},
};
use std::collections::HashMap;
@@ -241,7 +242,6 @@ struct CompileContext<'a> {
pub tight_stack: Vec<bool>,
/// Fields used to influance the current compilation.
pub slurp_one_line_ending: bool,
- pub slurp_all_line_endings: bool,
pub tags: bool,
pub ignore_encode: bool,
pub last_was_tag: bool,
@@ -276,7 +276,6 @@ impl<'a> CompileContext<'a> {
definitions: HashMap::new(),
tight_stack: vec![],
slurp_one_line_ending: false,
- slurp_all_line_endings: false,
tags: true,
ignore_encode: false,
last_was_tag: false,
@@ -718,8 +717,6 @@ fn on_enter_paragraph(context: &mut CompileContext) {
context.line_ending_if_needed();
context.tag("<p>".to_string());
}
-
- context.slurp_all_line_endings = false;
}
/// Handle [`Enter`][EventType::Enter]:[`Resource`][Token::Resource].
@@ -785,7 +782,6 @@ fn on_exit_block_quote(context: &mut CompileContext) {
context.tight_stack.pop();
context.line_ending_if_needed();
context.tag("</blockquote>".to_string());
- context.slurp_all_line_endings = false;
}
/// Handle [`Exit`][EventType::Exit]:[`CharacterReferenceMarker`][Token::CharacterReferenceMarker].
@@ -1075,9 +1071,7 @@ fn on_exit_label_text(context: &mut CompileContext) {
/// Handle [`Exit`][EventType::Exit]:[`LineEnding`][Token::LineEnding].
fn on_exit_line_ending(context: &mut CompileContext) {
- if context.slurp_all_line_endings {
- // Empty.
- } else if context.slurp_one_line_ending {
+ if context.slurp_one_line_ending {
context.slurp_one_line_ending = false;
} else {
context.push(context.encode_opt(&serialize(
@@ -1156,9 +1150,7 @@ fn on_exit_media(context: &mut CompileContext) {
fn on_exit_paragraph(context: &mut CompileContext) {
let tight = context.tight_stack.last().unwrap_or(&false);
- if *tight {
- context.slurp_all_line_endings = true;
- } else {
+ if !tight {
context.tag("</p>".to_string());
}
}
@@ -1218,10 +1210,29 @@ fn on_enter_list(context: &mut CompileContext) {
} else {
balance -= 1;
- // Blank line directly in list or directly in list item.
- if balance < 3 && event.token_type == Token::BlankLineEnding {
- loose = true;
- break;
+ // Blank line directly in list or directly in list item,
+ // but not a blank line after an empty list item.
+ // To do: does this check if the item is empty?
+ if balance < 3 && event.token_type == Token::BlankLineEnding
+ // && !(balance == 1 && events[index - 2].token_type == Token::ListItem)
+ {
+ let at_list_item = balance == 1 && events[index - 2].token_type == Token::ListItem;
+ let at_empty_list_item = if at_list_item {
+ let before_item = skip::opt_back(events, index - 2, &[Token::ListItem]);
+ let before_prefix = skip::opt_back(
+ events,
+ index - 3,
+ &[Token::ListItemPrefix, Token::SpaceOrTab],
+ );
+ before_item + 1 == before_prefix
+ } else {
+ false
+ };
+
+ if !at_list_item || !at_empty_list_item {
+ loose = true;
+ break;
+ }
}
// Done.
@@ -1233,7 +1244,6 @@ fn on_enter_list(context: &mut CompileContext) {
index += 1;
}
- println!("list: {:?} {:?}", token_type, loose);
context.tight_stack.push(!loose);
context.line_ending_if_needed();
// Note: no `>`.
@@ -1283,12 +1293,21 @@ fn on_exit_list_item_value(context: &mut CompileContext) {
/// To do.
fn on_exit_list_item(context: &mut CompileContext) {
- if context.last_was_tag && !context.slurp_all_line_endings {
+ let tight = context.tight_stack.last().unwrap_or(&false);
+ let before_item = skip::opt_back(
+ context.events,
+ context.index - 1,
+ &[Token::BlankLineEnding, Token::LineEnding, Token::SpaceOrTab],
+ );
+ let previous = &context.events[before_item];
+ let tight_paragraph = *tight && previous.token_type == Token::Paragraph;
+ let empty_item = previous.token_type == Token::ListItemPrefix;
+
+ if !tight_paragraph && !empty_item {
context.line_ending_if_needed();
}
context.tag("</li>".to_string());
- context.slurp_all_line_endings = false;
}
/// To do.
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index a8b1efc..3300d2f 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -207,7 +207,6 @@ struct Info {
///
pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.enter(Token::HtmlFlow);
- tokenizer.enter(Token::HtmlFlowData);
// To do: allow arbitrary when code (indented) is turned off.
tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), before)(tokenizer, code)
}
@@ -219,6 +218,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// ```
fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
if Code::Char('<') == code {
+ tokenizer.enter(Token::HtmlFlowData);
tokenizer.consume(code);
(State::Fn(Box::new(open)), None)
} else {
@@ -771,11 +771,12 @@ fn continuation(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnRes
Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')
if info.kind == Kind::Basic || info.kind == Kind::Complete =>
{
+ tokenizer.exit(Token::HtmlFlowData);
tokenizer.check(blank_line_before, |ok| {
let func = if ok {
- continuation_close
+ html_continue_after
} else {
- continuation_at_line_ending
+ html_continue_start // continuation_at_line_ending
};
Box::new(move |t, c| func(t, c, info))
})(tokenizer, code)
diff --git a/src/construct/list.rs b/src/construct/list.rs
index 960c0eb..d06eaf0 100644
--- a/src/construct/list.rs
+++ b/src/construct/list.rs
@@ -267,7 +267,7 @@ pub fn not_blank_cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
index > 0 && tokenizer.events[index - 1].token_type == Token::BlankLineEnding;
let mut further_blank = false;
- if currently_blank && index > 3 {
+ if currently_blank && index > 5 {
let before = skip::opt_back(&tokenizer.events, index - 3, &[Token::SpaceOrTab]);
further_blank = tokenizer.events[before].token_type == Token::BlankLineEnding;
}
@@ -338,7 +338,7 @@ pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> {
&& skip::opt(
&tokenizer.events,
previous.3 + 1,
- &[Token::LineEnding, Token::BlankLineEnding],
+ &[Token::SpaceOrTab, Token::LineEnding, Token::BlankLineEnding],
) == current.2
{
println!("prev:match {:?} {:?}", previous, current);
diff --git a/src/content/document.rs b/src/content/document.rs
index b29e4b9..f6b8f55 100644
--- a/src/content/document.rs
+++ b/src/content/document.rs
@@ -10,7 +10,7 @@
use crate::construct::{
block_quote::{cont as block_quote_cont, end as block_quote_end, start as block_quote},
- list::{cont as list_const, end as list_end, start as list},
+ list::{cont as list_item_const, end as list_item_end, start as list_item},
};
use crate::content::flow::start as flow;
use crate::parser::ParseState;
@@ -25,12 +25,19 @@ use crate::util::{
};
use std::collections::HashSet;
+#[derive(Debug, PartialEq)]
+enum Container {
+ BlockQuote,
+ ListItem,
+}
+
struct DocumentInfo {
continued: usize,
- containers_begin_index: usize,
+ index: usize,
paragraph_before: bool,
inject: Vec<(Vec<Event>, Vec<Event>)>,
- stack: Vec<String>,
+ stack: Vec<Container>,
+ stack_close: Vec<Container>,
next: Box<StateFn>,
}
@@ -73,18 +80,34 @@ pub fn document(parse_state: &mut ParseState, point: Point, index: usize) -> Vec
fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
let info = DocumentInfo {
+ index: 0,
continued: 0,
- paragraph_before: false,
inject: vec![],
- containers_begin_index: 0,
- stack: vec![],
next: Box::new(flow),
+ paragraph_before: false,
+ stack: vec![],
+ stack_close: vec![],
};
- before(tokenizer, code, info)
+ line_start(tokenizer, code, info)
+}
+
+/// Start of a new line.
+fn line_start(tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo) -> StateFnResult {
+ println!("line_start");
+ info.index = tokenizer.events.len();
+ info.inject.push((vec![], vec![]));
+ info.continued = 0;
+ container_existing_before(tokenizer, code, info)
}
-fn before(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnResult {
- println!("before");
+/// Before existing containers.
+fn container_existing_before(
+ tokenizer: &mut Tokenizer,
+ code: Code,
+ info: DocumentInfo,
+) -> StateFnResult {
+ println!("container_existing_before");
+
// First we iterate through the open blocks, starting with the root
// document, and descending through last children down to the last open
// block.
@@ -96,45 +119,42 @@ fn before(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnR
// But we cannot close unmatched blocks yet, because we may have a lazy
// continuation line.
if info.continued < info.stack.len() {
- let name = &info.stack[info.continued];
- let cont = if name == "blockquote" {
- block_quote_cont
- } else if name == "list" {
- list_const
- } else {
- unreachable!("todo: cont construct {:?}", name)
+ let kind = &info.stack[info.continued];
+ let cont = match kind {
+ Container::BlockQuote => block_quote_cont,
+ Container::ListItem => list_item_const,
};
// To do: state?
tokenizer.attempt(cont, move |ok| {
if ok {
- Box::new(|t, c| document_continue(t, c, info))
+ Box::new(|t, c| container_existing_after(t, c, info))
} else {
- Box::new(|t, c| check_new_containers(t, c, info))
+ Box::new(|t, c| container_new_before(t, c, info))
}
})(tokenizer, code)
} else {
// Done.
- check_new_containers(tokenizer, code, info)
+ container_new_before(tokenizer, code, info)
}
}
-fn document_continue(
+fn container_existing_after(
tokenizer: &mut Tokenizer,
code: Code,
mut info: DocumentInfo,
) -> StateFnResult {
- println!("document_continue");
+ println!("container_existing_after");
info.continued += 1;
- before(tokenizer, code, info)
+ container_existing_before(tokenizer, code, info)
}
-fn check_new_containers(
+fn container_new_before(
tokenizer: &mut Tokenizer,
code: Code,
info: DocumentInfo,
) -> StateFnResult {
- println!("check_new_containers");
+ println!("container_new_before");
// Next, after consuming the continuation markers for existing blocks, we
// look for new block starts (e.g. `>` for a block quote).
// If we encounter a new block start, we close any blocks unmatched in
@@ -146,10 +166,13 @@ fn check_new_containers(
// start.
if tokenizer.concrete {
println!(" concrete");
- return there_is_no_new_container(tokenizer, code, info);
+ return containers_after(tokenizer, code, info);
}
- println!(" to do: interrupt ({:?})?", tokenizer.interrupt);
+ println!(
+ " to do: set interrupt? (before: {:?})",
+ tokenizer.interrupt
+ );
// // If we do have flow, it could still be a blank line,
// // but we’d be interrupting it w/ a new container if there’s a current
// // construct.
@@ -157,20 +180,21 @@ fn check_new_containers(
// childFlow.currentConstruct && !childFlow._gfmTableDynamicInterruptHack
// )
} else {
- tokenizer.interrupt = false;
+ // println!(" set interrupt to `false`! (before: {:?})", tokenizer.interrupt);
+ // tokenizer.interrupt = false;
}
// Check if there is a new container.
tokenizer.attempt(block_quote, move |ok| {
if ok {
- Box::new(|t, c| there_is_a_new_container(t, c, info, "blockquote".to_string()))
+ Box::new(|t, c| container_new_after(t, c, info, Container::BlockQuote))
} else {
Box::new(|tokenizer, code| {
- tokenizer.attempt(list, move |ok| {
+ tokenizer.attempt(list_item, move |ok| {
if ok {
- Box::new(|t, c| there_is_a_new_container(t, c, info, "list".to_string()))
+ Box::new(|t, c| container_new_after(t, c, info, Container::ListItem))
} else {
- Box::new(|t, c| there_is_no_new_container(t, c, info))
+ Box::new(|t, c| containers_after(t, c, info))
}
})(tokenizer, code)
})
@@ -178,25 +202,17 @@ fn check_new_containers(
})(tokenizer, code)
}
-fn there_is_a_new_container(
+fn container_new_after(
tokenizer: &mut Tokenizer,
code: Code,
mut info: DocumentInfo,
- name: String,
+ kind: Container,
) -> StateFnResult {
- let size = info.continued;
- println!("exit:0: {:?}", false);
- info = exit_containers(tokenizer, info, size, false);
- tokenizer.expect(code, true);
-
// Remove from the event stack.
// We’ll properly add exits at different points manually.
- let end = if name == "blockquote" {
- block_quote_end
- } else if name == "list" {
- list_end
- } else {
- unreachable!("todo: end {:?}", name)
+ let end = match kind {
+ Container::BlockQuote => block_quote_end,
+ Container::ListItem => list_item_end,
};
let token_types = end();
@@ -221,118 +237,42 @@ fn there_is_a_new_container(
index += 1;
}
- info.stack.push(name);
- document_continue(tokenizer, code, info)
-}
-
-/// Exit open containers.
-fn exit_containers(
- tokenizer: &mut Tokenizer,
- mut info: DocumentInfo,
- size: usize,
- before: bool,
-) -> DocumentInfo {
- let mut exits: Vec<Event> = vec![];
-
- if info.stack.len() > size {
- println!("closing flow");
- let index = tokenizer.events.len();
- let result = tokenizer.flush(info.next);
- info.next = Box::new(flow); // This is weird but Rust needs a function there.
- assert!(matches!(result.0, State::Ok));
- assert!(result.1.is_none());
-
- let mut end = tokenizer.events.len();
- while end > 0 && end > index {
- if tokenizer.events[end - 1].token_type != Token::LineEnding {
- break;
- }
-
- end -= 1;
- }
-
- let mut add = tokenizer.events.drain(index..end).collect::<Vec<_>>();
-
- exits.append(&mut add);
-
- println!(" setting `interrupt: false`");
- tokenizer.interrupt = false;
- }
-
- while info.stack.len() > size {
- let name = info.stack.pop().unwrap();
-
- let end = if name == "blockquote" {
- block_quote_end
- } else if name == "list" {
- list_end
- } else {
- unreachable!("todo: end {:?}", name)
- };
-
- let token_types = end();
-
- let mut index = 0;
- while index < token_types.len() {
- let token_type = &token_types[index];
-
- exits.push(Event {
- event_type: EventType::Exit,
- token_type: token_type.clone(),
- // Note: positions are fixed later.
- point: tokenizer.point.clone(),
- index: tokenizer.index,
- previous: None,
- next: None,
- content_type: None,
- });
-
- index += 1;
- }
- }
-
- if !exits.is_empty() {
- let before = if before { 1 } else { 0 };
- let mut index = info.inject.len() - 1;
- if before > index {
- // To do: maybe, if this branch happens, it’s a bug?
- println!("inject:0: {:?}", index);
- index = 0;
- } else {
- index -= before;
- println!("inject:set: {:?}", index);
- }
- info.inject[index].1.append(&mut exits);
+ if info.continued < info.stack.len() {
+ info.stack_close
+ .append(&mut info.stack.drain(info.continued..).collect::<Vec<_>>());
+ info = line_end(tokenizer, info, false, true);
+ tokenizer.expect(code, true);
}
- info
+ info.stack.push(kind);
+ info.continued = info.stack.len();
+ container_new_before(tokenizer, code, info)
}
-fn there_is_no_new_container(
+fn containers_after(
tokenizer: &mut Tokenizer,
code: Code,
- info: DocumentInfo,
+ mut info: DocumentInfo,
) -> StateFnResult {
- println!("there_is_no_new_container");
- tokenizer.lazy = info.continued != info.stack.len();
- // lineStartOffset = self.now().offset
- flow_start(tokenizer, code, info)
-}
+ println!("containers_after");
-fn flow_start(tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo) -> StateFnResult {
- println!("flow_start");
+ // Add all container events we parsed.
+ let mut containers = tokenizer.events.drain(info.index..).collect::<Vec<_>>();
+ info.inject.last_mut().unwrap().0.append(&mut containers);
- let containers = tokenizer
- .events
- .drain(info.containers_begin_index..)
- .collect::<Vec<_>>();
-
- info.inject.push((containers, vec![]));
+ tokenizer.lazy = info.continued != info.stack.len();
+ println!("lazy: {:?} {:?}", info.continued, info.stack.len());
// Define start.
let point = tokenizer.point.clone();
tokenizer.define_skip(&point);
+ flow_start(tokenizer, code, info)
+}
+
+fn flow_start(tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo) -> StateFnResult {
+ println!("flow_start");
+
let state = info.next;
info.next = Box::new(flow); // This is weird but Rust needs a function there.
@@ -352,6 +292,7 @@ fn flow_end(
) -> StateFnResult {
println!("flow_end: lazy? {:?}", tokenizer.lazy);
+ // To do: clean this!
let index = tokenizer.events.len();
let index = if index > 0 {
skip::opt_back(&tokenizer.events, index - 1, &[Token::LineEnding])
@@ -371,15 +312,14 @@ fn flow_end(
false
};
- let mut continued = info.continued;
- let size = info.stack.len();
+ let mut lazy = false;
if tokenizer.lazy {
println!("this line was lazy.");
if info.paragraph_before && paragraph {
println!("it was another paragraph, which is allowed.");
- continued = size;
+ lazy = true;
} else {
println!(
"it was something else (prev: {:?}, cur: {:?}), which is not allowed.",
@@ -388,27 +328,31 @@ fn flow_end(
}
}
- // Exit containers.
- println!("exit:1: {:?}", true);
- info = exit_containers(tokenizer, info, continued, true);
+ if !lazy && info.continued < info.stack.len() {
+ info.stack_close
+ .append(&mut info.stack.drain(info.continued..).collect::<Vec<_>>());
+ }
+
+ info = line_end(tokenizer, info, false, false);
tokenizer.expect(code, true);
- info.continued = 0;
info.paragraph_before = paragraph;
- info.containers_begin_index = tokenizer.events.len();
match result {
State::Ok => {
- println!("exit:3: {:?}", false);
- info = exit_containers(tokenizer, info, 0, false);
- tokenizer.expect(code, true);
+ info.stack_close
+ .append(&mut info.stack.drain(..).collect::<Vec<_>>());
+ info = line_end(tokenizer, info, true, false);
let mut map = EditMap::new();
let mut line_index = 0;
let mut index = 0;
+ println!("injections: {:#?}", info.inject);
+
let add = info.inject[line_index].0.clone();
let mut first_line_ending_in_run: Option<usize> = None;
+ println!("inject:enters:0: {:?}", add.len());
map.add(0, 0, add);
while index < tokenizer.events.len() {
@@ -427,6 +371,11 @@ fn flow_end(
index += 1;
}
if !add.is_empty() {
+ println!(
+ "inject:exits:at-{:?}: {:?}",
+ first_line_ending_in_run,
+ add.len()
+ );
map.add(first_line_ending_in_run.unwrap(), 0, add);
}
} else {
@@ -435,6 +384,7 @@ fn flow_end(
if !add.is_empty() {
// No longer empty.
first_line_ending_in_run = None;
+ println!("inject:enters:at-{:?}: {:?}", index + 1, add.len());
map.add(index + 1, 0, add);
}
}
@@ -448,6 +398,7 @@ fn flow_end(
}
let mut add = info.inject[line_index].1.clone();
+ println!("inject:exits:tail-{:?}: {:?}", index, add.len());
let mut deep_index = 0;
while deep_index < add.len() {
add[deep_index].point = tokenizer.point.clone();
@@ -479,11 +430,84 @@ fn flow_end(
State::Nok => unreachable!("handle nok in `flow`?"),
State::Fn(func) => {
info.next = func;
- before(tokenizer, code, info)
+ line_start(tokenizer, code, info)
}
}
}
+fn line_end(
+ tokenizer: &mut Tokenizer,
+ mut info: DocumentInfo,
+ eof: bool,
+ containers_before: bool,
+) -> DocumentInfo {
+ let mut stack_close = info.stack_close.drain(..).collect::<Vec<_>>();
+ println!("line_end: {:?}", stack_close);
+
+ if stack_close.is_empty() {
+ return info;
+ }
+
+ // So, we’re at the end of a line, but we need to close the *previous* line.
+ if !eof {
+ println!("closing previous flow");
+ tokenizer.define_skip(&tokenizer.point.clone());
+ let mut current_events = tokenizer.events.drain(info.index..).collect::<Vec<_>>();
+ let next = info.next;
+ info.next = Box::new(flow); // This is weird but Rust needs a function there.
+ let result = tokenizer.flush(next);
+ assert!(matches!(result.0, State::Ok));
+ assert!(result.1.is_none());
+
+ if containers_before {
+ info.index = tokenizer.events.len();
+ }
+
+ tokenizer.events.append(&mut current_events);
+ }
+
+ let mut exits: Vec<Event> = vec![];
+
+ while !stack_close.is_empty() {
+ let kind = stack_close.pop().unwrap();
+ let end = match kind {
+ Container::BlockQuote => block_quote_end,
+ Container::ListItem => list_item_end,
+ };
+
+ let token_types = end();
+
+ let mut index = 0;
+ while index < token_types.len() {
+ let token_type = &token_types[index];
+
+ exits.push(Event {
+ event_type: EventType::Exit,
+ token_type: token_type.clone(),
+ // Note: positions are fixed later.
+ point: tokenizer.point.clone(),
+ index: tokenizer.index,
+ previous: None,
+ next: None,
+ content_type: None,
+ });
+
+ index += 1;
+ }
+ }
+
+ let index = info.inject.len() - (if eof { 1 } else { 2 });
+ info.inject[index].1.append(&mut exits);
+
+ println!(
+ " setting `interrupt: false` (before: {:?}",
+ tokenizer.interrupt
+ );
+ tokenizer.interrupt = false;
+
+ info
+}
+
fn eof_eol(code: Code) -> bool {
matches!(
code,