From 8183323c432fc1359c634feb68bc372e13f0bd03 Mon Sep 17 00:00:00 2001
From: Titus Wormer
Date: Thu, 14 Jul 2022 17:40:10 +0200
Subject: Add support for container state
* Fix list item continuation to be parsed based on the size of the
  initial list item prefix
* Fix list items that start with blank lines
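An example of the first fix (per CommonMark): the prefix of `1.  fruit`
is four characters wide, so continuation lines must be indented by four;
an item that starts with a blank line instead gets a continuation indent
of the marker width plus one:

    1.  fruit
        more fruit

    -
      foo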
---
src/compiler.rs | 21 +++++++++--
src/construct/list.rs | 95 +++++++++++++++++++++++++++----------------------
src/content/document.rs | 39 +++++++++++++++-----
src/tokenizer.rs | 10 ++++++
src/util/skip.rs | 18 +++++++---
src/util/span.rs | 4 +--
6 files changed, 125 insertions(+), 62 deletions(-)
diff --git a/src/compiler.rs b/src/compiler.rs
index 37229a4..51c7e2b 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -1150,7 +1150,9 @@ fn on_exit_media(context: &mut CompileContext) {
fn on_exit_paragraph(context: &mut CompileContext) {
let tight = context.tight_stack.last().unwrap_or(&false);
- if !tight {
+ if *tight {
+ context.slurp_one_line_ending = true;
+ } else {
context.tag("
".to_string());
}
}
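Illustration (assuming the crate's `micromark` entry point; output per
CommonMark): in a tight list the paragraph tags are dropped and the line
ending after each paragraph is slurped, while a loose list keeps both.

    // micromark("* a\n* b")
    // => "<ul>\n<li>a</li>\n<li>b</li>\n</ul>"                        (tight)
    // micromark("* a\n\n* b")
    // => "<ul>\n<li>\n<p>a</p>\n</li>\n<li>\n<p>b</p>\n</li>\n</ul>"  (loose)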
@@ -1216,6 +1218,14 @@ fn on_enter_list(context: &mut CompileContext) {
if balance < 3 && event.token_type == Token::BlankLineEnding
// && !(balance == 1 && events[index - 2].token_type == Token::ListItem)
{
+ let at_marker = balance == 2
+ && events[skip::opt_back(
+ events,
+ index - 2,
+ &[Token::BlankLineEnding, Token::SpaceOrTab],
+ )]
+ .token_type
+ == Token::ListItemPrefix;
let at_list_item = balance == 1 && events[index - 2].token_type == Token::ListItem;
let at_empty_list_item = if at_list_item {
let before_item = skip::opt_back(events, index - 2, &[Token::ListItem]);
@@ -1229,7 +1239,7 @@ fn on_enter_list(context: &mut CompileContext) {
false
};
- if !at_list_item || !at_empty_list_item {
+ if !at_marker && (!at_list_item || !at_empty_list_item) {
loose = true;
break;
}
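The new `at_marker` case covers a blank line sitting directly at a list
item marker (an item that starts empty), which should not by itself make
the list loose. A sketch of the intended behavior (output per CommonMark,
assuming the `micromark` entry point):

    // micromark("- a\n-\n- c")
    // => "<ul>\n<li>a</li>\n<li></li>\n<li>c</li>\n</ul>"  (still tight)
    // micromark("- a\n-\n\n- c")  // a real blank line between items
    // => loose: the non-empty items are wrapped in `<p>` tags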
@@ -1297,7 +1307,12 @@ fn on_exit_list_item(context: &mut CompileContext) {
let before_item = skip::opt_back(
context.events,
context.index - 1,
- &[Token::BlankLineEnding, Token::LineEnding, Token::SpaceOrTab],
+ &[
+ Token::BlankLineEnding,
+ Token::LineEnding,
+ Token::SpaceOrTab,
+ Token::BlockQuotePrefix,
+ ],
);
let previous = &context.events[before_item];
let tight_paragraph = *tight && previous.token_type == Token::Paragraph;
diff --git a/src/construct/list.rs b/src/construct/list.rs
index d06eaf0..bab821c 100644
--- a/src/construct/list.rs
+++ b/src/construct/list.rs
@@ -99,6 +99,7 @@ impl Kind {
/// To do.
pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ tokenizer.enter(Token::ListItem);
// To do: allow arbitrary when code (indented) is turned off.
tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), before)(tokenizer, code)
}
@@ -108,12 +109,10 @@ fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
// Unordered.
Code::Char('*' | '+' | '-') => tokenizer.check(thematic_break, |ok| {
- let func = if ok { nok } else { before_unordered };
- Box::new(func)
+ Box::new(if ok { nok } else { before_unordered })
})(tokenizer, code),
// Ordered.
Code::Char(char) if char.is_ascii_digit() => {
- tokenizer.enter(Token::ListItem);
tokenizer.enter(Token::ListItemPrefix);
tokenizer.enter(Token::ListItemValue);
// To do: `interrupt || !1`?
@@ -125,8 +124,6 @@ fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// To do.
fn before_unordered(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- // To do: check if this is a thematic break?
- tokenizer.enter(Token::ListItem);
tokenizer.enter(Token::ListItemPrefix);
marker(tokenizer, code)
}
@@ -163,7 +160,6 @@ fn marker_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
let interrupt = tokenizer.interrupt;
tokenizer.check(blank_line, move |ok| {
- println!("check:blank_line:after {:?} {:?}", ok, interrupt);
let func = if ok {
if interrupt {
nok
@@ -179,9 +175,12 @@ fn marker_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// To do.
fn on_blank(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ if let Some(container) = tokenizer.container.as_mut() {
+ container.blank_initial = true;
+ }
+
// self.containerState.initialBlankLine = true
- // initialSize++
- prefix_end(tokenizer, code)
+ prefix_end(tokenizer, code, true)
}
/// To do.
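`blank_initial` mirrors `containerState.initialBlankLine` from micromark
(JS). Roughly, per CommonMark (assuming the `micromark` entry point):

    // An item may begin with at most one blank line:
    // micromark("-\n  foo")   => "<ul>\n<li>foo</li>\n</ul>"
    // A second blank line closes the still-empty item instead:
    // micromark("-\n\n  foo") => "<ul>\n<li></li>\n</ul>\n<p>foo</p>"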
@@ -189,8 +188,11 @@ fn marker_after_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
let interrupt = tokenizer.interrupt;
tokenizer.attempt(list_item_prefix_whitespace, move |ok| {
println!("marker:after:after: {:?} {:?}", ok, interrupt);
- let func = if ok { prefix_end } else { prefix_other };
- Box::new(func)
+ if ok {
+ Box::new(|t, c| prefix_end(t, c, false))
+ } else {
+ Box::new(prefix_other)
+ }
})(tokenizer, code)
}
@@ -203,15 +205,25 @@ fn prefix_other(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.enter(Token::SpaceOrTab);
tokenizer.consume(code);
tokenizer.exit(Token::SpaceOrTab);
- (State::Fn(Box::new(prefix_end)), None)
+ (State::Fn(Box::new(|t, c| prefix_end(t, c, false))), None)
}
_ => (State::Nok, None),
}
}
/// To do.
-fn prefix_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- // To do: calculate size.
+fn prefix_end(tokenizer: &mut Tokenizer, code: Code, blank: bool) -> StateFnResult {
+ let start = skip::to_back(
+ &tokenizer.events,
+ tokenizer.events.len() - 1,
+ &[Token::ListItem],
+ );
+ let prefix = tokenizer.index - tokenizer.events[start].index + (if blank { 1 } else { 0 });
+
+ if let Some(container) = tokenizer.container.as_mut() {
+ container.size = prefix;
+ }
+
tokenizer.exit(Token::ListItemPrefix);
tokenizer.register_resolver_before("list_item".to_string(), Box::new(resolve));
(State::Ok, Some(vec![code]))
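The size is the distance from the enter event of the current `ListItem`
to the end of its prefix, counting one extra (virtual) space after the
marker when the item starts with a blank line. A hypothetical
free-standing illustration (not the crate's API):

    fn prefix_size(prefix: &str, starts_blank: bool) -> usize {
        prefix.len() + usize::from(starts_blank)
    }

    fn main() {
        assert_eq!(prefix_size("1.  ", false), 4); // `1.  fruit`
        assert_eq!(prefix_size("-", true), 2);     // `-`, then a blank line
    }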
@@ -221,14 +233,17 @@ fn prefix_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
fn list_item_prefix_whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
// To do: check how big this should be?
tokenizer.go(
- space_or_tab_min_max(1, TAB_SIZE - 1),
+ space_or_tab_min_max(1, TAB_SIZE),
list_item_prefix_whitespace_after,
)(tokenizer, code)
}
fn list_item_prefix_whitespace_after(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- // To do: check some stuff?
- (State::Ok, Some(vec![code]))
+ if matches!(code, Code::VirtualSpace | Code::Char('\t' | ' ')) {
+ (State::Nok, None)
+ } else {
+ (State::Ok, Some(vec![code]))
+ }
}
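Allowing up to `TAB_SIZE` spaces here, but returning `Nok` when yet more
whitespace follows, matches the CommonMark rule that five or more spaces
after a marker mean the prefix claims only one space and the rest is
content (indented code); `prefix_other` above then consumes exactly that
one space. Roughly (output per CommonMark):

    // micromark("-     foo")  // five spaces after the marker
    // => "<ul>\n<li>\n<pre><code>foo\n</code></pre>\n</li>\n</ul>"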
/// To do.
@@ -240,46 +255,40 @@ fn nok(_tokenizer: &mut Tokenizer, _code: Code) -> StateFnResult {
pub fn cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.check(blank_line, |ok| {
println!("cont:check:blank:after: {:?}", ok);
- let func = if ok { blank_cont } else { not_blank_cont };
- Box::new(func)
+ Box::new(if ok { blank_cont } else { not_blank_cont })
})(tokenizer, code)
}
pub fn blank_cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- // self.containerState.furtherBlankLines =
- // self.containerState.furtherBlankLines ||
- // self.containerState.initialBlankLine
+ let mut size = 0;
+ if let Some(container) = tokenizer.container.as_ref() {
+ size = container.size;
+
+ if container.blank_initial {
+ return (State::Nok, None);
+ }
+ }
// We have a blank line.
// Still, try to consume at most the items size.
// To do: eat at most `size` whitespace.
- tokenizer.go(space_or_tab_min_max(0, TAB_SIZE), blank_cont_after)(tokenizer, code)
-}
-
-pub fn blank_cont_after(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- println!("cont: blank: after");
- (State::Ok, Some(vec![code]))
+ tokenizer.go(space_or_tab_min_max(0, size), cont_after)(tokenizer, code)
}
pub fn not_blank_cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- let index = tokenizer.events.len();
- let currently_blank =
- index > 0 && tokenizer.events[index - 1].token_type == Token::BlankLineEnding;
- let mut further_blank = false;
-
- if currently_blank && index > 5 {
- let before = skip::opt_back(&tokenizer.events, index - 3, &[Token::SpaceOrTab]);
- further_blank = tokenizer.events[before].token_type == Token::BlankLineEnding;
- }
+ let mut size = 0;
- if further_blank || !matches!(code, Code::VirtualSpace | Code::Char('\t' | ' ')) {
- println!("cont: not blank after further blank, or not blank w/o whitespace");
- (State::Nok, None)
- } else {
- println!("cont: not blank");
- // To do: eat exactly `size` whitespace.
- tokenizer.go(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), blank_cont_after)(tokenizer, code)
+ if let Some(container) = tokenizer.container.as_mut() {
+ container.blank_initial = false;
+ size = container.size;
}
+
+ tokenizer.go(space_or_tab_min_max(size, size), cont_after)(tokenizer, code)
+}
+
+pub fn cont_after(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ println!("cont: blank: after");
+ (State::Ok, Some(vec![code]))
}
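Continuation in terms of the stored `size`: a blank line inside the item
may be indented by at most `size`, while a non-blank continuation line
must supply exactly `size` columns (lazy lines are handled elsewhere).
For instance (output per CommonMark, assuming the `micromark` entry
point):

    // micromark("- a\n\n  b")  // two spaces match the item's size of 2
    // => "<ul>\n<li>\n<p>a</p>\n<p>b</p>\n</li>\n</ul>"
    // micromark("- a\n\nb")    // no indent: the item is closed first
    // => "<ul>\n<li>a</li>\n</ul>\n<p>b</p>"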
/// To do.
diff --git a/src/content/document.rs b/src/content/document.rs
index f6b8f55..bec0039 100644
--- a/src/content/document.rs
+++ b/src/content/document.rs
@@ -16,7 +16,9 @@ use crate::content::flow::start as flow;
use crate::parser::ParseState;
use crate::subtokenize::subtokenize;
use crate::token::Token;
-use crate::tokenizer::{Code, Event, EventType, Point, State, StateFn, StateFnResult, Tokenizer};
+use crate::tokenizer::{
+ Code, ContainerState, Event, EventType, Point, State, StateFn, StateFnResult, Tokenizer,
+};
use crate::util::edit_map::EditMap;
use crate::util::{
normalize_identifier::normalize_identifier,
@@ -37,6 +39,7 @@ struct DocumentInfo {
paragraph_before: bool,
inject: Vec<(Vec<Event>, Vec<Event>)>,
stack: Vec<Container>,
+ states: Vec<ContainerState>,
stack_close: Vec<Container>,
next: Box<StateFn>,
}
@@ -86,6 +89,7 @@ fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
next: Box::new(flow),
paragraph_before: false,
stack: vec![],
+ states: vec![],
stack_close: vec![],
};
line_start(tokenizer, code, info)
@@ -104,7 +108,7 @@ fn line_start(tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo) ->
fn container_existing_before(
tokenizer: &mut Tokenizer,
code: Code,
- info: DocumentInfo,
+ mut info: DocumentInfo,
) -> StateFnResult {
println!("container_existing_before");
@@ -120,17 +124,20 @@ fn container_existing_before(
// continuation line.
if info.continued < info.stack.len() {
let kind = &info.stack[info.continued];
+ let container = info.states.remove(info.continued);
+ tokenizer.container = Some(container);
let cont = match kind {
Container::BlockQuote => block_quote_cont,
Container::ListItem => list_item_const,
};
+ // tokenizer.container = Some(&mut info.states[info.continued]);
// To do: state?
tokenizer.attempt(cont, move |ok| {
if ok {
Box::new(|t, c| container_existing_after(t, c, info))
} else {
- Box::new(|t, c| container_new_before(t, c, info))
+ Box::new(|t, c| container_existing_missing(t, c, info))
}
})(tokenizer, code)
} else {
@@ -139,12 +146,24 @@ fn container_existing_before(
}
}
+fn container_existing_missing(
+ tokenizer: &mut Tokenizer,
+ code: Code,
+ mut info: DocumentInfo,
+) -> StateFnResult {
+ let container = tokenizer.container.take().unwrap();
+ info.states.insert(info.continued, container);
+ container_new_before(tokenizer, code, info)
+}
+
fn container_existing_after(
tokenizer: &mut Tokenizer,
code: Code,
mut info: DocumentInfo,
) -> StateFnResult {
println!("container_existing_after");
+ let container = tokenizer.container.take().unwrap();
+ info.states.insert(info.continued, container);
info.continued += 1;
container_existing_before(tokenizer, code, info)
}
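The handoff moves ownership instead of borrowing: the commented-out
`Some(&mut info.states[info.continued])` variant would require a mutable
borrow of `info` to live across the boxed state functions, which the
borrow checker rejects. So the `ContainerState` is removed from
`info.states` before the continuation runs and reinserted afterwards, on
both the ok path (`container_existing_after`) and the nok path
(`container_existing_missing`). The pattern, in isolation:

    // Move the state in, run the continuation, move it back out.
    let container = info.states.remove(info.continued);
    tokenizer.container = Some(container);
    // ... the container's `cont` state functions run here ...
    let container = tokenizer.container.take().unwrap();
    info.states.insert(info.continued, container);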
@@ -179,17 +198,16 @@ fn container_new_before(
// self.interrupt = Boolean(
// childFlow.currentConstruct && !childFlow._gfmTableDynamicInterruptHack
// )
- } else {
- // println!(" set interrupt to `false`! (before: {:?})", tokenizer.interrupt);
- // tokenizer.interrupt = false;
}
+ tokenizer.container = Some(ContainerState::default());
// Check if there is a new container.
tokenizer.attempt(block_quote, move |ok| {
if ok {
Box::new(|t, c| container_new_after(t, c, info, Container::BlockQuote))
} else {
Box::new(|tokenizer, code| {
+ tokenizer.container = Some(ContainerState::default());
tokenizer.attempt(list_item, move |ok| {
if ok {
Box::new(|t, c| container_new_after(t, c, info, Container::ListItem))
@@ -240,12 +258,15 @@ fn container_new_after(
if info.continued < info.stack.len() {
info.stack_close
.append(&mut info.stack.drain(info.continued..).collect::<Vec<_>>());
+ info.states.truncate(info.continued);
info = line_end(tokenizer, info, false, true);
tokenizer.expect(code, true);
}
+ let container = tokenizer.container.take().unwrap();
+ info.states.push(container);
info.stack.push(kind);
- info.continued = info.stack.len();
+ info.continued = info.stack.len(); // To do: `+= 1`?
container_new_before(tokenizer, code, info)
}
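When a new container starts while older ones did not continue, the
un-continued tail is closed and its saved states are truncated before the
new state is pushed. For example (output per CommonMark, assuming the
`micromark` entry point):

    // micromark("- a\n> b")  // the list does not continue on line 2
    // => "<ul>\n<li>a</li>\n</ul>\n<blockquote>\n<p>b</p>\n</blockquote>"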
@@ -261,7 +282,6 @@ fn containers_after(
info.inject.last_mut().unwrap().0.append(&mut containers);
tokenizer.lazy = info.continued != info.stack.len();
- println!("lazy: {:?} {:?}", info.continued, info.stack.len());
// Define start.
let point = tokenizer.point.clone();
@@ -331,6 +351,7 @@ fn flow_end(
if !lazy && info.continued < info.stack.len() {
info.stack_close
.append(&mut info.stack.drain(info.continued..).collect::<Vec<_>>());
+ info.states.truncate(info.continued);
}
info = line_end(tokenizer, info, false, false);
@@ -500,7 +521,7 @@ fn line_end(
info.inject[index].1.append(&mut exits);
println!(
- " setting `interrupt: false` (before: {:?}",
+ " setting `interrupt: false` (before: {:?})",
tokenizer.interrupt
);
tokenizer.interrupt = false;
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 163c2bf..34cfde3 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -130,6 +130,13 @@ pub struct Media {
pub id: String,
}
+/// To do.
+#[derive(Default, Debug)]
+pub struct ContainerState {
+ pub blank_initial: bool,
+ pub size: usize,
+}
+
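`ContainerState` carries per-container bookkeeping across lines: `size`
is the width of the list item prefix (the indentation continuation lines
must match) and `blank_initial` records that the item started with a
blank line. A rough lifecycle for `1.  a` (hypothetical trace, not the
crate's API):

    let mut state = ContainerState::default(); // blank_initial: false, size: 0
    state.size = 4; // set by `prefix_end` once `1.  ` has been parsed
    // `cont` later eats at most (blank line) or exactly (content line)
    // `state.size` columns of whitespace on each continuation line.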
/// The internal state of a tokenizer, not to be confused with states from the
/// state machine, this instead is all the information about where we currently
/// are and what’s going on.
@@ -203,6 +210,8 @@ pub struct Tokenizer<'a> {
pub concrete: bool,
/// To do.
pub lazy: bool,
+ /// To do.
+ pub container: Option<ContainerState>,
}
impl<'a> Tokenizer<'a> {
@@ -225,6 +234,7 @@ impl<'a> Tokenizer<'a> {
interrupt: false,
concrete: false,
lazy: false,
+ container: None,
resolvers: vec![],
resolver_ids: vec![],
}
diff --git a/src/util/skip.rs b/src/util/skip.rs
index 10ba364..d2ad914 100644
--- a/src/util/skip.rs
+++ b/src/util/skip.rs
@@ -5,15 +5,23 @@ use crate::tokenizer::{Event, EventType};
/// Skip from `index`, optionally past `token_types`.
pub fn opt(events: &[Event], index: usize, token_types: &[Token]) -> usize {
- skip_opt_with_direction(events, index, token_types, true)
+ skip_opt_impl(events, index, token_types, true)
}
/// Skip from `index`, optionally past `token_types`, backwards.
pub fn opt_back(events: &[Event], index: usize, token_types: &[Token]) -> usize {
- skip_opt_with_direction(events, index, token_types, false)
+ skip_opt_impl(events, index, token_types, false)
}
-pub fn to(events: &[Event], mut index: usize, token_types: &[Token]) -> usize {
+pub fn to_back(events: &[Event], index: usize, token_types: &[Token]) -> usize {
+ to_impl(events, index, token_types, false)
+}
+
+pub fn to(events: &[Event], index: usize, token_types: &[Token]) -> usize {
+ to_impl(events, index, token_types, true)
+}
+
+pub fn to_impl(events: &[Event], mut index: usize, token_types: &[Token], forward: bool) -> usize {
while index < events.len() {
let current = &events[index].token_type;
@@ -21,14 +29,14 @@ pub fn to(events: &[Event], mut index: usize, token_types: &[Token]) -> usize {
break;
}
- index += 1;
+ index = if forward { index + 1 } else { index - 1 };
}
index
}
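`to` and `to_back` complement `opt` and `opt_back`: the `opt` pair skips
past the given tokens, the `to` pair walks (forward or backward) until it
lands on one of them. As used by `prefix_end` above; note that callers
should guarantee a match exists, since stepping backward below index 0
would underflow:

    // Find the enter event of the list item currently being parsed:
    let start = skip::to_back(
        &tokenizer.events,
        tokenizer.events.len() - 1,
        &[Token::ListItem],
    );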
/// Skip internals.
-fn skip_opt_with_direction(
+fn skip_opt_impl(
events: &[Event],
mut index: usize,
token_types: &[Token],
diff --git a/src/util/span.rs b/src/util/span.rs
index 32dd00f..72b451d 100644
--- a/src/util/span.rs
+++ b/src/util/span.rs
@@ -6,9 +6,9 @@ use crate::util::codes::serialize as serialize_codes;
/// A struct representing the span of an opening and closing event of a token.
#[derive(Debug)]
pub struct Span {
- /// Absolute offset (and `index` in `codes`) of where this span starts.
+ /// Absolute offset (an `index` in `codes`) of where this span starts.
pub start_index: usize,
- /// Absolute offset (and `index` in `codes`) of where this span ends.
+ /// Absolute offset (an `index` in `codes`) of where this span ends.
pub end_index: usize,
}