about | summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Titus Wormer <tituswormer@gmail.com>, 2022-07-12 17:47:08 +0200
committer: Titus Wormer <tituswormer@gmail.com>, 2022-07-12 17:47:08 +0200
commit: 2e3b7abaa9877b658fa4f8f2612acc617dff60bb (patch)
tree: e823d041521a4af33a7e552ba58f1d4b63335be3 /src
parent: 75522b867b15b9a400275cfec9a2ead4ff535473 (diff)
download: markdown-rs-2e3b7abaa9877b658fa4f8f2612acc617dff60bb.tar.gz
markdown-rs-2e3b7abaa9877b658fa4f8f2612acc617dff60bb.tar.bz2
markdown-rs-2e3b7abaa9877b658fa4f8f2612acc617dff60bb.zip
Fix a lot of list things

* Add `ListItem`, `ListOrdered`, and `ListUnordered` tokens
* Add support for multiline list items
* Add support for tight lists
* Fix bug where 10 digit long list item values worked
* Fix skip bug when skipping over nested events

Diffstat (limited to 'src')
-rw-r--r-- src/compiler.rs 91
-rw-r--r-- src/construct/list.rs 210
-rw-r--r-- src/content/document.rs 23
-rw-r--r-- src/token.rs 4
-rw-r--r-- src/util/skip.rs 41
5 files changed, 313 insertions, 56 deletions
diff --git a/src/compiler.rs b/src/compiler.rs
index 753d85f..2d42011 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -447,7 +447,8 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
// To do: sort.
enter_map.insert(Token::ListItemMarker, on_enter_list_item_marker);
- enter_map.insert(Token::List, on_enter_list);
+ enter_map.insert(Token::ListOrdered, on_enter_list);
+ enter_map.insert(Token::ListUnordered, on_enter_list);
let mut exit_map: Map = HashMap::new();
exit_map.insert(Token::AutolinkEmail, on_exit_autolink_email);
@@ -525,8 +526,10 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
exit_map.insert(Token::ThematicBreak, on_exit_thematic_break);
// To do: sort.
- exit_map.insert(Token::List, on_exit_list);
exit_map.insert(Token::ListItemValue, on_exit_list_item_value);
+ exit_map.insert(Token::ListItem, on_exit_list_item);
+ exit_map.insert(Token::ListOrdered, on_exit_list);
+ exit_map.insert(Token::ListUnordered, on_exit_list);
// Handle one event.
let handle = |context: &mut CompileContext, index: usize| {
@@ -708,8 +711,14 @@ fn on_enter_link(context: &mut CompileContext) {
/// Handle [`Enter`][EventType::Enter]:[`Paragraph`][Token::Paragraph].
fn on_enter_paragraph(context: &mut CompileContext) {
- context.line_ending_if_needed();
- context.tag("<p>".to_string());
+ let tight = context.tight_stack.last().unwrap_or(&false);
+
+ if !tight {
+ context.line_ending_if_needed();
+ context.tag("<p>".to_string());
+ }
+
+ // context.slurp_all_line_endings = false;
}
/// Handle [`Enter`][EventType::Enter]:[`Resource`][Token::Resource].
@@ -1145,7 +1154,11 @@ fn on_exit_media(context: &mut CompileContext) {
/// Handle [`Exit`][EventType::Exit]:[`Paragraph`][Token::Paragraph].
fn on_exit_paragraph(context: &mut CompileContext) {
- context.tag("</p>".to_string());
+ let tight = context.tight_stack.last().unwrap_or(&false);
+
+ if !tight {
+ context.tag("</p>".to_string());
+ }
}
/// Handle [`Exit`][EventType::Exit]:[`ReferenceString`][Token::ReferenceString].
@@ -1187,14 +1200,58 @@ fn on_exit_thematic_break(context: &mut CompileContext) {
}
// To do: sort.
+/// To do (onenterlist{un,}ordered)
+fn on_enter_list(context: &mut CompileContext) {
+ let events = &context.events;
+ let mut index = context.index;
+ let mut balance = 0;
+ let mut loose = false;
+ let token_type = &events[index].token_type;
+
+ while index < events.len() {
+ let event = &events[index];
+
+ if event.event_type == EventType::Enter {
+ balance += 1;
+ } else {
+ balance -= 1;
+
+ // Blank line directly in list or directly in list item.
+ if balance < 3 && event.token_type == Token::BlankLineEnding {
+ loose = true;
+ break;
+ }
+
+ // Done.
+ if balance == 0 && event.token_type == *token_type {
+ break;
+ }
+ }
+
+ index += 1;
+ }
+
+ println!("list: {:?} {:?}", token_type, loose);
+ context.tight_stack.push(!loose);
+ context.line_ending_if_needed();
+ // Note: no `>`.
+ context.tag(format!(
+ "<{}",
+ if *token_type == Token::ListOrdered {
+ "ol"
+ } else {
+ "ul"
+ }
+ ));
+ context.expect_first_item = Some(true);
+}
+
/// To do
fn on_enter_list_item_marker(context: &mut CompileContext) {
let expect_first_item = context.expect_first_item.take().unwrap();
if expect_first_item {
context.tag(">".to_string());
- } else {
- on_exit_list_item(context);
}
context.line_ending_if_needed();
@@ -1204,15 +1261,6 @@ fn on_enter_list_item_marker(context: &mut CompileContext) {
context.last_was_tag = false;
}
-/// To do (onenterlist{un,}ordered)
-fn on_enter_list(context: &mut CompileContext) {
- // To do: !token._loose
- context.tight_stack.push(false);
- context.line_ending_if_needed();
- context.tag("<ol".to_string()); // To do: `ol` / `ul`.
- context.expect_first_item = Some(true);
-}
-
/// To do
fn on_exit_list_item_value(context: &mut CompileContext) {
let expect_first_item = context.expect_first_item.unwrap();
@@ -1232,21 +1280,24 @@ fn on_exit_list_item_value(context: &mut CompileContext) {
}
/// To do.
-/// Note: there is no actual `Token::ListItem`.
fn on_exit_list_item(context: &mut CompileContext) {
// && !context.slurp_all_line_endings
if context.last_was_tag {
context.line_ending_if_needed();
}
- context.tag("</li>".to_string()); // To do: `ol` / `ul`.
+ context.tag("</li>".to_string());
// context.slurp_all_line_endings = false;
}
/// To do.
fn on_exit_list(context: &mut CompileContext) {
- on_exit_list_item(context);
+ let tag_name = if context.events[context.index].token_type == Token::ListOrdered {
+ "ol"
+ } else {
+ "ul"
+ };
context.tight_stack.pop();
context.line_ending();
- context.tag("</ol>".to_string()); // To do: `ol` / `ul`.
+ context.tag(format!("</{}>", tag_name));
}
diff --git a/src/construct/list.rs b/src/construct/list.rs
index 96b2496..b4ae9b1 100644
--- a/src/construct/list.rs
+++ b/src/construct/list.rs
@@ -1,14 +1,21 @@
//! To do.
use crate::constant::{LIST_ITEM_VALUE_SIZE_MAX, TAB_SIZE};
-use crate::construct::partial_space_or_tab::space_or_tab_min_max;
+use crate::construct::{
+ blank_line::start as blank_line, partial_space_or_tab::space_or_tab_min_max,
+};
use crate::token::Token;
-use crate::tokenizer::{Code, State, StateFnResult, Tokenizer};
+use crate::tokenizer::{Code, Event, EventType, State, StateFnResult, Tokenizer};
+use crate::util::{
+ edit_map::EditMap,
+ skip,
+ span::{codes as codes_from_span, from_exit_event},
+};
-/// Type of title.
+/// Type of list.
#[derive(Debug, PartialEq)]
enum Kind {
- /// In a dot (`.`) list.
+ /// In a dot (`.`) list item.
///
/// ## Example
///
@@ -16,7 +23,7 @@ enum Kind {
/// 1. a
/// ```
Dot,
- /// In a paren (`)`) list.
+ /// In a paren (`)`) list item.
///
/// ## Example
///
@@ -24,7 +31,7 @@ enum Kind {
/// 1) a
/// ```
Paren,
- /// In an asterisk (`*`) list.
+ /// In an asterisk (`*`) list item.
///
/// ## Example
///
@@ -32,7 +39,7 @@ enum Kind {
/// * a
/// ```
Asterisk,
- /// In a plus (`+`) list.
+ /// In a plus (`+`) list item.
///
/// ## Example
///
@@ -40,7 +47,7 @@ enum Kind {
/// + a
/// ```
Plus,
- /// In a dash (`-`) list.
+ /// In a dash (`-`) list item.
///
/// ## Example
///
@@ -51,16 +58,16 @@ enum Kind {
}
impl Kind {
- /// Turn the kind into a [char].
- fn as_char(&self) -> char {
- match self {
- Kind::Dot => '.',
- Kind::Paren => ')',
- Kind::Asterisk => '*',
- Kind::Plus => '+',
- Kind::Dash => '-',
- }
- }
+ // /// Turn the kind into a [char].
+ // fn as_char(&self) -> char {
+ // match self {
+ // Kind::Dot => '.',
+ // Kind::Paren => ')',
+ // Kind::Asterisk => '*',
+ // Kind::Plus => '+',
+ // Kind::Dash => '-',
+ // }
+ // }
/// Turn a [char] into a kind.
///
/// ## Panics
@@ -101,13 +108,13 @@ fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
// Unordered.
Code::Char('*' | '+' | '-') => {
// To do: check if this is a thematic break?
- tokenizer.enter(Token::List);
+ tokenizer.enter(Token::ListItem);
tokenizer.enter(Token::ListItemPrefix);
marker(tokenizer, code)
}
// Ordered.
Code::Char(char) if char.is_ascii_digit() => {
- tokenizer.enter(Token::List);
+ tokenizer.enter(Token::ListItem);
tokenizer.enter(Token::ListItemPrefix);
tokenizer.enter(Token::ListItemValue);
// To do: `interrupt || !1`?
@@ -119,10 +126,10 @@ fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// To do.
fn inside(tokenizer: &mut Tokenizer, code: Code, mut size: usize) -> StateFnResult {
+ size += 1;
match code {
Code::Char(char) if char.is_ascii_digit() && size < LIST_ITEM_VALUE_SIZE_MAX => {
tokenizer.consume(code);
- size += 1;
(State::Fn(Box::new(move |t, c| inside(t, c, size))), None)
}
// To do: `(!self.interrupt || size < 2)`
@@ -172,6 +179,7 @@ fn prefix_other(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
fn prefix_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
// To do: calculate size.
tokenizer.exit(Token::ListItemPrefix);
+ tokenizer.register_resolver_before("list_item".to_string(), Box::new(resolve));
(State::Ok, Some(vec![code]))
}
@@ -189,7 +197,163 @@ fn list_item_prefix_whitespace_after(_tokenizer: &mut Tokenizer, code: Code) ->
(State::Ok, Some(vec![code]))
}
-/// End of a block quote.
+/// To do.
+pub fn cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ tokenizer.check(blank_line, |ok| {
+ let func = if ok { blank_cont } else { not_blank_cont };
+ Box::new(func)
+ })(tokenizer, code)
+}
+
+pub fn blank_cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ println!("cont: blank");
+ // self.containerState.furtherBlankLines =
+ // self.containerState.furtherBlankLines ||
+ // self.containerState.initialBlankLine
+
+ // We have a blank line.
+ // Still, try to consume at most the items size.
+ // To do: eat at most `size` whitespace.
+ tokenizer.go(space_or_tab_min_max(0, TAB_SIZE), blank_cont_after)(tokenizer, code)
+}
+
+pub fn blank_cont_after(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ println!("cont: blank: after");
+ (State::Ok, Some(vec![code]))
+}
+
+pub fn not_blank_cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ println!("cont: not blank");
+ // if (self.containerState.furtherBlankLines || !markdownSpace(code)) nok
+ // To do: eat exactly `size` whitespace.
+ tokenizer.go(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), blank_cont_after)(tokenizer, code)
+}
+
+/// To do.
pub fn end() -> Vec<Token> {
- vec![Token::List]
+ vec![Token::ListItem]
+}
+
+/// To do.
+pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> {
+ let mut edit_map = EditMap::new();
+
+ let mut index = 0;
+ println!("list item:before: {:?}", tokenizer.events.len());
+ while index < tokenizer.events.len() {
+ let event = &tokenizer.events[index];
+ println!(
+ "ev: {:?} {:?} {:?} {:?} {:?} {:?}",
+ index,
+ event.event_type,
+ event.token_type,
+ event.content_type,
+ event.previous,
+ event.next
+ );
+ index += 1;
+ }
+
+ let mut index = 0;
+ let mut balance = 0;
+ let mut list_items: Vec<(Kind, usize, usize, usize)> = vec![];
+ // To do: track balance? Or, check what’s between them?
+
+ while index < tokenizer.events.len() {
+ let event = &tokenizer.events[index];
+
+ if event.token_type == Token::ListItem {
+ if event.event_type == EventType::Enter {
+ let end = skip::opt(&tokenizer.events, index, &[Token::ListItem]) - 1;
+ let marker = skip::to(&tokenizer.events, index, &[Token::ListItemMarker]) + 1;
+ let codes = codes_from_span(
+ &tokenizer.parse_state.codes,
+ &from_exit_event(&tokenizer.events, marker),
+ );
+ let kind = Kind::from_code(codes[0]);
+ let current = (kind, balance, index, end);
+
+ let previous = list_items.last();
+ let mut matched = false;
+
+ // There’s a previous list item.
+ if let Some(previous) = previous {
+ // …with the same marker and depth, and with only (blank) line endings between them.
+ if previous.0 == current.0
+ && previous.1 == current.1
+ && skip::opt(
+ &tokenizer.events,
+ previous.3 + 1,
+ &[Token::LineEnding, Token::BlankLineEnding],
+ ) == current.2
+ {
+ matched = true;
+ }
+ }
+
+ if matched {
+ let previous = list_items.last_mut().unwrap();
+ previous.3 = current.3;
+ } else {
+ // let previous = list_items.pop();
+ // if let Some(previous) = previous {
+ // lists.push(previous);
+ // }
+
+ println!("prev:!match {:?} {:?}", previous, current);
+ list_items.push(current);
+ }
+
+ println!("enter: {:?}", event.token_type);
+ balance += 1;
+ } else {
+ println!("exit: {:?}", event.token_type);
+ balance -= 1;
+ }
+ }
+
+ index += 1;
+ }
+
+ let mut index = 0;
+ while index < list_items.len() {
+ let list_item = &list_items[index];
+ let mut list_start = tokenizer.events[list_item.2].clone();
+ let token_type = if matches!(list_item.0, Kind::Paren | Kind::Dot) {
+ Token::ListOrdered
+ } else {
+ Token::ListUnordered
+ };
+ list_start.token_type = token_type.clone();
+ let mut list_end = tokenizer.events[list_item.3].clone();
+ list_end.token_type = token_type;
+ println!("inject: {:?} {:?}", list_start, list_end);
+
+ edit_map.add(list_item.2, 0, vec![list_start]);
+ edit_map.add(list_item.3 + 1, 0, vec![list_end]);
+
+ index += 1;
+ }
+
+ println!("list items: {:#?}", list_items);
+
+ let events = edit_map.consume(&mut tokenizer.events);
+
+ let mut index = 0;
+ println!("list item:after: {:?}", events.len());
+ while index < events.len() {
+ let event = &events[index];
+ println!(
+ "ev: {:?} {:?} {:?} {:?} {:?} {:?}",
+ index,
+ event.event_type,
+ event.token_type,
+ event.content_type,
+ event.previous,
+ event.next
+ );
+ index += 1;
+ }
+
+ events
}
diff --git a/src/content/document.rs b/src/content/document.rs
index e32534e..c5bf5c8 100644
--- a/src/content/document.rs
+++ b/src/content/document.rs
@@ -10,7 +10,7 @@
use crate::construct::{
block_quote::{cont as block_quote_cont, end as block_quote_end, start as block_quote},
- list::{end as list_end, start as list},
+ list::{cont as list_const, end as list_end, start as list},
};
use crate::content::flow::start as flow;
use crate::parser::ParseState;
@@ -100,7 +100,7 @@ fn before(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnR
let cont = if name == "blockquote" {
block_quote_cont
} else if name == "list" {
- unreachable!("todo: list cont {:?}", name)
+ list_const
} else {
unreachable!("todo: cont construct {:?}", name)
};
@@ -183,7 +183,8 @@ fn there_is_a_new_container(
name: String,
) -> StateFnResult {
let size = info.continued;
- info = exit_containers(tokenizer, info, size, true);
+ println!("exit:0: {:?}", false);
+ info = exit_containers(tokenizer, info, size, false);
tokenizer.expect(code, true);
// Remove from the event stack.
@@ -272,6 +273,7 @@ fn exit_containers(
let mut index = 0;
while index < token_types.len() {
let token_type = &token_types[index];
+ println!("creating exit: {:?}", token_type);
exits.push(Event {
event_type: EventType::Exit,
@@ -289,7 +291,16 @@ fn exit_containers(
}
if !exits.is_empty() {
- let index = info.inject.len() - 1 - (if before { 1 } else { 0 });
+ let before = if before { 1 } else { 0 };
+ let mut index = info.inject.len() - 1;
+ println!("inject: {:?} {:?}", info.inject.len() - 1, before);
+ if before >= index {
+ // To do: maybe, if this branch happens, it’s a bug?
+ println!("inject:0: {:?}", index);
+ index = 0;
+ } else {
+ println!("set: {:?}", index);
+ }
info.inject[index].1.append(&mut exits);
}
@@ -377,6 +388,7 @@ fn flow_end(
}
// Exit containers.
+ println!("exit:1: {:?}", true);
info = exit_containers(tokenizer, info, continued, true);
tokenizer.expect(code, true);
@@ -386,6 +398,7 @@ fn flow_end(
match result {
State::Ok => {
+ println!("exit:3: {:?}", false);
info = exit_containers(tokenizer, info, 0, false);
tokenizer.expect(code, true);
@@ -433,7 +446,7 @@ fn flow_end(
tokenizer.events = map.consume(&mut tokenizer.events);
let mut index = 0;
- println!("after: {:?}", tokenizer.events.len());
+ println!("document:after: {:?}", tokenizer.events.len());
while index < tokenizer.events.len() {
let event = &tokenizer.events[index];
println!(
diff --git a/src/token.rs b/src/token.rs
index f60f9cd..889c3ba 100644
--- a/src/token.rs
+++ b/src/token.rs
@@ -1767,7 +1767,9 @@ pub enum Token {
ThematicBreakSequence,
// To do: sort.
- List,
+ ListOrdered,
+ ListUnordered,
+ ListItem,
ListItemPrefix,
ListItemValue,
ListItemMarker,
diff --git a/src/util/skip.rs b/src/util/skip.rs
index 971beb6..3307734 100644
--- a/src/util/skip.rs
+++ b/src/util/skip.rs
@@ -1,7 +1,7 @@
//! Utilities to deal with lists of events.
use crate::token::Token;
-use crate::tokenizer::Event;
+use crate::tokenizer::{Event, EventType};
/// Skip from `index`, optionally past `token_types`.
pub fn opt(events: &[Event], index: usize, token_types: &[Token]) -> usize {
@@ -13,33 +13,60 @@ pub fn opt_back(events: &[Event], index: usize, token_types: &[Token]) -> usize
skip_opt_with_direction(events, index, token_types, false)
}
+pub fn to(events: &[Event], mut index: usize, token_types: &[Token]) -> usize {
+ while index < events.len() {
+ let current = &events[index].token_type;
+
+ if token_types.contains(current) {
+ break;
+ }
+
+ index += 1;
+ }
+
+ index
+}
+
/// Skip internals.
fn skip_opt_with_direction(
events: &[Event],
- index: usize,
+ mut index: usize,
token_types: &[Token],
forward: bool,
) -> usize {
- let mut index = index;
+ let mut balance = 0;
+ let open = if forward {
+ EventType::Enter
+ } else {
+ EventType::Exit
+ };
while index < events.len() {
let current = &events[index].token_type;
- if !token_types.contains(current) {
+ if !token_types.contains(current) || events[index].event_type != open {
break;
}
- // assert_eq!(events[index].event_type, EventType::Enter);
index = if forward { index + 1 } else { index - 1 };
+ balance += 1;
loop {
- if events[index].token_type == *current {
- // assert_eq!(events[index].event_type, EventType::Exit);
+ balance = if events[index].event_type == open {
+ balance + 1
+ } else {
+ balance - 1
+ };
+
+ if events[index].token_type == *current && balance == 0 {
+ println!("close:it! {:?} {:?}", events[index].token_type, balance);
index = if forward { index + 1 } else { index - 1 };
+ println!("index:break: {:?}", index);
break;
}
index = if forward { index + 1 } else { index - 1 };
+ println!("index:loop: {:?}", index);
}
}