about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author    Titus Wormer <tituswormer@gmail.com>  2022-07-12 13:00:53 +0200
committer Titus Wormer <tituswormer@gmail.com>  2022-07-12 13:00:53 +0200
commit  75522b867b15b9a400275cfec9a2ead4ff535473 (patch)
tree    4a9511f6b8899e1ead2ca02686ffd571b1bd4e1f /src
parent  2ce19d9fd8f75ee1e3d62762e91f5d18303d4d6b (diff)
download: markdown-rs-75522b867b15b9a400275cfec9a2ead4ff535473.tar.gz
download: markdown-rs-75522b867b15b9a400275cfec9a2ead4ff535473.tar.bz2
download: markdown-rs-75522b867b15b9a400275cfec9a2ead4ff535473.zip
Add initial support for lists
Diffstat (limited to '')
-rw-r--r--  src/compiler.rs         |  82
-rw-r--r--  src/constant.rs         |   4
-rw-r--r--  src/construct/list.rs   | 195
-rw-r--r--  src/construct/mod.rs    |   4
-rw-r--r--  src/content/document.rs |  31
-rw-r--r--  src/token.rs            |   7
6 files changed, 308 insertions(+), 15 deletions(-)
diff --git a/src/compiler.rs b/src/compiler.rs
index 8a28654..753d85f 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -235,6 +235,7 @@ struct CompileContext<'a> {
pub code_flow_seen_data: Option<bool>,
pub code_fenced_fences_count: Option<usize>,
pub character_reference_kind: Option<CharacterReferenceKind>,
+ pub expect_first_item: Option<bool>,
pub media_stack: Vec<Media>,
pub definitions: HashMap<String, Definition>,
pub tight_stack: Vec<bool>,
@@ -269,6 +270,7 @@ impl<'a> CompileContext<'a> {
code_flow_seen_data: None,
code_fenced_fences_count: None,
character_reference_kind: None,
+ expect_first_item: None,
media_stack: vec![],
definitions: HashMap::new(),
tight_stack: vec![],
@@ -379,7 +381,7 @@ impl<'a> CompileContext<'a> {
/// Turn events and codes into a string of HTML.
#[allow(clippy::too_many_lines)]
pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
- // let mut slurp_all_line_endings = false;
+ // let slurp_all_line_endings = false;
let mut index = 0;
let mut line_ending_inferred: Option<LineEnding> = None;
@@ -443,6 +445,10 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
enter_map.insert(Token::ResourceTitleString, on_enter_buffer);
enter_map.insert(Token::Strong, on_enter_strong);
+ // To do: sort.
+ enter_map.insert(Token::ListItemMarker, on_enter_list_item_marker);
+ enter_map.insert(Token::List, on_enter_list);
+
let mut exit_map: Map = HashMap::new();
exit_map.insert(Token::AutolinkEmail, on_exit_autolink_email);
exit_map.insert(Token::AutolinkProtocol, on_exit_autolink_protocol);
@@ -488,7 +494,6 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
on_exit_definition_title_string,
);
exit_map.insert(Token::Emphasis, on_exit_emphasis);
-
exit_map.insert(Token::HardBreakEscape, on_exit_break);
exit_map.insert(Token::HardBreakTrailing, on_exit_break);
exit_map.insert(Token::HeadingAtx, on_exit_heading_atx);
@@ -519,6 +524,10 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
exit_map.insert(Token::Strong, on_exit_strong);
exit_map.insert(Token::ThematicBreak, on_exit_thematic_break);
+ // To do: sort.
+ exit_map.insert(Token::List, on_exit_list);
+ exit_map.insert(Token::ListItemValue, on_exit_list_item_value);
+
// Handle one event.
let handle = |context: &mut CompileContext, index: usize| {
let event = &events[index];
@@ -766,7 +775,7 @@ fn on_exit_block_quote(context: &mut CompileContext) {
context.tight_stack.pop();
context.line_ending_if_needed();
context.tag("</blockquote>".to_string());
- // let mut slurp_all_line_endings = false;
+ // context.slurp_all_line_endings = false;
}
/// Handle [`Exit`][EventType::Exit]:[`CharacterReferenceMarker`][Token::CharacterReferenceMarker].
@@ -1056,7 +1065,7 @@ fn on_exit_label_text(context: &mut CompileContext) {
/// Handle [`Exit`][EventType::Exit]:[`LineEnding`][Token::LineEnding].
fn on_exit_line_ending(context: &mut CompileContext) {
- // if slurp_all_line_endings {
+ // if context.slurp_all_line_endings {
// // Empty.
// } else
if context.slurp_one_line_ending {
@@ -1176,3 +1185,68 @@ fn on_exit_thematic_break(context: &mut CompileContext) {
context.line_ending_if_needed();
context.tag("<hr />".to_string());
}
+
+// To do: sort.
+/// To do
+fn on_enter_list_item_marker(context: &mut CompileContext) {
+ let expect_first_item = context.expect_first_item.take().unwrap();
+
+ if expect_first_item {
+ context.tag(">".to_string());
+ } else {
+ on_exit_list_item(context);
+ }
+
+ context.line_ending_if_needed();
+ context.tag("<li>".to_string());
+ context.expect_first_item = Some(false);
+ // “Hack” to prevent a line ending from showing up if the item is empty.
+ context.last_was_tag = false;
+}
+
+/// To do (onenterlist{un,}ordered)
+fn on_enter_list(context: &mut CompileContext) {
+ // To do: !token._loose
+ context.tight_stack.push(false);
+ context.line_ending_if_needed();
+ context.tag("<ol".to_string()); // To do: `ol` / `ul`.
+ context.expect_first_item = Some(true);
+}
+
+/// To do
+fn on_exit_list_item_value(context: &mut CompileContext) {
+ let expect_first_item = context.expect_first_item.unwrap();
+
+ if expect_first_item {
+ let slice = serialize(
+ context.codes,
+ &from_exit_event(context.events, context.index),
+ false,
+ );
+ let value = slice.parse::<u32>().ok().unwrap();
+
+ if value != 1 {
+ context.tag(format!(" start=\"{}\"", encode(&value.to_string())));
+ }
+ }
+}
+
+/// To do.
+/// Note: there is no actual `Token::ListItem`.
+fn on_exit_list_item(context: &mut CompileContext) {
+ // && !context.slurp_all_line_endings
+ if context.last_was_tag {
+ context.line_ending_if_needed();
+ }
+
+ context.tag("</li>".to_string()); // To do: `ol` / `ul`.
+ // context.slurp_all_line_endings = false;
+}
+
+/// To do.
+fn on_exit_list(context: &mut CompileContext) {
+ on_exit_list_item(context);
+ context.tight_stack.pop();
+ context.line_ending();
+ context.tag("</ol>".to_string()); // To do: `ol` / `ul`.
+}
diff --git a/src/constant.rs b/src/constant.rs
index fc74316..5dd2435 100644
--- a/src/constant.rs
+++ b/src/constant.rs
@@ -193,6 +193,10 @@ pub const HTML_RAW_SIZE_MAX: usize = 8;
/// To safeguard performance, labels are capped at a large number: `999`.
pub const LINK_REFERENCE_SIZE_MAX: usize = 999;
+/// To do.
+/// See: <https://spec.commonmark.org/0.30/#ordered-list-marker>
+pub const LIST_ITEM_VALUE_SIZE_MAX: usize = 10;
+
/// Maximum allowed unbalanced parens in destination.
///
/// There can be many balanced parens, but if there are 33 opens that were not
diff --git a/src/construct/list.rs b/src/construct/list.rs
new file mode 100644
index 0000000..96b2496
--- /dev/null
+++ b/src/construct/list.rs
@@ -0,0 +1,195 @@
+//! To do.
+
+use crate::constant::{LIST_ITEM_VALUE_SIZE_MAX, TAB_SIZE};
+use crate::construct::partial_space_or_tab::space_or_tab_min_max;
+use crate::token::Token;
+use crate::tokenizer::{Code, State, StateFnResult, Tokenizer};
+
+/// Type of title.
+#[derive(Debug, PartialEq)]
+enum Kind {
+ /// In a dot (`.`) list.
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// 1. a
+ /// ```
+ Dot,
+ /// In a paren (`)`) list.
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// 1) a
+ /// ```
+ Paren,
+ /// In an asterisk (`*`) list.
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// * a
+ /// ```
+ Asterisk,
+ /// In a plus (`+`) list.
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// + a
+ /// ```
+ Plus,
+ /// In a dash (`-`) list.
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// - a
+ /// ```
+ Dash,
+}
+
+impl Kind {
+ /// Turn the kind into a [char].
+ fn as_char(&self) -> char {
+ match self {
+ Kind::Dot => '.',
+ Kind::Paren => ')',
+ Kind::Asterisk => '*',
+ Kind::Plus => '+',
+ Kind::Dash => '-',
+ }
+ }
+ /// Turn a [char] into a kind.
+ ///
+ /// ## Panics
+ ///
+ /// Panics if `char` is not `.`, `)`, `*`, `+`, or `-`.
+ fn from_char(char: char) -> Kind {
+ match char {
+ '.' => Kind::Dot,
+ ')' => Kind::Paren,
+ '*' => Kind::Asterisk,
+ '+' => Kind::Plus,
+ '-' => Kind::Dash,
+ _ => unreachable!("invalid char"),
+ }
+ }
+ /// Turn [Code] into a kind.
+ ///
+ /// ## Panics
+ ///
+ /// Panics if `code` is not `Code::Char('.' | ')' | '*' | '+' | '-')`.
+ fn from_code(code: Code) -> Kind {
+ match code {
+ Code::Char(char) => Kind::from_char(char),
+ _ => unreachable!("invalid code"),
+ }
+ }
+}
+
+/// To do.
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ // To do: allow arbitrary when code (indented) is turned off.
+ tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), before)(tokenizer, code)
+}
+
+/// To do.
+fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ // Unordered.
+ Code::Char('*' | '+' | '-') => {
+ // To do: check if this is a thematic break?
+ tokenizer.enter(Token::List);
+ tokenizer.enter(Token::ListItemPrefix);
+ marker(tokenizer, code)
+ }
+ // Ordered.
+ Code::Char(char) if char.is_ascii_digit() => {
+ tokenizer.enter(Token::List);
+ tokenizer.enter(Token::ListItemPrefix);
+ tokenizer.enter(Token::ListItemValue);
+ // To do: `interrupt || !1`?
+ inside(tokenizer, code, 0)
+ }
+ _ => (State::Nok, None),
+ }
+}
+
+/// To do.
+fn inside(tokenizer: &mut Tokenizer, code: Code, mut size: usize) -> StateFnResult {
+ match code {
+ Code::Char(char) if char.is_ascii_digit() && size < LIST_ITEM_VALUE_SIZE_MAX => {
+ tokenizer.consume(code);
+ size += 1;
+ (State::Fn(Box::new(move |t, c| inside(t, c, size))), None)
+ }
+ // To do: `(!self.interrupt || size < 2)`
+ Code::Char('.' | ')') => {
+ tokenizer.exit(Token::ListItemValue);
+ marker(tokenizer, code)
+ }
+ _ => (State::Nok, None),
+ }
+}
+
+/// To do.
+fn marker(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ let kind = Kind::from_code(code);
+ println!("list item kind: {:?}", kind);
+ tokenizer.enter(Token::ListItemMarker);
+ tokenizer.consume(code);
+ tokenizer.exit(Token::ListItemMarker);
+ // To do: check blank line, if true `State::Nok` else `on_blank`.
+ (State::Fn(Box::new(marker_after)), None)
+}
+
+/// To do.
+fn marker_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ tokenizer.attempt(list_item_prefix_whitespace, |ok| {
+ let func = if ok { prefix_end } else { prefix_other };
+ Box::new(func)
+ })(tokenizer, code)
+}
+
+// To do: `on_blank`.
+
+/// To do.
+fn prefix_other(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::VirtualSpace | Code::Char('\t' | ' ') => {
+ tokenizer.enter(Token::SpaceOrTab);
+ tokenizer.consume(code);
+ tokenizer.exit(Token::SpaceOrTab);
+ (State::Fn(Box::new(prefix_end)), None)
+ }
+ _ => (State::Nok, None),
+ }
+}
+
+/// To do.
+fn prefix_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ // To do: calculate size.
+ tokenizer.exit(Token::ListItemPrefix);
+ (State::Ok, Some(vec![code]))
+}
+
+/// To do.
+fn list_item_prefix_whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ // To do: check how big this should be?
+ tokenizer.go(
+ space_or_tab_min_max(1, TAB_SIZE - 1),
+ list_item_prefix_whitespace_after,
+ )(tokenizer, code)
+}
+
+fn list_item_prefix_whitespace_after(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ // To do: check some stuff?
+ (State::Ok, Some(vec![code]))
+}
+
+/// End of a block quote.
+pub fn end() -> Vec<Token> {
+ vec![Token::List]
+}
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index 06ff4e9..be9dfe3 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -33,7 +33,7 @@
//! * [label end][label_end]
//! * [label start (image)][label_start_image]
//! * [label start (link)][label_start_link]
-//! * list
+//! * [list][]
//! * [paragraph][]
//! * [thematic break][thematic_break]
//!
@@ -42,6 +42,7 @@
//! * [data][partial_data]
//! * [destination][partial_destination]
//! * [label][partial_label]
+//! * [non lazy continuation][partial_non_lazy_continuation]
//! * [space or tab][partial_space_or_tab]
//! * [title][partial_title]
//! * [whitespace][partial_whitespace]
@@ -80,6 +81,7 @@ pub mod html_text;
pub mod label_end;
pub mod label_start_image;
pub mod label_start_link;
+pub mod list;
pub mod paragraph;
pub mod partial_data;
pub mod partial_destination;
diff --git a/src/content/document.rs b/src/content/document.rs
index a8ff775..e32534e 100644
--- a/src/content/document.rs
+++ b/src/content/document.rs
@@ -8,8 +8,9 @@
//! * [Block quote][crate::construct::block_quote]
//! * List
-use crate::construct::block_quote::{
- cont as block_quote_cont, end as block_quote_end, start as block_quote,
+use crate::construct::{
+ block_quote::{cont as block_quote_cont, end as block_quote_end, start as block_quote},
+ list::{end as list_end, start as list},
};
use crate::content::flow::start as flow;
use crate::parser::ParseState;
@@ -96,9 +97,10 @@ fn before(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnR
// continuation line.
if info.continued < info.stack.len() {
let name = &info.stack[info.continued];
- // To do: list.
let cont = if name == "blockquote" {
block_quote_cont
+ } else if name == "list" {
+ unreachable!("todo: list cont {:?}", name)
} else {
unreachable!("todo: cont construct {:?}", name)
};
@@ -157,12 +159,19 @@ fn check_new_containers(
}
// Check if there is a new container.
- // To do: list.
tokenizer.attempt(block_quote, move |ok| {
if ok {
Box::new(|t, c| there_is_a_new_container(t, c, info, "blockquote".to_string()))
} else {
- Box::new(|t, c| there_is_no_new_container(t, c, info))
+ Box::new(|tokenizer, code| {
+ tokenizer.attempt(list, move |ok| {
+ if ok {
+ Box::new(|t, c| there_is_a_new_container(t, c, info, "list".to_string()))
+ } else {
+ Box::new(|t, c| there_is_no_new_container(t, c, info))
+ }
+ })(tokenizer, code)
+ })
}
})(tokenizer, code)
}
@@ -179,11 +188,12 @@ fn there_is_a_new_container(
// Remove from the event stack.
// We’ll properly add exits at different points manually.
- // To do: list.
let end = if name == "blockquote" {
block_quote_end
+ } else if name == "list" {
+ list_end
} else {
- unreachable!("todo: cont {:?}", name)
+ unreachable!("todo: end {:?}", name)
};
let token_types = end();
@@ -249,11 +259,12 @@ fn exit_containers(
while info.stack.len() > size {
let name = info.stack.pop().unwrap();
- // To do: list.
let end = if name == "blockquote" {
block_quote_end
+ } else if name == "list" {
+ list_end
} else {
- unreachable!("todo: cont {:?}", name)
+ unreachable!("todo: end {:?}", name)
};
let token_types = end();
@@ -265,7 +276,7 @@ fn exit_containers(
exits.push(Event {
event_type: EventType::Exit,
token_type: token_type.clone(),
- // To do: fix position later.
+ // Note: positions are fixed later.
point: tokenizer.point.clone(),
index: tokenizer.index,
previous: None,
diff --git a/src/token.rs b/src/token.rs
index 9b59719..f60f9cd 100644
--- a/src/token.rs
+++ b/src/token.rs
@@ -1765,4 +1765,11 @@ pub enum Token {
/// ^ ^ ^
/// ```
ThematicBreakSequence,
+
+ // To do: sort.
+ List,
+ ListItemPrefix,
+ ListItemValue,
+ ListItemMarker,
+ // ListItemPrefixSpaceOrTab,
}