From 75522b867b15b9a400275cfec9a2ead4ff535473 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Tue, 12 Jul 2022 13:00:53 +0200 Subject: Add initial support for lists --- src/compiler.rs | 82 +++++++++++++++++++- src/constant.rs | 4 + src/construct/list.rs | 195 ++++++++++++++++++++++++++++++++++++++++++++++++ src/construct/mod.rs | 4 +- src/content/document.rs | 31 +++++--- src/token.rs | 7 ++ 6 files changed, 308 insertions(+), 15 deletions(-) create mode 100644 src/construct/list.rs (limited to 'src') diff --git a/src/compiler.rs b/src/compiler.rs index 8a28654..753d85f 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -235,6 +235,7 @@ struct CompileContext<'a> { pub code_flow_seen_data: Option, pub code_fenced_fences_count: Option, pub character_reference_kind: Option, + pub expect_first_item: Option, pub media_stack: Vec, pub definitions: HashMap, pub tight_stack: Vec, @@ -269,6 +270,7 @@ impl<'a> CompileContext<'a> { code_flow_seen_data: None, code_fenced_fences_count: None, character_reference_kind: None, + expect_first_item: None, media_stack: vec![], definitions: HashMap::new(), tight_stack: vec![], @@ -379,7 +381,7 @@ impl<'a> CompileContext<'a> { /// Turn events and codes into a string of HTML. #[allow(clippy::too_many_lines)] pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String { - // let mut slurp_all_line_endings = false; + // let slurp_all_line_endings = false; let mut index = 0; let mut line_ending_inferred: Option = None; @@ -443,6 +445,10 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String { enter_map.insert(Token::ResourceTitleString, on_enter_buffer); enter_map.insert(Token::Strong, on_enter_strong); + // To do: sort. + enter_map.insert(Token::ListItemMarker, on_enter_list_item_marker); + enter_map.insert(Token::List, on_enter_list); + let mut exit_map: Map = HashMap::new(); exit_map.insert(Token::AutolinkEmail, on_exit_autolink_email); exit_map.insert(Token::AutolinkProtocol, on_exit_autolink_protocol); @@ -488,7 +494,6 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String { on_exit_definition_title_string, ); exit_map.insert(Token::Emphasis, on_exit_emphasis); - exit_map.insert(Token::HardBreakEscape, on_exit_break); exit_map.insert(Token::HardBreakTrailing, on_exit_break); exit_map.insert(Token::HeadingAtx, on_exit_heading_atx); @@ -519,6 +524,10 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String { exit_map.insert(Token::Strong, on_exit_strong); exit_map.insert(Token::ThematicBreak, on_exit_thematic_break); + // To do: sort. + exit_map.insert(Token::List, on_exit_list); + exit_map.insert(Token::ListItemValue, on_exit_list_item_value); + // Handle one event. let handle = |context: &mut CompileContext, index: usize| { let event = &events[index]; @@ -766,7 +775,7 @@ fn on_exit_block_quote(context: &mut CompileContext) { context.tight_stack.pop(); context.line_ending_if_needed(); context.tag("".to_string()); - // let mut slurp_all_line_endings = false; + // context.slurp_all_line_endings = false; } /// Handle [`Exit`][EventType::Exit]:[`CharacterReferenceMarker`][Token::CharacterReferenceMarker]. @@ -1056,7 +1065,7 @@ fn on_exit_label_text(context: &mut CompileContext) { /// Handle [`Exit`][EventType::Exit]:[`LineEnding`][Token::LineEnding]. fn on_exit_line_ending(context: &mut CompileContext) { - // if slurp_all_line_endings { + // if context.slurp_all_line_endings { // // Empty. // } else if context.slurp_one_line_ending { @@ -1176,3 +1185,68 @@ fn on_exit_thematic_break(context: &mut CompileContext) { context.line_ending_if_needed(); context.tag("
".to_string()); } + +// To do: sort. +/// To do +fn on_enter_list_item_marker(context: &mut CompileContext) { + let expect_first_item = context.expect_first_item.take().unwrap(); + + if expect_first_item { + context.tag(">".to_string()); + } else { + on_exit_list_item(context); + } + + context.line_ending_if_needed(); + context.tag("
  • ".to_string()); + context.expect_first_item = Some(false); + // “Hack” to prevent a line ending from showing up if the item is empty. + context.last_was_tag = false; +} + +/// To do (onenterlist{un,}ordered) +fn on_enter_list(context: &mut CompileContext) { + // To do: !token._loose + context.tight_stack.push(false); + context.line_ending_if_needed(); + context.tag("().ok().unwrap(); + + if value != 1 { + context.tag(format!(" start=\"{}\"", encode(&value.to_string()))); + } + } +} + +/// To do. +/// Note: there is no actual `Token::ListItem`. +fn on_exit_list_item(context: &mut CompileContext) { + // && !context.slurp_all_line_endings + if context.last_was_tag { + context.line_ending_if_needed(); + } + + context.tag("
  • ".to_string()); // To do: `ol` / `ul`. + // context.slurp_all_line_endings = false; +} + +/// To do. +fn on_exit_list(context: &mut CompileContext) { + on_exit_list_item(context); + context.tight_stack.pop(); + context.line_ending(); + context.tag("".to_string()); // To do: `ol` / `ul`. +} diff --git a/src/constant.rs b/src/constant.rs index fc74316..5dd2435 100644 --- a/src/constant.rs +++ b/src/constant.rs @@ -193,6 +193,10 @@ pub const HTML_RAW_SIZE_MAX: usize = 8; /// To safeguard performance, labels are capped at a large number: `999`. pub const LINK_REFERENCE_SIZE_MAX: usize = 999; +/// To do. +/// See: +pub const LIST_ITEM_VALUE_SIZE_MAX: usize = 10; + /// Maximum allowed unbalanced parens in destination. /// /// There can be many balanced parens, but if there are 33 opens that were not diff --git a/src/construct/list.rs b/src/construct/list.rs new file mode 100644 index 0000000..96b2496 --- /dev/null +++ b/src/construct/list.rs @@ -0,0 +1,195 @@ +//! To do. + +use crate::constant::{LIST_ITEM_VALUE_SIZE_MAX, TAB_SIZE}; +use crate::construct::partial_space_or_tab::space_or_tab_min_max; +use crate::token::Token; +use crate::tokenizer::{Code, State, StateFnResult, Tokenizer}; + +/// Type of title. +#[derive(Debug, PartialEq)] +enum Kind { + /// In a dot (`.`) list. + /// + /// ## Example + /// + /// ```markdown + /// 1. a + /// ``` + Dot, + /// In a paren (`)`) list. + /// + /// ## Example + /// + /// ```markdown + /// 1) a + /// ``` + Paren, + /// In an asterisk (`*`) list. + /// + /// ## Example + /// + /// ```markdown + /// * a + /// ``` + Asterisk, + /// In a plus (`+`) list. + /// + /// ## Example + /// + /// ```markdown + /// + a + /// ``` + Plus, + /// In a dash (`-`) list. + /// + /// ## Example + /// + /// ```markdown + /// - a + /// ``` + Dash, +} + +impl Kind { + /// Turn the kind into a [char]. + fn as_char(&self) -> char { + match self { + Kind::Dot => '.', + Kind::Paren => ')', + Kind::Asterisk => '*', + Kind::Plus => '+', + Kind::Dash => '-', + } + } + /// Turn a [char] into a kind. + /// + /// ## Panics + /// + /// Panics if `char` is not `.`, `)`, `*`, `+`, or `-`. + fn from_char(char: char) -> Kind { + match char { + '.' => Kind::Dot, + ')' => Kind::Paren, + '*' => Kind::Asterisk, + '+' => Kind::Plus, + '-' => Kind::Dash, + _ => unreachable!("invalid char"), + } + } + /// Turn [Code] into a kind. + /// + /// ## Panics + /// + /// Panics if `code` is not `Code::Char('.' | ')' | '*' | '+' | '-')`. + fn from_code(code: Code) -> Kind { + match code { + Code::Char(char) => Kind::from_char(char), + _ => unreachable!("invalid code"), + } + } +} + +/// To do. +pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + // To do: allow arbitrary when code (indented) is turned off. + tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), before)(tokenizer, code) +} + +/// To do. +fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + // Unordered. + Code::Char('*' | '+' | '-') => { + // To do: check if this is a thematic break? + tokenizer.enter(Token::List); + tokenizer.enter(Token::ListItemPrefix); + marker(tokenizer, code) + } + // Ordered. + Code::Char(char) if char.is_ascii_digit() => { + tokenizer.enter(Token::List); + tokenizer.enter(Token::ListItemPrefix); + tokenizer.enter(Token::ListItemValue); + // To do: `interrupt || !1`? + inside(tokenizer, code, 0) + } + _ => (State::Nok, None), + } +} + +/// To do. +fn inside(tokenizer: &mut Tokenizer, code: Code, mut size: usize) -> StateFnResult { + match code { + Code::Char(char) if char.is_ascii_digit() && size < LIST_ITEM_VALUE_SIZE_MAX => { + tokenizer.consume(code); + size += 1; + (State::Fn(Box::new(move |t, c| inside(t, c, size))), None) + } + // To do: `(!self.interrupt || size < 2)` + Code::Char('.' | ')') => { + tokenizer.exit(Token::ListItemValue); + marker(tokenizer, code) + } + _ => (State::Nok, None), + } +} + +/// To do. +fn marker(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + let kind = Kind::from_code(code); + println!("list item kind: {:?}", kind); + tokenizer.enter(Token::ListItemMarker); + tokenizer.consume(code); + tokenizer.exit(Token::ListItemMarker); + // To do: check blank line, if true `State::Nok` else `on_blank`. + (State::Fn(Box::new(marker_after)), None) +} + +/// To do. +fn marker_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + tokenizer.attempt(list_item_prefix_whitespace, |ok| { + let func = if ok { prefix_end } else { prefix_other }; + Box::new(func) + })(tokenizer, code) +} + +// To do: `on_blank`. + +/// To do. +fn prefix_other(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::VirtualSpace | Code::Char('\t' | ' ') => { + tokenizer.enter(Token::SpaceOrTab); + tokenizer.consume(code); + tokenizer.exit(Token::SpaceOrTab); + (State::Fn(Box::new(prefix_end)), None) + } + _ => (State::Nok, None), + } +} + +/// To do. +fn prefix_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + // To do: calculate size. + tokenizer.exit(Token::ListItemPrefix); + (State::Ok, Some(vec![code])) +} + +/// To do. +fn list_item_prefix_whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + // To do: check how big this should be? + tokenizer.go( + space_or_tab_min_max(1, TAB_SIZE - 1), + list_item_prefix_whitespace_after, + )(tokenizer, code) +} + +fn list_item_prefix_whitespace_after(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + // To do: check some stuff? + (State::Ok, Some(vec![code])) +} + +/// End of a block quote. +pub fn end() -> Vec { + vec![Token::List] +} diff --git a/src/construct/mod.rs b/src/construct/mod.rs index 06ff4e9..be9dfe3 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -33,7 +33,7 @@ //! * [label end][label_end] //! * [label start (image)][label_start_image] //! * [label start (link)][label_start_link] -//! * list +//! * [list][] //! * [paragraph][] //! * [thematic break][thematic_break] //! @@ -42,6 +42,7 @@ //! * [data][partial_data] //! * [destination][partial_destination] //! * [label][partial_label] +//! * [non lazy continuation][partial_non_lazy_continuation] //! * [space or tab][partial_space_or_tab] //! * [title][partial_title] //! * [whitespace][partial_whitespace] @@ -80,6 +81,7 @@ pub mod html_text; pub mod label_end; pub mod label_start_image; pub mod label_start_link; +pub mod list; pub mod paragraph; pub mod partial_data; pub mod partial_destination; diff --git a/src/content/document.rs b/src/content/document.rs index a8ff775..e32534e 100644 --- a/src/content/document.rs +++ b/src/content/document.rs @@ -8,8 +8,9 @@ //! * [Block quote][crate::construct::block_quote] //! * List -use crate::construct::block_quote::{ - cont as block_quote_cont, end as block_quote_end, start as block_quote, +use crate::construct::{ + block_quote::{cont as block_quote_cont, end as block_quote_end, start as block_quote}, + list::{end as list_end, start as list}, }; use crate::content::flow::start as flow; use crate::parser::ParseState; @@ -96,9 +97,10 @@ fn before(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnR // continuation line. if info.continued < info.stack.len() { let name = &info.stack[info.continued]; - // To do: list. let cont = if name == "blockquote" { block_quote_cont + } else if name == "list" { + unreachable!("todo: list cont {:?}", name) } else { unreachable!("todo: cont construct {:?}", name) }; @@ -157,12 +159,19 @@ fn check_new_containers( } // Check if there is a new container. - // To do: list. tokenizer.attempt(block_quote, move |ok| { if ok { Box::new(|t, c| there_is_a_new_container(t, c, info, "blockquote".to_string())) } else { - Box::new(|t, c| there_is_no_new_container(t, c, info)) + Box::new(|tokenizer, code| { + tokenizer.attempt(list, move |ok| { + if ok { + Box::new(|t, c| there_is_a_new_container(t, c, info, "list".to_string())) + } else { + Box::new(|t, c| there_is_no_new_container(t, c, info)) + } + })(tokenizer, code) + }) } })(tokenizer, code) } @@ -179,11 +188,12 @@ fn there_is_a_new_container( // Remove from the event stack. // We’ll properly add exits at different points manually. - // To do: list. let end = if name == "blockquote" { block_quote_end + } else if name == "list" { + list_end } else { - unreachable!("todo: cont {:?}", name) + unreachable!("todo: end {:?}", name) }; let token_types = end(); @@ -249,11 +259,12 @@ fn exit_containers( while info.stack.len() > size { let name = info.stack.pop().unwrap(); - // To do: list. let end = if name == "blockquote" { block_quote_end + } else if name == "list" { + list_end } else { - unreachable!("todo: cont {:?}", name) + unreachable!("todo: end {:?}", name) }; let token_types = end(); @@ -265,7 +276,7 @@ fn exit_containers( exits.push(Event { event_type: EventType::Exit, token_type: token_type.clone(), - // To do: fix position later. + // Note: positions are fixed later. point: tokenizer.point.clone(), index: tokenizer.index, previous: None, diff --git a/src/token.rs b/src/token.rs index 9b59719..f60f9cd 100644 --- a/src/token.rs +++ b/src/token.rs @@ -1765,4 +1765,11 @@ pub enum Token { /// ^ ^ ^ /// ``` ThematicBreakSequence, + + // To do: sort. + List, + ListItemPrefix, + ListItemValue, + ListItemMarker, + // ListItemPrefixSpaceOrTab, } -- cgit