diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-07-12 17:47:08 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-07-12 17:47:08 +0200 |
commit | 2e3b7abaa9877b658fa4f8f2612acc617dff60bb (patch) | |
tree | e823d041521a4af33a7e552ba58f1d4b63335be3 /src | |
parent | 75522b867b15b9a400275cfec9a2ead4ff535473 (diff) | |
download | markdown-rs-2e3b7abaa9877b658fa4f8f2612acc617dff60bb.tar.gz markdown-rs-2e3b7abaa9877b658fa4f8f2612acc617dff60bb.tar.bz2 markdown-rs-2e3b7abaa9877b658fa4f8f2612acc617dff60bb.zip |
Fix a lot of list things
* Add `ListItem`, `ListOrdered`, and `ListUnordered` tokens
* Add support for multiline list items
* Add support for tight lists
* Fix bug where 10-digit-long list item values were accepted (one more digit than the maximum should allow)
* Fix skip bug when skipping over nested events
Diffstat (limited to '')
-rw-r--r-- | src/compiler.rs | 91 | ||||
-rw-r--r-- | src/construct/list.rs | 210 | ||||
-rw-r--r-- | src/content/document.rs | 23 | ||||
-rw-r--r-- | src/token.rs | 4 | ||||
-rw-r--r-- | src/util/skip.rs | 41 |
5 files changed, 313 insertions, 56 deletions
diff --git a/src/compiler.rs b/src/compiler.rs index 753d85f..2d42011 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -447,7 +447,8 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String { // To do: sort. enter_map.insert(Token::ListItemMarker, on_enter_list_item_marker); - enter_map.insert(Token::List, on_enter_list); + enter_map.insert(Token::ListOrdered, on_enter_list); + enter_map.insert(Token::ListUnordered, on_enter_list); let mut exit_map: Map = HashMap::new(); exit_map.insert(Token::AutolinkEmail, on_exit_autolink_email); @@ -525,8 +526,10 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String { exit_map.insert(Token::ThematicBreak, on_exit_thematic_break); // To do: sort. - exit_map.insert(Token::List, on_exit_list); exit_map.insert(Token::ListItemValue, on_exit_list_item_value); + exit_map.insert(Token::ListItem, on_exit_list_item); + exit_map.insert(Token::ListOrdered, on_exit_list); + exit_map.insert(Token::ListUnordered, on_exit_list); // Handle one event. let handle = |context: &mut CompileContext, index: usize| { @@ -708,8 +711,14 @@ fn on_enter_link(context: &mut CompileContext) { /// Handle [`Enter`][EventType::Enter]:[`Paragraph`][Token::Paragraph]. fn on_enter_paragraph(context: &mut CompileContext) { - context.line_ending_if_needed(); - context.tag("<p>".to_string()); + let tight = context.tight_stack.last().unwrap_or(&false); + + if !tight { + context.line_ending_if_needed(); + context.tag("<p>".to_string()); + } + + // context.slurp_all_line_endings = false; } /// Handle [`Enter`][EventType::Enter]:[`Resource`][Token::Resource]. @@ -1145,7 +1154,11 @@ fn on_exit_media(context: &mut CompileContext) { /// Handle [`Exit`][EventType::Exit]:[`Paragraph`][Token::Paragraph]. 
fn on_exit_paragraph(context: &mut CompileContext) { - context.tag("</p>".to_string()); + let tight = context.tight_stack.last().unwrap_or(&false); + + if !tight { + context.tag("</p>".to_string()); + } } /// Handle [`Exit`][EventType::Exit]:[`ReferenceString`][Token::ReferenceString]. @@ -1187,14 +1200,58 @@ fn on_exit_thematic_break(context: &mut CompileContext) { } // To do: sort. +/// To do (onenterlist{un,}ordered) +fn on_enter_list(context: &mut CompileContext) { + let events = &context.events; + let mut index = context.index; + let mut balance = 0; + let mut loose = false; + let token_type = &events[index].token_type; + + while index < events.len() { + let event = &events[index]; + + if event.event_type == EventType::Enter { + balance += 1; + } else { + balance -= 1; + + // Blank line directly in list or directly in list item. + if balance < 3 && event.token_type == Token::BlankLineEnding { + loose = true; + break; + } + + // Done. + if balance == 0 && event.token_type == *token_type { + break; + } + } + + index += 1; + } + + println!("list: {:?} {:?}", token_type, loose); + context.tight_stack.push(!loose); + context.line_ending_if_needed(); + // Note: no `>`. + context.tag(format!( + "<{}", + if *token_type == Token::ListOrdered { + "ol" + } else { + "ul" + } + )); + context.expect_first_item = Some(true); +} + /// To do fn on_enter_list_item_marker(context: &mut CompileContext) { let expect_first_item = context.expect_first_item.take().unwrap(); if expect_first_item { context.tag(">".to_string()); - } else { - on_exit_list_item(context); } context.line_ending_if_needed(); @@ -1204,15 +1261,6 @@ fn on_enter_list_item_marker(context: &mut CompileContext) { context.last_was_tag = false; } -/// To do (onenterlist{un,}ordered) -fn on_enter_list(context: &mut CompileContext) { - // To do: !token._loose - context.tight_stack.push(false); - context.line_ending_if_needed(); - context.tag("<ol".to_string()); // To do: `ol` / `ul`. 
- context.expect_first_item = Some(true); -} - /// To do fn on_exit_list_item_value(context: &mut CompileContext) { let expect_first_item = context.expect_first_item.unwrap(); @@ -1232,21 +1280,24 @@ fn on_exit_list_item_value(context: &mut CompileContext) { } /// To do. -/// Note: there is no actual `Token::ListItem`. fn on_exit_list_item(context: &mut CompileContext) { // && !context.slurp_all_line_endings if context.last_was_tag { context.line_ending_if_needed(); } - context.tag("</li>".to_string()); // To do: `ol` / `ul`. + context.tag("</li>".to_string()); // context.slurp_all_line_endings = false; } /// To do. fn on_exit_list(context: &mut CompileContext) { - on_exit_list_item(context); + let tag_name = if context.events[context.index].token_type == Token::ListOrdered { + "ol" + } else { + "ul" + }; context.tight_stack.pop(); context.line_ending(); - context.tag("</ol>".to_string()); // To do: `ol` / `ul`. + context.tag(format!("</{}>", tag_name)); } diff --git a/src/construct/list.rs b/src/construct/list.rs index 96b2496..b4ae9b1 100644 --- a/src/construct/list.rs +++ b/src/construct/list.rs @@ -1,14 +1,21 @@ //! To do. use crate::constant::{LIST_ITEM_VALUE_SIZE_MAX, TAB_SIZE}; -use crate::construct::partial_space_or_tab::space_or_tab_min_max; +use crate::construct::{ + blank_line::start as blank_line, partial_space_or_tab::space_or_tab_min_max, +}; use crate::token::Token; -use crate::tokenizer::{Code, State, StateFnResult, Tokenizer}; +use crate::tokenizer::{Code, Event, EventType, State, StateFnResult, Tokenizer}; +use crate::util::{ + edit_map::EditMap, + skip, + span::{codes as codes_from_span, from_exit_event}, +}; -/// Type of title. +/// Type of list. #[derive(Debug, PartialEq)] enum Kind { - /// In a dot (`.`) list. + /// In a dot (`.`) list item. /// /// ## Example /// @@ -16,7 +23,7 @@ enum Kind { /// 1. a /// ``` Dot, - /// In a paren (`)`) list. + /// In a paren (`)`) list item. 
/// /// ## Example /// @@ -24,7 +31,7 @@ enum Kind { /// 1) a /// ``` Paren, - /// In an asterisk (`*`) list. + /// In an asterisk (`*`) list item. /// /// ## Example /// @@ -32,7 +39,7 @@ enum Kind { /// * a /// ``` Asterisk, - /// In a plus (`+`) list. + /// In a plus (`+`) list item. /// /// ## Example /// @@ -40,7 +47,7 @@ enum Kind { /// + a /// ``` Plus, - /// In a dash (`-`) list. + /// In a dash (`-`) list item. /// /// ## Example /// @@ -51,16 +58,16 @@ enum Kind { } impl Kind { - /// Turn the kind into a [char]. - fn as_char(&self) -> char { - match self { - Kind::Dot => '.', - Kind::Paren => ')', - Kind::Asterisk => '*', - Kind::Plus => '+', - Kind::Dash => '-', - } - } + // /// Turn the kind into a [char]. + // fn as_char(&self) -> char { + // match self { + // Kind::Dot => '.', + // Kind::Paren => ')', + // Kind::Asterisk => '*', + // Kind::Plus => '+', + // Kind::Dash => '-', + // } + // } /// Turn a [char] into a kind. /// /// ## Panics @@ -101,13 +108,13 @@ fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { // Unordered. Code::Char('*' | '+' | '-') => { // To do: check if this is a thematic break? - tokenizer.enter(Token::List); + tokenizer.enter(Token::ListItem); tokenizer.enter(Token::ListItemPrefix); marker(tokenizer, code) } // Ordered. Code::Char(char) if char.is_ascii_digit() => { - tokenizer.enter(Token::List); + tokenizer.enter(Token::ListItem); tokenizer.enter(Token::ListItemPrefix); tokenizer.enter(Token::ListItemValue); // To do: `interrupt || !1`? @@ -119,10 +126,10 @@ fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// To do. 
fn inside(tokenizer: &mut Tokenizer, code: Code, mut size: usize) -> StateFnResult { + size += 1; match code { Code::Char(char) if char.is_ascii_digit() && size < LIST_ITEM_VALUE_SIZE_MAX => { tokenizer.consume(code); - size += 1; (State::Fn(Box::new(move |t, c| inside(t, c, size))), None) } // To do: `(!self.interrupt || size < 2)` @@ -172,6 +179,7 @@ fn prefix_other(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { fn prefix_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { // To do: calculate size. tokenizer.exit(Token::ListItemPrefix); + tokenizer.register_resolver_before("list_item".to_string(), Box::new(resolve)); (State::Ok, Some(vec![code])) } @@ -189,7 +197,163 @@ fn list_item_prefix_whitespace_after(_tokenizer: &mut Tokenizer, code: Code) -> (State::Ok, Some(vec![code])) } -/// End of a block quote. +/// To do. +pub fn cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + tokenizer.check(blank_line, |ok| { + let func = if ok { blank_cont } else { not_blank_cont }; + Box::new(func) + })(tokenizer, code) +} + +pub fn blank_cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + println!("cont: blank"); + // self.containerState.furtherBlankLines = + // self.containerState.furtherBlankLines || + // self.containerState.initialBlankLine + + // We have a blank line. + // Still, try to consume at most the items size. + // To do: eat at most `size` whitespace. + tokenizer.go(space_or_tab_min_max(0, TAB_SIZE), blank_cont_after)(tokenizer, code) +} + +pub fn blank_cont_after(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + println!("cont: blank: after"); + (State::Ok, Some(vec![code])) +} + +pub fn not_blank_cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + println!("cont: not blank"); + // if (self.containerState.furtherBlankLines || !markdownSpace(code)) nok + // To do: eat exactly `size` whitespace. 
+ tokenizer.go(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), blank_cont_after)(tokenizer, code) +} + +/// To do. pub fn end() -> Vec<Token> { - vec![Token::List] + vec![Token::ListItem] +} + +/// To do. +pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> { + let mut edit_map = EditMap::new(); + + let mut index = 0; + println!("list item:before: {:?}", tokenizer.events.len()); + while index < tokenizer.events.len() { + let event = &tokenizer.events[index]; + println!( + "ev: {:?} {:?} {:?} {:?} {:?} {:?}", + index, + event.event_type, + event.token_type, + event.content_type, + event.previous, + event.next + ); + index += 1; + } + + let mut index = 0; + let mut balance = 0; + let mut list_items: Vec<(Kind, usize, usize, usize)> = vec![]; + // To do: track balance? Or, check what’s between them? + + while index < tokenizer.events.len() { + let event = &tokenizer.events[index]; + + if event.token_type == Token::ListItem { + if event.event_type == EventType::Enter { + let end = skip::opt(&tokenizer.events, index, &[Token::ListItem]) - 1; + let marker = skip::to(&tokenizer.events, index, &[Token::ListItemMarker]) + 1; + let codes = codes_from_span( + &tokenizer.parse_state.codes, + &from_exit_event(&tokenizer.events, marker), + ); + let kind = Kind::from_code(codes[0]); + let current = (kind, balance, index, end); + + let previous = list_items.last(); + let mut matched = false; + + // There’s a previous list item. + if let Some(previous) = previous { + // …with the same marker and depth, and with only (blank) line endings between them. 
+ if previous.0 == current.0 + && previous.1 == current.1 + && skip::opt( + &tokenizer.events, + previous.3 + 1, + &[Token::LineEnding, Token::BlankLineEnding], + ) == current.2 + { + matched = true; + } + } + + if matched { + let previous = list_items.last_mut().unwrap(); + previous.3 = current.3; + } else { + // let previous = list_items.pop(); + // if let Some(previous) = previous { + // lists.push(previous); + // } + + println!("prev:!match {:?} {:?}", previous, current); + list_items.push(current); + } + + println!("enter: {:?}", event.token_type); + balance += 1; + } else { + println!("exit: {:?}", event.token_type); + balance -= 1; + } + } + + index += 1; + } + + let mut index = 0; + while index < list_items.len() { + let list_item = &list_items[index]; + let mut list_start = tokenizer.events[list_item.2].clone(); + let token_type = if matches!(list_item.0, Kind::Paren | Kind::Dot) { + Token::ListOrdered + } else { + Token::ListUnordered + }; + list_start.token_type = token_type.clone(); + let mut list_end = tokenizer.events[list_item.3].clone(); + list_end.token_type = token_type; + println!("inject: {:?} {:?}", list_start, list_end); + + edit_map.add(list_item.2, 0, vec![list_start]); + edit_map.add(list_item.3 + 1, 0, vec![list_end]); + + index += 1; + } + + println!("list items: {:#?}", list_items); + + let events = edit_map.consume(&mut tokenizer.events); + + let mut index = 0; + println!("list item:after: {:?}", events.len()); + while index < events.len() { + let event = &events[index]; + println!( + "ev: {:?} {:?} {:?} {:?} {:?} {:?}", + index, + event.event_type, + event.token_type, + event.content_type, + event.previous, + event.next + ); + index += 1; + } + + events } diff --git a/src/content/document.rs b/src/content/document.rs index e32534e..c5bf5c8 100644 --- a/src/content/document.rs +++ b/src/content/document.rs @@ -10,7 +10,7 @@ use crate::construct::{ block_quote::{cont as block_quote_cont, end as block_quote_end, start as block_quote}, - 
list::{end as list_end, start as list}, + list::{cont as list_const, end as list_end, start as list}, }; use crate::content::flow::start as flow; use crate::parser::ParseState; @@ -100,7 +100,7 @@ fn before(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnR let cont = if name == "blockquote" { block_quote_cont } else if name == "list" { - unreachable!("todo: list cont {:?}", name) + list_const } else { unreachable!("todo: cont construct {:?}", name) }; @@ -183,7 +183,8 @@ fn there_is_a_new_container( name: String, ) -> StateFnResult { let size = info.continued; - info = exit_containers(tokenizer, info, size, true); + println!("exit:0: {:?}", false); + info = exit_containers(tokenizer, info, size, false); tokenizer.expect(code, true); // Remove from the event stack. @@ -272,6 +273,7 @@ fn exit_containers( let mut index = 0; while index < token_types.len() { let token_type = &token_types[index]; + println!("creating exit: {:?}", token_type); exits.push(Event { event_type: EventType::Exit, @@ -289,7 +291,16 @@ fn exit_containers( } if !exits.is_empty() { - let index = info.inject.len() - 1 - (if before { 1 } else { 0 }); + let before = if before { 1 } else { 0 }; + let mut index = info.inject.len() - 1; + println!("inject: {:?} {:?}", info.inject.len() - 1, before); + if before >= index { + // To do: maybe, if this branch happens, it’s a bug? + println!("inject:0: {:?}", index); + index = 0; + } else { + println!("set: {:?}", index); + } info.inject[index].1.append(&mut exits); } @@ -377,6 +388,7 @@ fn flow_end( } // Exit containers. 
+ println!("exit:1: {:?}", true); info = exit_containers(tokenizer, info, continued, true); tokenizer.expect(code, true); @@ -386,6 +398,7 @@ fn flow_end( match result { State::Ok => { + println!("exit:3: {:?}", false); info = exit_containers(tokenizer, info, 0, false); tokenizer.expect(code, true); @@ -433,7 +446,7 @@ fn flow_end( tokenizer.events = map.consume(&mut tokenizer.events); let mut index = 0; - println!("after: {:?}", tokenizer.events.len()); + println!("document:after: {:?}", tokenizer.events.len()); while index < tokenizer.events.len() { let event = &tokenizer.events[index]; println!( diff --git a/src/token.rs b/src/token.rs index f60f9cd..889c3ba 100644 --- a/src/token.rs +++ b/src/token.rs @@ -1767,7 +1767,9 @@ pub enum Token { ThematicBreakSequence, // To do: sort. - List, + ListOrdered, + ListUnordered, + ListItem, ListItemPrefix, ListItemValue, ListItemMarker, diff --git a/src/util/skip.rs b/src/util/skip.rs index 971beb6..3307734 100644 --- a/src/util/skip.rs +++ b/src/util/skip.rs @@ -1,7 +1,7 @@ //! Utilities to deal with lists of events. use crate::token::Token; -use crate::tokenizer::Event; +use crate::tokenizer::{Event, EventType}; /// Skip from `index`, optionally past `token_types`. pub fn opt(events: &[Event], index: usize, token_types: &[Token]) -> usize { @@ -13,33 +13,60 @@ pub fn opt_back(events: &[Event], index: usize, token_types: &[Token]) -> usize skip_opt_with_direction(events, index, token_types, false) } +pub fn to(events: &[Event], mut index: usize, token_types: &[Token]) -> usize { + while index < events.len() { + let current = &events[index].token_type; + + if token_types.contains(current) { + break; + } + + index += 1; + } + + index +} + /// Skip internals. 
fn skip_opt_with_direction( events: &[Event], - index: usize, + mut index: usize, token_types: &[Token], forward: bool, ) -> usize { - let mut index = index; + let mut balance = 0; + let open = if forward { + EventType::Enter + } else { + EventType::Exit + }; while index < events.len() { let current = &events[index].token_type; - if !token_types.contains(current) { + if !token_types.contains(current) || events[index].event_type != open { break; } - // assert_eq!(events[index].event_type, EventType::Enter); index = if forward { index + 1 } else { index - 1 }; + balance += 1; loop { - if events[index].token_type == *current { - // assert_eq!(events[index].event_type, EventType::Exit); + balance = if events[index].event_type == open { + balance + 1 + } else { + balance - 1 + }; + + if events[index].token_type == *current && balance == 0 { + println!("close:it! {:?} {:?}", events[index].token_type, balance); index = if forward { index + 1 } else { index - 1 }; + println!("index:break: {:?}", index); break; } index = if forward { index + 1 } else { index - 1 }; + println!("index:loop: {:?}", index); } } |