From 8183323c432fc1359c634feb68bc372e13f0bd03 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Thu, 14 Jul 2022 17:40:10 +0200 Subject: Add support for container state * Fix to parse list item continuation based on how big the initial list item prefix was * Fix list items that start with blank lines --- src/compiler.rs | 21 +++++++++-- src/construct/list.rs | 95 +++++++++++++++++++++++++++---------------------- src/content/document.rs | 39 +++++++++++++++----- src/tokenizer.rs | 10 ++++++ src/util/skip.rs | 18 +++++++--- src/util/span.rs | 4 +-- 6 files changed, 125 insertions(+), 62 deletions(-) (limited to 'src') diff --git a/src/compiler.rs b/src/compiler.rs index 37229a4..51c7e2b 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -1150,7 +1150,9 @@ fn on_exit_media(context: &mut CompileContext) { fn on_exit_paragraph(context: &mut CompileContext) { let tight = context.tight_stack.last().unwrap_or(&false); - if !tight { + if *tight { + context.slurp_one_line_ending = true; + } else { context.tag("

".to_string()); } } @@ -1216,6 +1218,14 @@ fn on_enter_list(context: &mut CompileContext) { if balance < 3 && event.token_type == Token::BlankLineEnding // && !(balance == 1 && events[index - 2].token_type == Token::ListItem) { + let at_marker = balance == 2 + && events[skip::opt_back( + events, + index - 2, + &[Token::BlankLineEnding, Token::SpaceOrTab], + )] + .token_type + == Token::ListItemPrefix; let at_list_item = balance == 1 && events[index - 2].token_type == Token::ListItem; let at_empty_list_item = if at_list_item { let before_item = skip::opt_back(events, index - 2, &[Token::ListItem]); @@ -1229,7 +1239,7 @@ fn on_enter_list(context: &mut CompileContext) { false }; - if !at_list_item || !at_empty_list_item { + if !at_marker && (!at_list_item || !at_empty_list_item) { loose = true; break; } @@ -1297,7 +1307,12 @@ fn on_exit_list_item(context: &mut CompileContext) { let before_item = skip::opt_back( context.events, context.index - 1, - &[Token::BlankLineEnding, Token::LineEnding, Token::SpaceOrTab], + &[ + Token::BlankLineEnding, + Token::LineEnding, + Token::SpaceOrTab, + Token::BlockQuotePrefix, + ], ); let previous = &context.events[before_item]; let tight_paragraph = *tight && previous.token_type == Token::Paragraph; diff --git a/src/construct/list.rs b/src/construct/list.rs index d06eaf0..bab821c 100644 --- a/src/construct/list.rs +++ b/src/construct/list.rs @@ -99,6 +99,7 @@ impl Kind { /// To do. pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + tokenizer.enter(Token::ListItem); // To do: allow arbitrary when code (indented) is turned off. tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), before)(tokenizer, code) } @@ -108,12 +109,10 @@ fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { // Unordered. Code::Char('*' | '+' | '-') => tokenizer.check(thematic_break, |ok| { - let func = if ok { nok } else { before_unordered }; - Box::new(func) + Box::new(if ok { nok } else { before_unordered }) })(tokenizer, code), // Ordered. Code::Char(char) if char.is_ascii_digit() => { - tokenizer.enter(Token::ListItem); tokenizer.enter(Token::ListItemPrefix); tokenizer.enter(Token::ListItemValue); // To do: `interrupt || !1`? @@ -125,8 +124,6 @@ fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// To do. fn before_unordered(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - // To do: check if this is a thematic break? - tokenizer.enter(Token::ListItem); tokenizer.enter(Token::ListItemPrefix); marker(tokenizer, code) } @@ -163,7 +160,6 @@ fn marker_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { let interrupt = tokenizer.interrupt; tokenizer.check(blank_line, move |ok| { - println!("check:blank_line:after {:?} {:?}", ok, interrupt); let func = if ok { if interrupt { nok @@ -179,9 +175,12 @@ fn marker_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// To do. fn on_blank(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + if let Some(container) = tokenizer.container.as_mut() { + container.blank_initial = true; + } + // self.containerState.initialBlankLine = true - // initialSize++ - prefix_end(tokenizer, code) + prefix_end(tokenizer, code, true) } /// To do. @@ -189,8 +188,11 @@ fn marker_after_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { let interrupt = tokenizer.interrupt; tokenizer.attempt(list_item_prefix_whitespace, move |ok| { println!("marker:after:after: {:?} {:?}", ok, interrupt); - let func = if ok { prefix_end } else { prefix_other }; - Box::new(func) + if ok { + Box::new(|t, c| prefix_end(t, c, false)) + } else { + Box::new(prefix_other) + } })(tokenizer, code) } @@ -203,15 +205,25 @@ fn prefix_other(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.enter(Token::SpaceOrTab); tokenizer.consume(code); tokenizer.exit(Token::SpaceOrTab); - (State::Fn(Box::new(prefix_end)), None) + (State::Fn(Box::new(|t, c| prefix_end(t, c, false))), None) } _ => (State::Nok, None), } } /// To do. -fn prefix_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - // To do: calculate size. +fn prefix_end(tokenizer: &mut Tokenizer, code: Code, blank: bool) -> StateFnResult { + let start = skip::to_back( + &tokenizer.events, + tokenizer.events.len() - 1, + &[Token::ListItem], + ); + let prefix = tokenizer.index - tokenizer.events[start].index + (if blank { 1 } else { 0 }); + + if let Some(container) = tokenizer.container.as_mut() { + container.size = prefix; + } + tokenizer.exit(Token::ListItemPrefix); tokenizer.register_resolver_before("list_item".to_string(), Box::new(resolve)); (State::Ok, Some(vec![code])) @@ -221,14 +233,17 @@ fn prefix_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { fn list_item_prefix_whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { // To do: check how big this should be? tokenizer.go( - space_or_tab_min_max(1, TAB_SIZE - 1), + space_or_tab_min_max(1, TAB_SIZE), list_item_prefix_whitespace_after, )(tokenizer, code) } fn list_item_prefix_whitespace_after(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - // To do: check some stuff? - (State::Ok, Some(vec![code])) + if matches!(code, Code::VirtualSpace | Code::Char('\t' | ' ')) { + (State::Nok, None) + } else { + (State::Ok, Some(vec![code])) + } } /// To do. @@ -240,46 +255,40 @@ fn nok(_tokenizer: &mut Tokenizer, _code: Code) -> StateFnResult { pub fn cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.check(blank_line, |ok| { println!("cont:check:blank:after: {:?}", ok); - let func = if ok { blank_cont } else { not_blank_cont }; - Box::new(func) + Box::new(if ok { blank_cont } else { not_blank_cont }) })(tokenizer, code) } pub fn blank_cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - // self.containerState.furtherBlankLines = - // self.containerState.furtherBlankLines || - // self.containerState.initialBlankLine + let mut size = 0; + if let Some(container) = tokenizer.container.as_ref() { + size = container.size; + + if container.blank_initial { + return (State::Nok, None); + } + } // We have a blank line. // Still, try to consume at most the items size. // To do: eat at most `size` whitespace. - tokenizer.go(space_or_tab_min_max(0, TAB_SIZE), blank_cont_after)(tokenizer, code) -} - -pub fn blank_cont_after(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - println!("cont: blank: after"); - (State::Ok, Some(vec![code])) + tokenizer.go(space_or_tab_min_max(0, size), cont_after)(tokenizer, code) } pub fn not_blank_cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - let index = tokenizer.events.len(); - let currently_blank = - index > 0 && tokenizer.events[index - 1].token_type == Token::BlankLineEnding; - let mut further_blank = false; - - if currently_blank && index > 5 { - let before = skip::opt_back(&tokenizer.events, index - 3, &[Token::SpaceOrTab]); - further_blank = tokenizer.events[before].token_type == Token::BlankLineEnding; - } + let mut size = 0; - if further_blank || !matches!(code, Code::VirtualSpace | Code::Char('\t' | ' ')) { - println!("cont: not blank after further blank, or not blank w/o whitespace"); - (State::Nok, None) - } else { - println!("cont: not blank"); - // To do: eat exactly `size` whitespace. - tokenizer.go(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), blank_cont_after)(tokenizer, code) + if let Some(container) = tokenizer.container.as_mut() { + container.blank_initial = false; + size = container.size; } + + tokenizer.go(space_or_tab_min_max(size, size), cont_after)(tokenizer, code) +} + +pub fn cont_after(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + println!("cont: blank: after"); + (State::Ok, Some(vec![code])) } /// To do. diff --git a/src/content/document.rs b/src/content/document.rs index f6b8f55..bec0039 100644 --- a/src/content/document.rs +++ b/src/content/document.rs @@ -16,7 +16,9 @@ use crate::content::flow::start as flow; use crate::parser::ParseState; use crate::subtokenize::subtokenize; use crate::token::Token; -use crate::tokenizer::{Code, Event, EventType, Point, State, StateFn, StateFnResult, Tokenizer}; +use crate::tokenizer::{ + Code, ContainerState, Event, EventType, Point, State, StateFn, StateFnResult, Tokenizer, +}; use crate::util::edit_map::EditMap; use crate::util::{ normalize_identifier::normalize_identifier, @@ -37,6 +39,7 @@ struct DocumentInfo { paragraph_before: bool, inject: Vec<(Vec, Vec)>, stack: Vec, + states: Vec, stack_close: Vec, next: Box, } @@ -86,6 +89,7 @@ fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { next: Box::new(flow), paragraph_before: false, stack: vec![], + states: vec![], stack_close: vec![], }; line_start(tokenizer, code, info) @@ -104,7 +108,7 @@ fn line_start(tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo) -> fn container_existing_before( tokenizer: &mut Tokenizer, code: Code, - info: DocumentInfo, + mut info: DocumentInfo, ) -> StateFnResult { println!("container_existing_before"); @@ -120,17 +124,20 @@ fn container_existing_before( // continuation line. if info.continued < info.stack.len() { let kind = &info.stack[info.continued]; + let container = info.states.remove(info.continued); + tokenizer.container = Some(container); let cont = match kind { Container::BlockQuote => block_quote_cont, Container::ListItem => list_item_const, }; + // tokenizer.container = Some(&mut info.states[info.continued]); // To do: state? tokenizer.attempt(cont, move |ok| { if ok { Box::new(|t, c| container_existing_after(t, c, info)) } else { - Box::new(|t, c| container_new_before(t, c, info)) + Box::new(|t, c| container_existing_missing(t, c, info)) } })(tokenizer, code) } else { @@ -139,12 +146,24 @@ fn container_existing_before( } } +fn container_existing_missing( + tokenizer: &mut Tokenizer, + code: Code, + mut info: DocumentInfo, +) -> StateFnResult { + let container = tokenizer.container.take().unwrap(); + info.states.insert(info.continued, container); + container_new_before(tokenizer, code, info) +} + fn container_existing_after( tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo, ) -> StateFnResult { println!("container_existing_after"); + let container = tokenizer.container.take().unwrap(); + info.states.insert(info.continued, container); info.continued += 1; container_existing_before(tokenizer, code, info) } @@ -179,17 +198,16 @@ fn container_new_before( // self.interrupt = Boolean( // childFlow.currentConstruct && !childFlow._gfmTableDynamicInterruptHack // ) - } else { - // println!(" set interrupt to `false`! (before: {:?})", tokenizer.interrupt); - // tokenizer.interrupt = false; } + tokenizer.container = Some(ContainerState::default()); // Check if there is a new container. tokenizer.attempt(block_quote, move |ok| { if ok { Box::new(|t, c| container_new_after(t, c, info, Container::BlockQuote)) } else { Box::new(|tokenizer, code| { + tokenizer.container = Some(ContainerState::default()); tokenizer.attempt(list_item, move |ok| { if ok { Box::new(|t, c| container_new_after(t, c, info, Container::ListItem)) @@ -240,12 +258,15 @@ fn container_new_after( if info.continued < info.stack.len() { info.stack_close .append(&mut info.stack.drain(info.continued..).collect::>()); + info.states.truncate(info.continued); info = line_end(tokenizer, info, false, true); tokenizer.expect(code, true); } + let container = tokenizer.container.take().unwrap(); + info.states.push(container); info.stack.push(kind); - info.continued = info.stack.len(); + info.continued = info.stack.len(); // To do: `+= 1`? container_new_before(tokenizer, code, info) } @@ -261,7 +282,6 @@ fn containers_after( info.inject.last_mut().unwrap().0.append(&mut containers); tokenizer.lazy = info.continued != info.stack.len(); - println!("lazy: {:?} {:?}", info.continued, info.stack.len()); // Define start. let point = tokenizer.point.clone(); @@ -331,6 +351,7 @@ fn flow_end( if !lazy && info.continued < info.stack.len() { info.stack_close .append(&mut info.stack.drain(info.continued..).collect::>()); + info.states.truncate(info.continued); } info = line_end(tokenizer, info, false, false); @@ -500,7 +521,7 @@ fn line_end( info.inject[index].1.append(&mut exits); println!( - " setting `interrupt: false` (before: {:?}", + " setting `interrupt: false` (before: {:?})", tokenizer.interrupt ); tokenizer.interrupt = false; diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 163c2bf..34cfde3 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -130,6 +130,13 @@ pub struct Media { pub id: String, } +/// To do. +#[derive(Default, Debug)] +pub struct ContainerState { + pub blank_initial: bool, + pub size: usize, +} + /// The internal state of a tokenizer, not to be confused with states from the /// state machine, this instead is all the information about where we currently /// are and what’s going on. @@ -203,6 +210,8 @@ pub struct Tokenizer<'a> { pub concrete: bool, /// To do. pub lazy: bool, + /// To do. + pub container: Option, } impl<'a> Tokenizer<'a> { @@ -225,6 +234,7 @@ impl<'a> Tokenizer<'a> { interrupt: false, concrete: false, lazy: false, + container: None, resolvers: vec![], resolver_ids: vec![], } diff --git a/src/util/skip.rs b/src/util/skip.rs index 10ba364..d2ad914 100644 --- a/src/util/skip.rs +++ b/src/util/skip.rs @@ -5,15 +5,23 @@ use crate::tokenizer::{Event, EventType}; /// Skip from `index`, optionally past `token_types`. pub fn opt(events: &[Event], index: usize, token_types: &[Token]) -> usize { - skip_opt_with_direction(events, index, token_types, true) + skip_opt_impl(events, index, token_types, true) } /// Skip from `index`, optionally past `token_types`, backwards. pub fn opt_back(events: &[Event], index: usize, token_types: &[Token]) -> usize { - skip_opt_with_direction(events, index, token_types, false) + skip_opt_impl(events, index, token_types, false) } -pub fn to(events: &[Event], mut index: usize, token_types: &[Token]) -> usize { +pub fn to_back(events: &[Event], index: usize, token_types: &[Token]) -> usize { + to_impl(events, index, token_types, false) +} + +pub fn to(events: &[Event], index: usize, token_types: &[Token]) -> usize { + to_impl(events, index, token_types, true) +} + +pub fn to_impl(events: &[Event], mut index: usize, token_types: &[Token], forward: bool) -> usize { while index < events.len() { let current = &events[index].token_type; @@ -21,14 +29,14 @@ pub fn to(events: &[Event], mut index: usize, token_types: &[Token]) -> usize { break; } - index += 1; + index = if forward { index + 1 } else { index - 1 }; } index } /// Skip internals. -fn skip_opt_with_direction( +fn skip_opt_impl( events: &[Event], mut index: usize, token_types: &[Token], diff --git a/src/util/span.rs b/src/util/span.rs index 32dd00f..72b451d 100644 --- a/src/util/span.rs +++ b/src/util/span.rs @@ -6,9 +6,9 @@ use crate::util::codes::serialize as serialize_codes; /// A struct representing the span of an opening and closing event of a token. #[derive(Debug)] pub struct Span { - /// Absolute offset (and `index` in `codes`) of where this span starts. + /// Absolute offset (an `index` in `codes`) of where this span starts. pub start_index: usize, - /// Absolute offset (and `index` in `codes`) of where this span ends. + /// Absolute offset (an `index` in `codes`) of where this span ends. pub end_index: usize, } -- cgit