diff options
Diffstat (limited to '')
-rw-r--r--	src/compiler.rs           |  55
-rw-r--r--	src/construct/html_flow.rs |   7
-rw-r--r--	src/construct/list.rs      |   4
-rw-r--r--	src/content/document.rs    | 332
4 files changed, 221 insertions(+), 177 deletions(-)
diff --git a/src/compiler.rs b/src/compiler.rs index 4b38c8d..37229a4 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -8,6 +8,7 @@ use crate::util::{ decode_character_reference::{decode_named, decode_numeric}, encode::encode, sanitize_uri::sanitize_uri, + skip, span::{codes as codes_from_span, from_exit_event, serialize}, }; use std::collections::HashMap; @@ -241,7 +242,6 @@ struct CompileContext<'a> { pub tight_stack: Vec<bool>, /// Fields used to influance the current compilation. pub slurp_one_line_ending: bool, - pub slurp_all_line_endings: bool, pub tags: bool, pub ignore_encode: bool, pub last_was_tag: bool, @@ -276,7 +276,6 @@ impl<'a> CompileContext<'a> { definitions: HashMap::new(), tight_stack: vec![], slurp_one_line_ending: false, - slurp_all_line_endings: false, tags: true, ignore_encode: false, last_was_tag: false, @@ -718,8 +717,6 @@ fn on_enter_paragraph(context: &mut CompileContext) { context.line_ending_if_needed(); context.tag("<p>".to_string()); } - - context.slurp_all_line_endings = false; } /// Handle [`Enter`][EventType::Enter]:[`Resource`][Token::Resource]. @@ -785,7 +782,6 @@ fn on_exit_block_quote(context: &mut CompileContext) { context.tight_stack.pop(); context.line_ending_if_needed(); context.tag("</blockquote>".to_string()); - context.slurp_all_line_endings = false; } /// Handle [`Exit`][EventType::Exit]:[`CharacterReferenceMarker`][Token::CharacterReferenceMarker]. @@ -1075,9 +1071,7 @@ fn on_exit_label_text(context: &mut CompileContext) { /// Handle [`Exit`][EventType::Exit]:[`LineEnding`][Token::LineEnding]. fn on_exit_line_ending(context: &mut CompileContext) { - if context.slurp_all_line_endings { - // Empty. 
- } else if context.slurp_one_line_ending { + if context.slurp_one_line_ending { context.slurp_one_line_ending = false; } else { context.push(context.encode_opt(&serialize( @@ -1156,9 +1150,7 @@ fn on_exit_media(context: &mut CompileContext) { fn on_exit_paragraph(context: &mut CompileContext) { let tight = context.tight_stack.last().unwrap_or(&false); - if *tight { - context.slurp_all_line_endings = true; - } else { + if !tight { context.tag("</p>".to_string()); } } @@ -1218,10 +1210,29 @@ fn on_enter_list(context: &mut CompileContext) { } else { balance -= 1; - // Blank line directly in list or directly in list item. - if balance < 3 && event.token_type == Token::BlankLineEnding { - loose = true; - break; + // Blank line directly in list or directly in list item, + // but not a blank line after an empty list item. + // To do: does this check if the item is empty? + if balance < 3 && event.token_type == Token::BlankLineEnding + // && !(balance == 1 && events[index - 2].token_type == Token::ListItem) + { + let at_list_item = balance == 1 && events[index - 2].token_type == Token::ListItem; + let at_empty_list_item = if at_list_item { + let before_item = skip::opt_back(events, index - 2, &[Token::ListItem]); + let before_prefix = skip::opt_back( + events, + index - 3, + &[Token::ListItemPrefix, Token::SpaceOrTab], + ); + before_item + 1 == before_prefix + } else { + false + }; + + if !at_list_item || !at_empty_list_item { + loose = true; + break; + } } // Done. @@ -1233,7 +1244,6 @@ fn on_enter_list(context: &mut CompileContext) { index += 1; } - println!("list: {:?} {:?}", token_type, loose); context.tight_stack.push(!loose); context.line_ending_if_needed(); // Note: no `>`. @@ -1283,12 +1293,21 @@ fn on_exit_list_item_value(context: &mut CompileContext) { /// To do. 
fn on_exit_list_item(context: &mut CompileContext) { - if context.last_was_tag && !context.slurp_all_line_endings { + let tight = context.tight_stack.last().unwrap_or(&false); + let before_item = skip::opt_back( + context.events, + context.index - 1, + &[Token::BlankLineEnding, Token::LineEnding, Token::SpaceOrTab], + ); + let previous = &context.events[before_item]; + let tight_paragraph = *tight && previous.token_type == Token::Paragraph; + let empty_item = previous.token_type == Token::ListItemPrefix; + + if !tight_paragraph && !empty_item { context.line_ending_if_needed(); } context.tag("</li>".to_string()); - context.slurp_all_line_endings = false; } /// To do. diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs index a8b1efc..3300d2f 100644 --- a/src/construct/html_flow.rs +++ b/src/construct/html_flow.rs @@ -207,7 +207,6 @@ struct Info { /// pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.enter(Token::HtmlFlow); - tokenizer.enter(Token::HtmlFlowData); // To do: allow arbitrary when code (indented) is turned off. 
tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), before)(tokenizer, code) } @@ -219,6 +218,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// ``` fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { if Code::Char('<') == code { + tokenizer.enter(Token::HtmlFlowData); tokenizer.consume(code); (State::Fn(Box::new(open)), None) } else { @@ -771,11 +771,12 @@ fn continuation(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnRes Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') if info.kind == Kind::Basic || info.kind == Kind::Complete => { + tokenizer.exit(Token::HtmlFlowData); tokenizer.check(blank_line_before, |ok| { let func = if ok { - continuation_close + html_continue_after } else { - continuation_at_line_ending + html_continue_start // continuation_at_line_ending }; Box::new(move |t, c| func(t, c, info)) })(tokenizer, code) diff --git a/src/construct/list.rs b/src/construct/list.rs index 960c0eb..d06eaf0 100644 --- a/src/construct/list.rs +++ b/src/construct/list.rs @@ -267,7 +267,7 @@ pub fn not_blank_cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { index > 0 && tokenizer.events[index - 1].token_type == Token::BlankLineEnding; let mut further_blank = false; - if currently_blank && index > 3 { + if currently_blank && index > 5 { let before = skip::opt_back(&tokenizer.events, index - 3, &[Token::SpaceOrTab]); further_blank = tokenizer.events[before].token_type == Token::BlankLineEnding; } @@ -338,7 +338,7 @@ pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> { && skip::opt( &tokenizer.events, previous.3 + 1, - &[Token::LineEnding, Token::BlankLineEnding], + &[Token::SpaceOrTab, Token::LineEnding, Token::BlankLineEnding], ) == current.2 { println!("prev:match {:?} {:?}", previous, current); diff --git a/src/content/document.rs b/src/content/document.rs index b29e4b9..f6b8f55 100644 --- a/src/content/document.rs +++ b/src/content/document.rs @@ -10,7 +10,7 @@ use crate::construct::{ 
block_quote::{cont as block_quote_cont, end as block_quote_end, start as block_quote}, - list::{cont as list_const, end as list_end, start as list}, + list::{cont as list_item_const, end as list_item_end, start as list_item}, }; use crate::content::flow::start as flow; use crate::parser::ParseState; @@ -25,12 +25,19 @@ use crate::util::{ }; use std::collections::HashSet; +#[derive(Debug, PartialEq)] +enum Container { + BlockQuote, + ListItem, +} + struct DocumentInfo { continued: usize, - containers_begin_index: usize, + index: usize, paragraph_before: bool, inject: Vec<(Vec<Event>, Vec<Event>)>, - stack: Vec<String>, + stack: Vec<Container>, + stack_close: Vec<Container>, next: Box<StateFn>, } @@ -73,18 +80,34 @@ pub fn document(parse_state: &mut ParseState, point: Point, index: usize) -> Vec fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { let info = DocumentInfo { + index: 0, continued: 0, - paragraph_before: false, inject: vec![], - containers_begin_index: 0, - stack: vec![], next: Box::new(flow), + paragraph_before: false, + stack: vec![], + stack_close: vec![], }; - before(tokenizer, code, info) + line_start(tokenizer, code, info) +} + +/// Start of a new line. +fn line_start(tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo) -> StateFnResult { + println!("line_start"); + info.index = tokenizer.events.len(); + info.inject.push((vec![], vec![])); + info.continued = 0; + container_existing_before(tokenizer, code, info) } -fn before(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnResult { - println!("before"); +/// Before existing containers. +fn container_existing_before( + tokenizer: &mut Tokenizer, + code: Code, + info: DocumentInfo, +) -> StateFnResult { + println!("container_existing_before"); + // First we iterate through the open blocks, starting with the root // document, and descending through last children down to the last open // block. 
@@ -96,45 +119,42 @@ fn before(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnR // But we cannot close unmatched blocks yet, because we may have a lazy // continuation line. if info.continued < info.stack.len() { - let name = &info.stack[info.continued]; - let cont = if name == "blockquote" { - block_quote_cont - } else if name == "list" { - list_const - } else { - unreachable!("todo: cont construct {:?}", name) + let kind = &info.stack[info.continued]; + let cont = match kind { + Container::BlockQuote => block_quote_cont, + Container::ListItem => list_item_const, }; // To do: state? tokenizer.attempt(cont, move |ok| { if ok { - Box::new(|t, c| document_continue(t, c, info)) + Box::new(|t, c| container_existing_after(t, c, info)) } else { - Box::new(|t, c| check_new_containers(t, c, info)) + Box::new(|t, c| container_new_before(t, c, info)) } })(tokenizer, code) } else { // Done. - check_new_containers(tokenizer, code, info) + container_new_before(tokenizer, code, info) } } -fn document_continue( +fn container_existing_after( tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo, ) -> StateFnResult { - println!("document_continue"); + println!("container_existing_after"); info.continued += 1; - before(tokenizer, code, info) + container_existing_before(tokenizer, code, info) } -fn check_new_containers( +fn container_new_before( tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo, ) -> StateFnResult { - println!("check_new_containers"); + println!("container_new_before"); // Next, after consuming the continuation markers for existing blocks, we // look for new block starts (e.g. `>` for a block quote). // If we encounter a new block start, we close any blocks unmatched in @@ -146,10 +166,13 @@ fn check_new_containers( // start. 
if tokenizer.concrete { println!(" concrete"); - return there_is_no_new_container(tokenizer, code, info); + return containers_after(tokenizer, code, info); } - println!(" to do: interrupt ({:?})?", tokenizer.interrupt); + println!( + " to do: set interrupt? (before: {:?})", + tokenizer.interrupt + ); // // If we do have flow, it could still be a blank line, // // but we’d be interrupting it w/ a new container if there’s a current // // construct. @@ -157,20 +180,21 @@ fn check_new_containers( // childFlow.currentConstruct && !childFlow._gfmTableDynamicInterruptHack // ) } else { - tokenizer.interrupt = false; + // println!(" set interrupt to `false`! (before: {:?})", tokenizer.interrupt); + // tokenizer.interrupt = false; } // Check if there is a new container. tokenizer.attempt(block_quote, move |ok| { if ok { - Box::new(|t, c| there_is_a_new_container(t, c, info, "blockquote".to_string())) + Box::new(|t, c| container_new_after(t, c, info, Container::BlockQuote)) } else { Box::new(|tokenizer, code| { - tokenizer.attempt(list, move |ok| { + tokenizer.attempt(list_item, move |ok| { if ok { - Box::new(|t, c| there_is_a_new_container(t, c, info, "list".to_string())) + Box::new(|t, c| container_new_after(t, c, info, Container::ListItem)) } else { - Box::new(|t, c| there_is_no_new_container(t, c, info)) + Box::new(|t, c| containers_after(t, c, info)) } })(tokenizer, code) }) @@ -178,25 +202,17 @@ fn check_new_containers( })(tokenizer, code) } -fn there_is_a_new_container( +fn container_new_after( tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo, - name: String, + kind: Container, ) -> StateFnResult { - let size = info.continued; - println!("exit:0: {:?}", false); - info = exit_containers(tokenizer, info, size, false); - tokenizer.expect(code, true); - // Remove from the event stack. // We’ll properly add exits at different points manually. 
- let end = if name == "blockquote" { - block_quote_end - } else if name == "list" { - list_end - } else { - unreachable!("todo: end {:?}", name) + let end = match kind { + Container::BlockQuote => block_quote_end, + Container::ListItem => list_item_end, }; let token_types = end(); @@ -221,118 +237,42 @@ fn there_is_a_new_container( index += 1; } - info.stack.push(name); - document_continue(tokenizer, code, info) -} - -/// Exit open containers. -fn exit_containers( - tokenizer: &mut Tokenizer, - mut info: DocumentInfo, - size: usize, - before: bool, -) -> DocumentInfo { - let mut exits: Vec<Event> = vec![]; - - if info.stack.len() > size { - println!("closing flow"); - let index = tokenizer.events.len(); - let result = tokenizer.flush(info.next); - info.next = Box::new(flow); // This is weird but Rust needs a function there. - assert!(matches!(result.0, State::Ok)); - assert!(result.1.is_none()); - - let mut end = tokenizer.events.len(); - while end > 0 && end > index { - if tokenizer.events[end - 1].token_type != Token::LineEnding { - break; - } - - end -= 1; - } - - let mut add = tokenizer.events.drain(index..end).collect::<Vec<_>>(); - - exits.append(&mut add); - - println!(" setting `interrupt: false`"); - tokenizer.interrupt = false; - } - - while info.stack.len() > size { - let name = info.stack.pop().unwrap(); - - let end = if name == "blockquote" { - block_quote_end - } else if name == "list" { - list_end - } else { - unreachable!("todo: end {:?}", name) - }; - - let token_types = end(); - - let mut index = 0; - while index < token_types.len() { - let token_type = &token_types[index]; - - exits.push(Event { - event_type: EventType::Exit, - token_type: token_type.clone(), - // Note: positions are fixed later. 
- point: tokenizer.point.clone(), - index: tokenizer.index, - previous: None, - next: None, - content_type: None, - }); - - index += 1; - } - } - - if !exits.is_empty() { - let before = if before { 1 } else { 0 }; - let mut index = info.inject.len() - 1; - if before > index { - // To do: maybe, if this branch happens, it’s a bug? - println!("inject:0: {:?}", index); - index = 0; - } else { - index -= before; - println!("inject:set: {:?}", index); - } - info.inject[index].1.append(&mut exits); + if info.continued < info.stack.len() { + info.stack_close + .append(&mut info.stack.drain(info.continued..).collect::<Vec<_>>()); + info = line_end(tokenizer, info, false, true); + tokenizer.expect(code, true); } - info + info.stack.push(kind); + info.continued = info.stack.len(); + container_new_before(tokenizer, code, info) } -fn there_is_no_new_container( +fn containers_after( tokenizer: &mut Tokenizer, code: Code, - info: DocumentInfo, + mut info: DocumentInfo, ) -> StateFnResult { - println!("there_is_no_new_container"); - tokenizer.lazy = info.continued != info.stack.len(); - // lineStartOffset = self.now().offset - flow_start(tokenizer, code, info) -} + println!("containers_after"); -fn flow_start(tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo) -> StateFnResult { - println!("flow_start"); + // Add all container events we parsed. + let mut containers = tokenizer.events.drain(info.index..).collect::<Vec<_>>(); + info.inject.last_mut().unwrap().0.append(&mut containers); - let containers = tokenizer - .events - .drain(info.containers_begin_index..) - .collect::<Vec<_>>(); - - info.inject.push((containers, vec![])); + tokenizer.lazy = info.continued != info.stack.len(); + println!("lazy: {:?} {:?}", info.continued, info.stack.len()); // Define start. 
let point = tokenizer.point.clone(); tokenizer.define_skip(&point); + flow_start(tokenizer, code, info) +} + +fn flow_start(tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo) -> StateFnResult { + println!("flow_start"); + let state = info.next; info.next = Box::new(flow); // This is weird but Rust needs a function there. @@ -352,6 +292,7 @@ fn flow_end( ) -> StateFnResult { println!("flow_end: lazy? {:?}", tokenizer.lazy); + // To do: clean this! let index = tokenizer.events.len(); let index = if index > 0 { skip::opt_back(&tokenizer.events, index - 1, &[Token::LineEnding]) @@ -371,15 +312,14 @@ fn flow_end( false }; - let mut continued = info.continued; - let size = info.stack.len(); + let mut lazy = false; if tokenizer.lazy { println!("this line was lazy."); if info.paragraph_before && paragraph { println!("it was another paragraph, which is allowed."); - continued = size; + lazy = true; } else { println!( "it was something else (prev: {:?}, cur: {:?}), which is not allowed.", @@ -388,27 +328,31 @@ fn flow_end( } } - // Exit containers. 
- println!("exit:1: {:?}", true); - info = exit_containers(tokenizer, info, continued, true); + if !lazy && info.continued < info.stack.len() { + info.stack_close + .append(&mut info.stack.drain(info.continued..).collect::<Vec<_>>()); + } + + info = line_end(tokenizer, info, false, false); tokenizer.expect(code, true); - info.continued = 0; info.paragraph_before = paragraph; - info.containers_begin_index = tokenizer.events.len(); match result { State::Ok => { - println!("exit:3: {:?}", false); - info = exit_containers(tokenizer, info, 0, false); - tokenizer.expect(code, true); + info.stack_close + .append(&mut info.stack.drain(..).collect::<Vec<_>>()); + info = line_end(tokenizer, info, true, false); let mut map = EditMap::new(); let mut line_index = 0; let mut index = 0; + println!("injections: {:#?}", info.inject); + let add = info.inject[line_index].0.clone(); let mut first_line_ending_in_run: Option<usize> = None; + println!("inject:enters:0: {:?}", add.len()); map.add(0, 0, add); while index < tokenizer.events.len() { @@ -427,6 +371,11 @@ fn flow_end( index += 1; } if !add.is_empty() { + println!( + "inject:exits:at-{:?}: {:?}", + first_line_ending_in_run, + add.len() + ); map.add(first_line_ending_in_run.unwrap(), 0, add); } } else { @@ -435,6 +384,7 @@ fn flow_end( if !add.is_empty() { // No longer empty. 
first_line_ending_in_run = None; + println!("inject:enters:at-{:?}: {:?}", index + 1, add.len()); map.add(index + 1, 0, add); } } @@ -448,6 +398,7 @@ fn flow_end( } let mut add = info.inject[line_index].1.clone(); + println!("inject:exits:tail-{:?}: {:?}", index, add.len()); let mut deep_index = 0; while deep_index < add.len() { add[deep_index].point = tokenizer.point.clone(); @@ -479,11 +430,84 @@ fn flow_end( State::Nok => unreachable!("handle nok in `flow`?"), State::Fn(func) => { info.next = func; - before(tokenizer, code, info) + line_start(tokenizer, code, info) } } } +fn line_end( + tokenizer: &mut Tokenizer, + mut info: DocumentInfo, + eof: bool, + containers_before: bool, +) -> DocumentInfo { + let mut stack_close = info.stack_close.drain(..).collect::<Vec<_>>(); + println!("line_end: {:?}", stack_close); + + if stack_close.is_empty() { + return info; + } + + // So, we’re at the end of a line, but we need to close the *previous* line. + if !eof { + println!("closing previous flow"); + tokenizer.define_skip(&tokenizer.point.clone()); + let mut current_events = tokenizer.events.drain(info.index..).collect::<Vec<_>>(); + let next = info.next; + info.next = Box::new(flow); // This is weird but Rust needs a function there. + let result = tokenizer.flush(next); + assert!(matches!(result.0, State::Ok)); + assert!(result.1.is_none()); + + if containers_before { + info.index = tokenizer.events.len(); + } + + tokenizer.events.append(&mut current_events); + } + + let mut exits: Vec<Event> = vec![]; + + while !stack_close.is_empty() { + let kind = stack_close.pop().unwrap(); + let end = match kind { + Container::BlockQuote => block_quote_end, + Container::ListItem => list_item_end, + }; + + let token_types = end(); + + let mut index = 0; + while index < token_types.len() { + let token_type = &token_types[index]; + + exits.push(Event { + event_type: EventType::Exit, + token_type: token_type.clone(), + // Note: positions are fixed later. 
+ point: tokenizer.point.clone(), + index: tokenizer.index, + previous: None, + next: None, + content_type: None, + }); + + index += 1; + } + } + + let index = info.inject.len() - (if eof { 1 } else { 2 }); + info.inject[index].1.append(&mut exits); + + println!( + " setting `interrupt: false` (before: {:?}", + tokenizer.interrupt + ); + tokenizer.interrupt = false; + + info +} + fn eof_eol(code: Code) -> bool { matches!( code, |