diff options
Diffstat (limited to '')
-rw-r--r-- | src/construct/code_fenced.rs | 44 | ||||
-rw-r--r-- | src/construct/code_indented.rs | 23 | ||||
-rw-r--r-- | src/construct/html_flow.rs | 60 | ||||
-rw-r--r-- | src/construct/mod.rs | 1 | ||||
-rw-r--r-- | src/construct/partial_non_lazy_continuation.rs | 26 | ||||
-rw-r--r-- | src/content/document.rs | 207 | ||||
-rw-r--r-- | src/tokenizer.rs | 1 |
7 files changed, 165 insertions, 197 deletions
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs index c7b2334..18beb92 100644 --- a/src/construct/code_fenced.rs +++ b/src/construct/code_fenced.rs @@ -102,7 +102,10 @@ //! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element use crate::constant::{CODE_FENCED_SEQUENCE_SIZE_MIN, TAB_SIZE}; -use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}; +use crate::construct::{ + partial_non_lazy_continuation::start as partial_non_lazy_continuation, + partial_space_or_tab::{space_or_tab, space_or_tab_min_max}, +}; use crate::token::Token; use crate::tokenizer::{Code, ContentType, State, StateFnResult, Tokenizer}; use crate::util::span::from_exit_event; @@ -376,22 +379,35 @@ fn meta(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { fn at_break(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { let clone = info.clone(); - match code { - Code::None => after(tokenizer, code, info), - Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => tokenizer.attempt( - |t, c| close_begin(t, c, info), - |ok| { - if ok { - Box::new(|t, c| after(t, c, clone)) - } else { - Box::new(|t, c| content_before(t, c, clone)) - } - }, - )(tokenizer, code), - _ => unreachable!("expected eof/eol"), + if tokenizer.lazy { + after(tokenizer, code, info) + } else { + tokenizer.check(partial_non_lazy_continuation, |ok| { + if ok { + Box::new(move |t, c| at_non_lazy_break(t, c, clone)) + } else { + Box::new(move |t, c| after(t, c, clone)) + } + })(tokenizer, code) } } +/// To do. +fn at_non_lazy_break(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { + let clone = info.clone(); + + tokenizer.attempt( + |t, c| close_begin(t, c, info), + |ok| { + if ok { + Box::new(|t, c| after(t, c, clone)) + } else { + Box::new(|t, c| content_before(t, c, clone)) + } + }, + )(tokenizer, code) +} + /// Before a closing fence, at the line ending. /// /// ```markdown diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs index 8966249..74a0938 100644 --- a/src/construct/code_indented.rs +++ b/src/construct/code_indented.rs @@ -128,17 +128,20 @@ fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// cd /// ``` fn further_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - // To do: `nok` if lazy line. - match code { - Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { - tokenizer.enter(Token::LineEnding); - tokenizer.consume(code); - tokenizer.exit(Token::LineEnding); - (State::Fn(Box::new(further_start)), None) + if tokenizer.lazy { + (State::Nok, None) + } else { + match code { + Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { + tokenizer.enter(Token::LineEnding); + tokenizer.consume(code); + tokenizer.exit(Token::LineEnding); + (State::Fn(Box::new(further_start)), None) + } + _ => tokenizer.attempt(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), |ok| { + Box::new(if ok { further_end } else { further_begin }) + })(tokenizer, code), } - _ => tokenizer.attempt(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), |ok| { - Box::new(if ok { further_end } else { further_begin }) - })(tokenizer, code), } } diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs index f30db3f..a8b1efc 100644 --- a/src/construct/html_flow.rs +++ b/src/construct/html_flow.rs @@ -100,7 +100,9 @@ use crate::constant::{HTML_BLOCK_NAMES, HTML_RAW_NAMES, HTML_RAW_SIZE_MAX, TAB_SIZE}; use crate::construct::{ - blank_line::start as blank_line, partial_space_or_tab::space_or_tab_min_max, + blank_line::start as blank_line, + partial_non_lazy_continuation::start as partial_non_lazy_continuation, + partial_space_or_tab::space_or_tab_min_max, }; use crate::token::Token; use crate::tokenizer::{Code, State, StateFnResult, Tokenizer}; @@ -425,7 +427,7 @@ fn tag_name(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes info.kind = Kind::Complete; // Do not support complete HTML when interrupting. - if tokenizer.interrupt { + if tokenizer.interrupt && !tokenizer.lazy { (State::Nok, None) } else if info.start_tag { complete_attribute_name_before(tokenizer, code, info) @@ -805,25 +807,52 @@ fn continuation_at_line_ending(tokenizer: &mut Tokenizer, code: Code, info: Info /// asd /// ``` fn html_continue_start(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { + tokenizer.check(partial_non_lazy_continuation, |ok| { + let func = if ok { + html_continue_start_non_lazy + } else { + html_continue_after + }; + Box::new(move |t, c| func(t, c, info)) + })(tokenizer, code) +} + +/// To do. +#[allow(clippy::needless_pass_by_value)] +fn html_continue_after(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { + tokenizer.exit(Token::HtmlFlow); + // Feel free to interrupt. + tokenizer.interrupt = false; + // Restore previous `concrete`. + tokenizer.concrete = info.concrete; + (State::Ok, Some(vec![code])) +} + +/// To do. +fn html_continue_start_non_lazy( + tokenizer: &mut Tokenizer, + code: Code, + info: Info, +) -> StateFnResult { match code { - Code::None => { - tokenizer.exit(Token::HtmlFlow); - // Feel free to interrupt. - tokenizer.interrupt = false; - // Restore previous `concrete`. - tokenizer.concrete = info.concrete; - (State::Ok, Some(vec![code])) - } - // To do: do not allow lazy lines. Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { tokenizer.enter(Token::LineEnding); tokenizer.consume(code); tokenizer.exit(Token::LineEnding); ( - State::Fn(Box::new(|t, c| html_continue_start(t, c, info))), + State::Fn(Box::new(|t, c| html_continue_before(t, c, info))), None, ) } + _ => unreachable!("expected eol"), + } +} + +fn html_continue_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { + match code { + Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { + html_continue_start(tokenizer, code, info) + } _ => { tokenizer.enter(Token::HtmlFlowData); continuation(tokenizer, code, info) @@ -976,12 +1005,7 @@ fn continuation_close(tokenizer: &mut Tokenizer, code: Code, info: Info) -> Stat match code { Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { tokenizer.exit(Token::HtmlFlowData); - tokenizer.exit(Token::HtmlFlow); - // Feel free to interrupt. - tokenizer.interrupt = false; - // Restore previous `concrete`. - tokenizer.concrete = info.concrete; - (State::Ok, Some(vec![code])) + html_continue_after(tokenizer, code, info) } _ => { tokenizer.consume(code); diff --git a/src/construct/mod.rs b/src/construct/mod.rs index ac830ef..06ff4e9 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -84,6 +84,7 @@ pub mod paragraph; pub mod partial_data; pub mod partial_destination; pub mod partial_label; +pub mod partial_non_lazy_continuation; pub mod partial_space_or_tab; pub mod partial_title; pub mod partial_whitespace; diff --git a/src/construct/partial_non_lazy_continuation.rs b/src/construct/partial_non_lazy_continuation.rs new file mode 100644 index 0000000..7964de3 --- /dev/null +++ b/src/construct/partial_non_lazy_continuation.rs @@ -0,0 +1,26 @@ +//! To do. + +use crate::token::Token; +use crate::tokenizer::{Code, State, StateFnResult, Tokenizer}; + +/// To do. +pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { + tokenizer.enter(Token::LineEnding); + tokenizer.consume(code); + tokenizer.exit(Token::LineEnding); + (State::Fn(Box::new(non_lazy_after)), None) + } + _ => (State::Nok, None), + } +} + +/// To do. +fn non_lazy_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + if tokenizer.lazy { + (State::Nok, None) + } else { + (State::Ok, Some(vec![code])) + } +} diff --git a/src/content/document.rs b/src/content/document.rs index f093a04..a8ff775 100644 --- a/src/content/document.rs +++ b/src/content/document.rs @@ -19,6 +19,7 @@ use crate::tokenizer::{Code, Event, EventType, Point, State, StateFn, StateFnRes use crate::util::edit_map::EditMap; use crate::util::{ normalize_identifier::normalize_identifier, + skip, span::{from_exit_event, serialize}, }; use std::collections::HashSet; @@ -26,6 +27,7 @@ use std::collections::HashSet; struct DocumentInfo { continued: usize, containers_begin_index: usize, + paragraph_before: bool, inject: Vec<(Vec<Event>, Vec<Event>)>, stack: Vec<String>, next: Box<StateFn>, @@ -71,6 +73,7 @@ pub fn document(parse_state: &mut ParseState, point: Point, index: usize) -> Vec fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { let info = DocumentInfo { continued: 0, + paragraph_before: false, inject: vec![], containers_begin_index: 0, stack: vec![], @@ -80,7 +83,7 @@ fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { } fn before(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnResult { - println!("before: check existing open containers"); + println!("before"); // First we iterate through the open blocks, starting with the root // document, and descending through last children down to the last open // block. @@ -101,8 +104,6 @@ fn before(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnR }; // To do: state? - println!("check existing: {:?}", name); - tokenizer.attempt(cont, move |ok| { if ok { Box::new(|t, c| document_continue(t, c, info)) @@ -112,7 +113,6 @@ fn before(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnR })(tokenizer, code) } else { // Done. - println!("check new:"); check_new_containers(tokenizer, code, info) } } @@ -124,67 +124,8 @@ fn document_continue( ) -> StateFnResult { println!("document_continue"); info.continued += 1; - - println!(" to do: close flow sometimes?"); - // // Note: this field is called `_closeFlow` but it also closes containers. - // // Perhaps a good idea to rename it but it’s already used in the wild by - // // extensions. - // if (self.containerState._closeFlow) { - // self.containerState._closeFlow = undefined - - // if (childFlow) { - // closeFlow() - // } - - // // Note: this algorithm for moving events around is similar to the - // // algorithm when dealing with lazy lines in `writeToChild`. - // const indexBeforeExits = self.events.length - // let indexBeforeFlow = indexBeforeExits - // /** @type {Point|undefined} */ - // let point - - // // Find the flow chunk. - // while (indexBeforeFlow--) { - // if ( - // self.events[indexBeforeFlow][0] === 'exit' && - // self.events[indexBeforeFlow][1].type === types.chunkFlow - // ) { - // point = self.events[indexBeforeFlow][1].end - // break - // } - // } - - // assert(point, 'could not find previous flow chunk') - - let size = info.continued; - info = exit_containers(tokenizer, info, size, true); - tokenizer.expect(code, true); - - // // Fix positions. - // let index = indexBeforeExits - - // while (index < self.events.length) { - // self.events[index][1].end = Object.assign({}, point) - // index++ - // } - - // // Inject the exits earlier (they’re still also at the end). - // splice( - // self.events, - // indexBeforeFlow + 1, - // 0, - // self.events.slice(indexBeforeExits) - // ) - - // // Discard the duplicate exits. - // self.events.length = index - - // return checkNewContainers(code) - // } - before(tokenizer, code, info) } -// documentContinue fn check_new_containers( tokenizer: &mut Tokenizer, @@ -198,18 +139,11 @@ fn check_new_containers( // step 1 before creating the new block as a child of the last matched // block. if info.continued == info.stack.len() { - // // No need to `check` whether there’s a container, if `exitContainers` - // // would be moot. - // // We can instead immediately `attempt` to parse one. - // if (!childFlow) { - // return documentContinued(code) - // } - // If we have concrete content, such as block HTML or fenced code, // we can’t have containers “pierce” into them, so we can immediately // start. if tokenizer.concrete { - println!(" concrete!"); + println!(" concrete"); return there_is_no_new_container(tokenizer, code, info); } @@ -239,7 +173,6 @@ fn there_is_a_new_container( mut info: DocumentInfo, name: String, ) -> StateFnResult { - println!("there_is_a_new_container"); let size = info.continued; info = exit_containers(tokenizer, info, size, true); tokenizer.expect(code, true); @@ -253,15 +186,12 @@ fn there_is_a_new_container( unreachable!("todo: cont {:?}", name) }; - println!("creating exit (a) for `{:?}`", name); - let token_types = end(); let mut index = 0; while index < token_types.len() { let token_type = &token_types[index]; let mut stack_index = tokenizer.stack.len(); - println!("stack: {:?}", tokenizer.stack); let mut found = false; while stack_index > 0 { @@ -278,11 +208,8 @@ fn there_is_a_new_container( index += 1; } - println!("add to stack: {:?}", name); info.stack.push(name); - - info.continued += 1; - document_continued(tokenizer, code, info) + document_continue(tokenizer, code, info) } /// Exit open containers. @@ -295,8 +222,7 @@ fn exit_containers( let mut exits: Vec<Event> = vec![]; if info.stack.len() > size { - // To do: inject these somewhere? Fix positions? - println!("closing flow. To do: are these resulting exits okay?"); + println!("closing flow"); let index = tokenizer.events.len(); let result = tokenizer.flush(info.next); info.next = Box::new(flow); // This is weird but Rust needs a function there. @@ -314,7 +240,6 @@ fn exit_containers( let mut add = tokenizer.events.drain(index..end).collect::<Vec<_>>(); - println!("evs: {:#?}", add); exits.append(&mut add); println!(" setting `interrupt: false`"); @@ -331,8 +256,6 @@ fn exit_containers( unreachable!("todo: cont {:?}", name) }; - println!("creating exit (b) for `{:?}`", name); - let token_types = end(); let mut index = 0; @@ -359,8 +282,6 @@ fn exit_containers( info.inject[index].1.append(&mut exits); } - // println!("exits: {:?} {:?}", info.inject, exits); - info } @@ -369,58 +290,15 @@ fn there_is_no_new_container( code: Code, info: DocumentInfo, ) -> StateFnResult { - let lazy = info.continued != info.stack.len(); - tokenizer.lazy = lazy; - println!("there is no new container"); - if lazy { - println!( - " This line will be lazy. Depending on what is parsed now, we need to close containers before?" - ); - } + println!("there_is_no_new_container"); + tokenizer.lazy = info.continued != info.stack.len(); // lineStartOffset = self.now().offset flow_start(tokenizer, code, info) } -fn document_continued(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnResult { - println!("document_continued"); - - // Try new containers. - // To do: list. - tokenizer.attempt(block_quote, |ok| { - if ok { - Box::new(|t, c| container_continue(t, c, info)) - } else { - Box::new(|t, c| { - // To do: this looks like a bug? - t.lazy = false; - flow_start(t, c, info) - }) - } - })(tokenizer, code) -} - -fn container_continue( - tokenizer: &mut Tokenizer, - code: Code, - mut info: DocumentInfo, -) -> StateFnResult { - println!("container_continue"); - // assert( - // self.currentConstruct, - // 'expected `currentConstruct` to be defined on tokenizer' - // ) - // assert( - // self.containerState, - // 'expected `containerState` to be defined on tokenizer' - // ) - info.continued += 1; - // To do: add to stack? - // stack.push([self.currentConstruct, self.containerState]) - // Try another. - document_continued(tokenizer, code, info) -} - fn flow_start(tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo) -> StateFnResult { + println!("flow_start"); + let containers = tokenizer .events .drain(info.containers_begin_index..) @@ -428,11 +306,6 @@ fn flow_start(tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo) -> info.inject.push((containers, vec![])); - // Exit containers. - let size = info.continued; - info = exit_containers(tokenizer, info, size, true); - tokenizer.expect(code, true); - // Define start. let point = tokenizer.point.clone(); tokenizer.define_skip(&point); @@ -440,9 +313,7 @@ fn flow_start(tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo) -> let state = info.next; info.next = Box::new(flow); // This is weird but Rust needs a function there. - println!("flow_start:before"); tokenizer.go_until(state, eof_eol, move |(state, remainder)| { - println!("flow_start:after"); ( State::Fn(Box::new(move |t, c| flow_end(t, c, info, state))), remainder, @@ -456,31 +327,62 @@ fn flow_end( mut info: DocumentInfo, result: State, ) -> StateFnResult { - println!("flow_end"); - let was_lazy = tokenizer.lazy; + println!("flow_end: lazy? {:?}", tokenizer.lazy); - if was_lazy { - println!( - "this line was lazy. Depeding on what was parsed, we need to exit containers after it?" - ); + let index = tokenizer.events.len(); + let index = if index > 0 { + skip::opt_back(&tokenizer.events, index - 1, &[Token::LineEnding]) + } else { + 0 + }; + + let paragraph = if index > 0 { + let ev = &tokenizer.events[index]; + ev.point.offset + 1 >= tokenizer.point.offset + && ev.token_type == Token::Paragraph + && !(matches!( + tokenizer.previous, + Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') + ) && matches!(code, Code::None)) + } else { + false + }; + + let mut continued = info.continued; + let size = info.stack.len(); + + if tokenizer.lazy { + println!("this line was lazy."); + + if info.paragraph_before && paragraph { + println!("it was another paragraph, which is allowed."); + continued = size; + } else { + println!( + "it was something else (prev: {:?}, cur: {:?}), which is not allowed.", + info.paragraph_before, paragraph + ); + } } + // Exit containers. + info = exit_containers(tokenizer, info, continued, true); + tokenizer.expect(code, true); + info.continued = 0; + info.paragraph_before = paragraph; info.containers_begin_index = tokenizer.events.len(); match result { State::Ok => { - println!("State::Ok"); info = exit_containers(tokenizer, info, 0, false); tokenizer.expect(code, true); - // println!("document:inject: {:?}", info.inject); let mut map = EditMap::new(); let mut line_index = 0; let mut index = 0; let add = info.inject[line_index].0.clone(); - println!("add enters at start: {:?}", add); map.add(0, 0, add); while index < tokenizer.events.len() { @@ -489,7 +391,6 @@ fn flow_end( if event.token_type == Token::LineEnding || event.token_type == Token::BlankLineEnding { - println!("eol: {:?}", event.point); if event.event_type == EventType::Enter { let mut add = info.inject[line_index].1.clone(); let mut deep_index = 0; @@ -498,12 +399,10 @@ fn flow_end( add[deep_index].index = event.index; deep_index += 1; } - println!("add exits before: {:?}", add); map.add(index, 0, add); } else { line_index += 1; let add = info.inject[line_index].0.clone(); - println!("add enters after: {:?}", add); map.add(index + 1, 0, add); } } @@ -518,12 +417,12 @@ fn flow_end( add[deep_index].index = tokenizer.index; deep_index += 1; } - println!("add exits at end: {:?}", add); map.add(index, 0, add); tokenizer.events = map.consume(&mut tokenizer.events); let mut index = 0; - println!("document:inject:ends: {:?}", tokenizer.events.len()); + + println!("after: {:?}", tokenizer.events.len()); while index < tokenizer.events.len() { let event = &tokenizer.events[index]; println!( diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 80786ea..163c2bf 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -611,7 +611,6 @@ fn attempt_impl( // Should it be before? // How to match `eof`? if !codes.is_empty() && pause(tokenizer.previous) { - println!("pause!: {:?}", (codes.clone(), vec![code])); return done( (codes, vec![code]), false, |