about | summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/construct/code_fenced.rs 44
-rw-r--r-- src/construct/code_indented.rs 23
-rw-r--r-- src/construct/html_flow.rs 60
-rw-r--r-- src/construct/mod.rs 1
-rw-r--r-- src/construct/partial_non_lazy_continuation.rs 26
-rw-r--r-- src/content/document.rs 207
-rw-r--r-- src/tokenizer.rs 1
7 files changed, 165 insertions, 197 deletions
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index c7b2334..18beb92 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -102,7 +102,10 @@
//! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
use crate::constant::{CODE_FENCED_SEQUENCE_SIZE_MIN, TAB_SIZE};
-use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
+use crate::construct::{
+ partial_non_lazy_continuation::start as partial_non_lazy_continuation,
+ partial_space_or_tab::{space_or_tab, space_or_tab_min_max},
+};
use crate::token::Token;
use crate::tokenizer::{Code, ContentType, State, StateFnResult, Tokenizer};
use crate::util::span::from_exit_event;
@@ -376,22 +379,35 @@ fn meta(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
fn at_break(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
let clone = info.clone();
- match code {
- Code::None => after(tokenizer, code, info),
- Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => tokenizer.attempt(
- |t, c| close_begin(t, c, info),
- |ok| {
- if ok {
- Box::new(|t, c| after(t, c, clone))
- } else {
- Box::new(|t, c| content_before(t, c, clone))
- }
- },
- )(tokenizer, code),
- _ => unreachable!("expected eof/eol"),
+ if tokenizer.lazy {
+ after(tokenizer, code, info)
+ } else {
+ tokenizer.check(partial_non_lazy_continuation, |ok| {
+ if ok {
+ Box::new(move |t, c| at_non_lazy_break(t, c, clone))
+ } else {
+ Box::new(move |t, c| after(t, c, clone))
+ }
+ })(tokenizer, code)
}
}
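+/// At a break followed by a non-lazy line: try a closing fence (`close_begin`), then go to `after` on success or `content_before` on failure.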
+fn at_non_lazy_break(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
+ let clone = info.clone();
+
+ tokenizer.attempt(
+ |t, c| close_begin(t, c, info),
+ |ok| {
+ if ok {
+ Box::new(|t, c| after(t, c, clone))
+ } else {
+ Box::new(|t, c| content_before(t, c, clone))
+ }
+ },
+ )(tokenizer, code)
+}
+
/// Before a closing fence, at the line ending.
///
/// ```markdown
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs
index 8966249..74a0938 100644
--- a/src/construct/code_indented.rs
+++ b/src/construct/code_indented.rs
@@ -128,17 +128,20 @@ fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// cd
/// ```
fn further_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- // To do: `nok` if lazy line.
- match code {
- Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
- tokenizer.enter(Token::LineEnding);
- tokenizer.consume(code);
- tokenizer.exit(Token::LineEnding);
- (State::Fn(Box::new(further_start)), None)
+ if tokenizer.lazy {
+ (State::Nok, None)
+ } else {
+ match code {
+ Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ tokenizer.enter(Token::LineEnding);
+ tokenizer.consume(code);
+ tokenizer.exit(Token::LineEnding);
+ (State::Fn(Box::new(further_start)), None)
+ }
+ _ => tokenizer.attempt(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), |ok| {
+ Box::new(if ok { further_end } else { further_begin })
+ })(tokenizer, code),
}
- _ => tokenizer.attempt(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), |ok| {
- Box::new(if ok { further_end } else { further_begin })
- })(tokenizer, code),
}
}
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index f30db3f..a8b1efc 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -100,7 +100,9 @@
use crate::constant::{HTML_BLOCK_NAMES, HTML_RAW_NAMES, HTML_RAW_SIZE_MAX, TAB_SIZE};
use crate::construct::{
- blank_line::start as blank_line, partial_space_or_tab::space_or_tab_min_max,
+ blank_line::start as blank_line,
+ partial_non_lazy_continuation::start as partial_non_lazy_continuation,
+ partial_space_or_tab::space_or_tab_min_max,
};
use crate::token::Token;
use crate::tokenizer::{Code, State, StateFnResult, Tokenizer};
@@ -425,7 +427,7 @@ fn tag_name(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes
info.kind = Kind::Complete;
// Do not support complete HTML when interrupting.
- if tokenizer.interrupt {
+ if tokenizer.interrupt && !tokenizer.lazy {
(State::Nok, None)
} else if info.start_tag {
complete_attribute_name_before(tokenizer, code, info)
@@ -805,25 +807,52 @@ fn continuation_at_line_ending(tokenizer: &mut Tokenizer, code: Code, info: Info
/// asd
/// ```
fn html_continue_start(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
+ tokenizer.check(partial_non_lazy_continuation, |ok| {
+ let func = if ok {
+ html_continue_start_non_lazy
+ } else {
+ html_continue_after
+ };
+ Box::new(move |t, c| func(t, c, info))
+ })(tokenizer, code)
+}
+
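+/// Done: exit `HtmlFlow`, set `interrupt` back to `false`, restore the previous `concrete`, and hand `code` back.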
+#[allow(clippy::needless_pass_by_value)]
+fn html_continue_after(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
+ tokenizer.exit(Token::HtmlFlow);
+ // Feel free to interrupt.
+ tokenizer.interrupt = false;
+ // Restore previous `concrete`.
+ tokenizer.concrete = info.concrete;
+ (State::Ok, Some(vec![code]))
+}
+
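+/// At a line ending followed by a non-lazy line: consume it as a `LineEnding` and move on to `html_continue_before`.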
+fn html_continue_start_non_lazy(
+ tokenizer: &mut Tokenizer,
+ code: Code,
+ info: Info,
+) -> StateFnResult {
match code {
- Code::None => {
- tokenizer.exit(Token::HtmlFlow);
- // Feel free to interrupt.
- tokenizer.interrupt = false;
- // Restore previous `concrete`.
- tokenizer.concrete = info.concrete;
- (State::Ok, Some(vec![code]))
- }
- // To do: do not allow lazy lines.
Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
tokenizer.enter(Token::LineEnding);
tokenizer.consume(code);
tokenizer.exit(Token::LineEnding);
(
- State::Fn(Box::new(|t, c| html_continue_start(t, c, info))),
+ State::Fn(Box::new(|t, c| html_continue_before(t, c, info))),
None,
)
}
+ _ => unreachable!("expected eol"),
+ }
+}
+
+fn html_continue_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
+ match code {
+ Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ html_continue_start(tokenizer, code, info)
+ }
_ => {
tokenizer.enter(Token::HtmlFlowData);
continuation(tokenizer, code, info)
@@ -976,12 +1005,7 @@ fn continuation_close(tokenizer: &mut Tokenizer, code: Code, info: Info) -> Stat
match code {
Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
tokenizer.exit(Token::HtmlFlowData);
- tokenizer.exit(Token::HtmlFlow);
- // Feel free to interrupt.
- tokenizer.interrupt = false;
- // Restore previous `concrete`.
- tokenizer.concrete = info.concrete;
- (State::Ok, Some(vec![code]))
+ html_continue_after(tokenizer, code, info)
}
_ => {
tokenizer.consume(code);
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index ac830ef..06ff4e9 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -84,6 +84,7 @@ pub mod paragraph;
pub mod partial_data;
pub mod partial_destination;
pub mod partial_label;
+pub mod partial_non_lazy_continuation;
pub mod partial_space_or_tab;
pub mod partial_title;
pub mod partial_whitespace;
diff --git a/src/construct/partial_non_lazy_continuation.rs b/src/construct/partial_non_lazy_continuation.rs
new file mode 100644
index 0000000..7964de3
--- /dev/null
+++ b/src/construct/partial_non_lazy_continuation.rs
@@ -0,0 +1,26 @@
+//! Partial construct that matches a line ending only when the line after it is not lazy.
+
+use crate::token::Token;
+use crate::tokenizer::{Code, State, StateFnResult, Tokenizer};
+
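+/// Start of non-lazy continuation: consume a line ending as `LineEnding`, or fail (`State::Nok`) on any other code.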
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ tokenizer.enter(Token::LineEnding);
+ tokenizer.consume(code);
+ tokenizer.exit(Token::LineEnding);
+ (State::Fn(Box::new(non_lazy_after)), None)
+ }
+ _ => (State::Nok, None),
+ }
+}
+
+/// After the line ending: fail if `tokenizer.lazy` is set, otherwise succeed and hand `code` back.
+fn non_lazy_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ if tokenizer.lazy {
+ (State::Nok, None)
+ } else {
+ (State::Ok, Some(vec![code]))
+ }
+}
diff --git a/src/content/document.rs b/src/content/document.rs
index f093a04..a8ff775 100644
--- a/src/content/document.rs
+++ b/src/content/document.rs
@@ -19,6 +19,7 @@ use crate::tokenizer::{Code, Event, EventType, Point, State, StateFn, StateFnRes
use crate::util::edit_map::EditMap;
use crate::util::{
normalize_identifier::normalize_identifier,
+ skip,
span::{from_exit_event, serialize},
};
use std::collections::HashSet;
@@ -26,6 +27,7 @@ use std::collections::HashSet;
struct DocumentInfo {
continued: usize,
containers_begin_index: usize,
+ paragraph_before: bool,
inject: Vec<(Vec<Event>, Vec<Event>)>,
stack: Vec<String>,
next: Box<StateFn>,
@@ -71,6 +73,7 @@ pub fn document(parse_state: &mut ParseState, point: Point, index: usize) -> Vec
fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
let info = DocumentInfo {
continued: 0,
+ paragraph_before: false,
inject: vec![],
containers_begin_index: 0,
stack: vec![],
@@ -80,7 +83,7 @@ fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
fn before(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnResult {
- println!("before: check existing open containers");
+ println!("before");
// First we iterate through the open blocks, starting with the root
// document, and descending through last children down to the last open
// block.
@@ -101,8 +104,6 @@ fn before(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnR
};
// To do: state?
- println!("check existing: {:?}", name);
-
tokenizer.attempt(cont, move |ok| {
if ok {
Box::new(|t, c| document_continue(t, c, info))
@@ -112,7 +113,6 @@ fn before(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnR
})(tokenizer, code)
} else {
// Done.
- println!("check new:");
check_new_containers(tokenizer, code, info)
}
}
@@ -124,67 +124,8 @@ fn document_continue(
) -> StateFnResult {
println!("document_continue");
info.continued += 1;
-
- println!(" to do: close flow sometimes?");
- // // Note: this field is called `_closeFlow` but it also closes containers.
- // // Perhaps a good idea to rename it but it’s already used in the wild by
- // // extensions.
- // if (self.containerState._closeFlow) {
- // self.containerState._closeFlow = undefined
-
- // if (childFlow) {
- // closeFlow()
- // }
-
- // // Note: this algorithm for moving events around is similar to the
- // // algorithm when dealing with lazy lines in `writeToChild`.
- // const indexBeforeExits = self.events.length
- // let indexBeforeFlow = indexBeforeExits
- // /** @type {Point|undefined} */
- // let point
-
- // // Find the flow chunk.
- // while (indexBeforeFlow--) {
- // if (
- // self.events[indexBeforeFlow][0] === 'exit' &&
- // self.events[indexBeforeFlow][1].type === types.chunkFlow
- // ) {
- // point = self.events[indexBeforeFlow][1].end
- // break
- // }
- // }
-
- // assert(point, 'could not find previous flow chunk')
-
- let size = info.continued;
- info = exit_containers(tokenizer, info, size, true);
- tokenizer.expect(code, true);
-
- // // Fix positions.
- // let index = indexBeforeExits
-
- // while (index < self.events.length) {
- // self.events[index][1].end = Object.assign({}, point)
- // index++
- // }
-
- // // Inject the exits earlier (they’re still also at the end).
- // splice(
- // self.events,
- // indexBeforeFlow + 1,
- // 0,
- // self.events.slice(indexBeforeExits)
- // )
-
- // // Discard the duplicate exits.
- // self.events.length = index
-
- // return checkNewContainers(code)
- // }
-
before(tokenizer, code, info)
}
-// documentContinue
fn check_new_containers(
tokenizer: &mut Tokenizer,
@@ -198,18 +139,11 @@ fn check_new_containers(
// step 1 before creating the new block as a child of the last matched
// block.
if info.continued == info.stack.len() {
- // // No need to `check` whether there’s a container, if `exitContainers`
- // // would be moot.
- // // We can instead immediately `attempt` to parse one.
- // if (!childFlow) {
- // return documentContinued(code)
- // }
-
// If we have concrete content, such as block HTML or fenced code,
// we can’t have containers “pierce” into them, so we can immediately
// start.
if tokenizer.concrete {
- println!(" concrete!");
+ println!(" concrete");
return there_is_no_new_container(tokenizer, code, info);
}
@@ -239,7 +173,6 @@ fn there_is_a_new_container(
mut info: DocumentInfo,
name: String,
) -> StateFnResult {
- println!("there_is_a_new_container");
let size = info.continued;
info = exit_containers(tokenizer, info, size, true);
tokenizer.expect(code, true);
@@ -253,15 +186,12 @@ fn there_is_a_new_container(
unreachable!("todo: cont {:?}", name)
};
- println!("creating exit (a) for `{:?}`", name);
-
let token_types = end();
let mut index = 0;
while index < token_types.len() {
let token_type = &token_types[index];
let mut stack_index = tokenizer.stack.len();
- println!("stack: {:?}", tokenizer.stack);
let mut found = false;
while stack_index > 0 {
@@ -278,11 +208,8 @@ fn there_is_a_new_container(
index += 1;
}
- println!("add to stack: {:?}", name);
info.stack.push(name);
-
- info.continued += 1;
- document_continued(tokenizer, code, info)
+ document_continue(tokenizer, code, info)
}
/// Exit open containers.
@@ -295,8 +222,7 @@ fn exit_containers(
let mut exits: Vec<Event> = vec![];
if info.stack.len() > size {
- // To do: inject these somewhere? Fix positions?
- println!("closing flow. To do: are these resulting exits okay?");
+ println!("closing flow");
let index = tokenizer.events.len();
let result = tokenizer.flush(info.next);
info.next = Box::new(flow); // This is weird but Rust needs a function there.
@@ -314,7 +240,6 @@ fn exit_containers(
let mut add = tokenizer.events.drain(index..end).collect::<Vec<_>>();
- println!("evs: {:#?}", add);
exits.append(&mut add);
println!(" setting `interrupt: false`");
@@ -331,8 +256,6 @@ fn exit_containers(
unreachable!("todo: cont {:?}", name)
};
- println!("creating exit (b) for `{:?}`", name);
-
let token_types = end();
let mut index = 0;
@@ -359,8 +282,6 @@ fn exit_containers(
info.inject[index].1.append(&mut exits);
}
- // println!("exits: {:?} {:?}", info.inject, exits);
-
info
}
@@ -369,58 +290,15 @@ fn there_is_no_new_container(
code: Code,
info: DocumentInfo,
) -> StateFnResult {
- let lazy = info.continued != info.stack.len();
- tokenizer.lazy = lazy;
- println!("there is no new container");
- if lazy {
- println!(
- " This line will be lazy. Depending on what is parsed now, we need to close containers before?"
- );
- }
+ println!("there_is_no_new_container");
+ tokenizer.lazy = info.continued != info.stack.len();
// lineStartOffset = self.now().offset
flow_start(tokenizer, code, info)
}
-fn document_continued(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnResult {
- println!("document_continued");
-
- // Try new containers.
- // To do: list.
- tokenizer.attempt(block_quote, |ok| {
- if ok {
- Box::new(|t, c| container_continue(t, c, info))
- } else {
- Box::new(|t, c| {
- // To do: this looks like a bug?
- t.lazy = false;
- flow_start(t, c, info)
- })
- }
- })(tokenizer, code)
-}
-
-fn container_continue(
- tokenizer: &mut Tokenizer,
- code: Code,
- mut info: DocumentInfo,
-) -> StateFnResult {
- println!("container_continue");
- // assert(
- // self.currentConstruct,
- // 'expected `currentConstruct` to be defined on tokenizer'
- // )
- // assert(
- // self.containerState,
- // 'expected `containerState` to be defined on tokenizer'
- // )
- info.continued += 1;
- // To do: add to stack?
- // stack.push([self.currentConstruct, self.containerState])
- // Try another.
- document_continued(tokenizer, code, info)
-}
-
fn flow_start(tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo) -> StateFnResult {
+ println!("flow_start");
+
let containers = tokenizer
.events
.drain(info.containers_begin_index..)
@@ -428,11 +306,6 @@ fn flow_start(tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo) ->
info.inject.push((containers, vec![]));
- // Exit containers.
- let size = info.continued;
- info = exit_containers(tokenizer, info, size, true);
- tokenizer.expect(code, true);
-
// Define start.
let point = tokenizer.point.clone();
tokenizer.define_skip(&point);
@@ -440,9 +313,7 @@ fn flow_start(tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo) ->
let state = info.next;
info.next = Box::new(flow); // This is weird but Rust needs a function there.
- println!("flow_start:before");
tokenizer.go_until(state, eof_eol, move |(state, remainder)| {
- println!("flow_start:after");
(
State::Fn(Box::new(move |t, c| flow_end(t, c, info, state))),
remainder,
@@ -456,31 +327,62 @@ fn flow_end(
mut info: DocumentInfo,
result: State,
) -> StateFnResult {
- println!("flow_end");
- let was_lazy = tokenizer.lazy;
+ println!("flow_end: lazy? {:?}", tokenizer.lazy);
- if was_lazy {
- println!(
- "this line was lazy. Depeding on what was parsed, we need to exit containers after it?"
- );
+ let index = tokenizer.events.len();
+ let index = if index > 0 {
+ skip::opt_back(&tokenizer.events, index - 1, &[Token::LineEnding])
+ } else {
+ 0
+ };
+
+ let paragraph = if index > 0 {
+ let ev = &tokenizer.events[index];
+ ev.point.offset + 1 >= tokenizer.point.offset
+ && ev.token_type == Token::Paragraph
+ && !(matches!(
+ tokenizer.previous,
+ Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')
+ ) && matches!(code, Code::None))
+ } else {
+ false
+ };
+
+ let mut continued = info.continued;
+ let size = info.stack.len();
+
+ if tokenizer.lazy {
+ println!("this line was lazy.");
+
+ if info.paragraph_before && paragraph {
+ println!("it was another paragraph, which is allowed.");
+ continued = size;
+ } else {
+ println!(
+ "it was something else (prev: {:?}, cur: {:?}), which is not allowed.",
+ info.paragraph_before, paragraph
+ );
+ }
}
+ // Exit containers.
+ info = exit_containers(tokenizer, info, continued, true);
+ tokenizer.expect(code, true);
+
info.continued = 0;
+ info.paragraph_before = paragraph;
info.containers_begin_index = tokenizer.events.len();
match result {
State::Ok => {
- println!("State::Ok");
info = exit_containers(tokenizer, info, 0, false);
tokenizer.expect(code, true);
- // println!("document:inject: {:?}", info.inject);
let mut map = EditMap::new();
let mut line_index = 0;
let mut index = 0;
let add = info.inject[line_index].0.clone();
- println!("add enters at start: {:?}", add);
map.add(0, 0, add);
while index < tokenizer.events.len() {
@@ -489,7 +391,6 @@ fn flow_end(
if event.token_type == Token::LineEnding
|| event.token_type == Token::BlankLineEnding
{
- println!("eol: {:?}", event.point);
if event.event_type == EventType::Enter {
let mut add = info.inject[line_index].1.clone();
let mut deep_index = 0;
@@ -498,12 +399,10 @@ fn flow_end(
add[deep_index].index = event.index;
deep_index += 1;
}
- println!("add exits before: {:?}", add);
map.add(index, 0, add);
} else {
line_index += 1;
let add = info.inject[line_index].0.clone();
- println!("add enters after: {:?}", add);
map.add(index + 1, 0, add);
}
}
@@ -518,12 +417,12 @@ fn flow_end(
add[deep_index].index = tokenizer.index;
deep_index += 1;
}
- println!("add exits at end: {:?}", add);
map.add(index, 0, add);
tokenizer.events = map.consume(&mut tokenizer.events);
let mut index = 0;
- println!("document:inject:ends: {:?}", tokenizer.events.len());
+
+ println!("after: {:?}", tokenizer.events.len());
while index < tokenizer.events.len() {
let event = &tokenizer.events[index];
println!(
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 80786ea..163c2bf 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -611,7 +611,6 @@ fn attempt_impl(
// Should it be before?
// How to match `eof`?
if !codes.is_empty() && pause(tokenizer.previous) {
- println!("pause!: {:?}", (codes.clone(), vec![code]));
return done(
(codes, vec![code]),
false,