Diffstat (limited to 'src')
-rw-r--r--  src/construct/code_fenced.rs        5
-rw-r--r--  src/construct/code_indented.rs     11
-rw-r--r--  src/construct/definition.rs        17
-rw-r--r--  src/construct/heading_atx.rs        9
-rw-r--r--  src/construct/heading_setext.rs   233
-rw-r--r--  src/construct/html_flow.rs         19
-rw-r--r--  src/construct/paragraph.rs        150
-rw-r--r--  src/construct/thematic_break.rs     9
-rw-r--r--  src/content/flow.rs                46
-rw-r--r--  src/tokenizer.rs                    3
10 files changed, 205 insertions, 297 deletions
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index d19cad0..f2d243a 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -179,7 +179,8 @@ struct Info {
pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.enter(TokenType::CodeFenced);
tokenizer.enter(TokenType::CodeFencedFence);
- tokenizer.attempt_opt(space_or_tab(), before_sequence_open)(tokenizer, code)
+ // To do: allow arbitrary when code (indented) is turned off.
+ tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), before_sequence_open)(tokenizer, code)
}
/// Inside the opening fence, after an optional prefix, before a sequence.
@@ -550,5 +551,7 @@ fn content_continue(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
/// ```
fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.exit(TokenType::CodeFenced);
+ // Feel free to interrupt.
+ tokenizer.interrupt = false;
(State::Ok, Some(vec![code]))
}
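
Note: the change above is the pattern repeated throughout this commit: `attempt_opt(space_or_tab(), …)` becomes `go(space_or_tab_min_max(0, TAB_SIZE - 1), …)`, so the construct accepts at most three spaces of indentation before it starts; four or more would belong to code (indented). A minimal standalone sketch of that prefix rule, over a plain `&str` rather than the crate's tokenizer:

// Standalone sketch (not the crate's API): accept at most TAB_SIZE - 1
// leading spaces before an opening code fence.
const TAB_SIZE: usize = 4;

fn can_open_fence(line: &str) -> bool {
    // Count leading spaces (all ASCII, so byte slicing below is safe).
    let indent = line.chars().take_while(|c| *c == ' ').count();
    let rest = &line[indent..];
    indent < TAB_SIZE && (rest.starts_with("```") || rest.starts_with("~~~"))
}

fn main() {
    assert!(can_open_fence("   ```rust")); // three spaces: still a fence
    assert!(!can_open_fence("    ```rust")); // four spaces: too deep
}
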
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs
index 99445b9..9bdfd71 100644
--- a/src/construct/code_indented.rs
+++ b/src/construct/code_indented.rs
@@ -59,8 +59,13 @@ use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
/// > filled line (that it has a non-whitespace character), because blank lines
/// > are parsed already, so we never run into that.
pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- tokenizer.enter(TokenType::CodeIndented);
- tokenizer.go(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), at_break)(tokenizer, code)
+ // Do not interrupt paragraphs.
+ if tokenizer.interrupt {
+ (State::Nok, None)
+ } else {
+ tokenizer.enter(TokenType::CodeIndented);
+ tokenizer.go(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), at_break)(tokenizer, code)
+ }
}
/// At a break.
@@ -110,6 +115,8 @@ fn content(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// ```
fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.exit(TokenType::CodeIndented);
+ // Feel free to interrupt.
+ tokenizer.interrupt = false;
(State::Ok, Some(vec![code]))
}
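
The other recurring change is the new `interrupt` flag: a paragraph switches it on when it ends at a line ending, constructs that may not interrupt a paragraph (code (indented) here, definitions below unless they follow another definition) refuse to start while it is set, and constructs that close a flow unit switch it back off. A standalone sketch of that gate, with illustrative names rather than the crate's API:

// Standalone sketch of the `interrupt` gate (names are illustrative,
// not the crate's API).
struct Gate {
    interrupt: bool,
}

impl Gate {
    /// Code (indented) refuses to start while a paragraph is open.
    fn code_indented_may_start(&self) -> bool {
        !self.interrupt
    }
    /// A paragraph flips the flag on when it ends at a line ending…
    fn paragraph_done(&mut self) {
        self.interrupt = true;
    }
    /// …and constructs such as fenced code or a blank line flip it off.
    fn other_flow_done(&mut self) {
        self.interrupt = false;
    }
}

fn main() {
    let mut gate = Gate { interrupt: false };
    assert!(gate.code_indented_may_start());
    gate.paragraph_done();
    // An indented line now just continues the paragraph.
    assert!(!gate.code_indented_may_start());
    gate.other_flow_done();
    assert!(gate.code_indented_may_start());
}
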
diff --git a/src/construct/definition.rs b/src/construct/definition.rs
index f05064a..e1afd03 100644
--- a/src/construct/definition.rs
+++ b/src/construct/definition.rs
@@ -107,8 +107,19 @@ use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
/// |[a]: b "c"
/// ```
pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- tokenizer.enter(TokenType::Definition);
- tokenizer.attempt_opt(space_or_tab(), before)(tokenizer, code)
+ let index = tokenizer.events.len();
+ let definition_before = index > 3
+ && tokenizer.events[index - 1].token_type == TokenType::LineEnding
+ && tokenizer.events[index - 3].token_type == TokenType::Definition;
+
+ // Do not interrupt paragraphs (but do follow definitions).
+ if tokenizer.interrupt && !definition_before {
+ (State::Nok, None)
+ } else {
+ tokenizer.enter(TokenType::Definition);
+ // Note: arbitrary whitespace allowed even if code (indented) is on.
+ tokenizer.attempt_opt(space_or_tab(), before)(tokenizer, code)
+ }
}
/// At the start of a definition, after whitespace.
@@ -218,6 +229,8 @@ fn after_whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
tokenizer.exit(TokenType::Definition);
+ // You’d be interrupting.
+ tokenizer.interrupt = true;
(State::Ok, Some(vec![code]))
}
_ => (State::Nok, None),
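
The look-back above decides whether this definition would interrupt a paragraph or merely follow another definition: right after `[a]: b "c"` plus a line ending, the last event is the exit of a `LineEnding` and the event three places back is the exit of a `Definition`. A standalone sketch of that check over a flat token list (illustrative enum, not the crate's event type):

// Standalone sketch of the look-back used in `definition.rs`.
#[derive(Debug, PartialEq, Clone, Copy)]
enum Token {
    Definition,
    LineEnding,
    Paragraph,
}

fn definition_before(events: &[Token]) -> bool {
    let len = events.len();
    len > 3 && events[len - 1] == Token::LineEnding && events[len - 3] == Token::Definition
}

fn main() {
    // …, Exit:Definition, Enter:LineEnding, Exit:LineEnding
    let after_definition = [
        Token::Definition, // Enter
        Token::Definition, // Exit
        Token::LineEnding, // Enter
        Token::LineEnding, // Exit
    ];
    assert!(definition_before(&after_definition));

    // A paragraph before means we would be interrupting: bail out.
    let after_paragraph = [
        Token::Paragraph,
        Token::Paragraph,
        Token::LineEnding,
        Token::LineEnding,
    ];
    assert!(!definition_before(&after_paragraph));
}
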
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index 2811894..3ce7052 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -54,8 +54,8 @@
//! [wiki-setext]: https://en.wikipedia.org/wiki/Setext
//! [atx]: http://www.aaronsw.com/2002/atx/
-use super::partial_space_or_tab::space_or_tab;
-use crate::constant::HEADING_ATX_OPENING_FENCE_SIZE_MAX;
+use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
+use crate::constant::{HEADING_ATX_OPENING_FENCE_SIZE_MAX, TAB_SIZE};
use crate::tokenizer::{
Code, ContentType, Event, EventType, State, StateFnResult, TokenType, Tokenizer,
};
@@ -68,7 +68,8 @@ use crate::util::edit_map::EditMap;
/// ```
pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.enter(TokenType::HeadingAtx);
- tokenizer.attempt_opt(space_or_tab(), before)(tokenizer, code)
+ // To do: allow arbitrary when code (indented) is turned off.
+ tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), before)(tokenizer, code)
}
/// Start of a heading (atx), after whitespace.
@@ -127,6 +128,8 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
tokenizer.exit(TokenType::HeadingAtx);
tokenizer.register_resolver("heading_atx".to_string(), Box::new(resolve));
+ // Feel free to interrupt.
+ tokenizer.interrupt = false;
(State::Ok, Some(vec![code]))
}
Code::VirtualSpace | Code::Char('\t' | ' ') => {
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index 03a2e55..df20aa7 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -58,10 +58,9 @@
//! [atx]: http://www.aaronsw.com/2002/atx/
use crate::constant::TAB_SIZE;
-use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_with_options, Options};
-use crate::subtokenize::link;
-use crate::tokenizer::{Code, ContentType, State, StateFnResult, TokenType, Tokenizer};
-use crate::util::span::from_exit_event;
+use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
+use crate::tokenizer::{Code, Event, EventType, State, StateFnResult, TokenType, Tokenizer};
+use crate::util::edit_map::EditMap;
/// Kind of underline.
#[derive(Debug, Clone, PartialEq)]
@@ -109,150 +108,23 @@ impl Kind {
}
}
-/// Start of a heading (setext).
-///
-/// ```markdown
-/// |alpha
-/// ==
-/// ```
-pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- tokenizer.enter(TokenType::HeadingSetext);
- tokenizer.attempt_opt(space_or_tab(), before)(tokenizer, code)
-}
-
-/// Start of a heading (setext), after whitespace.
-///
-/// ```markdown
-/// |alpha
-/// ==
-/// ```
-fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- match code {
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
- unreachable!("expected non-eol/eof");
- }
- _ => {
- tokenizer.enter(TokenType::HeadingSetextText);
- tokenizer.enter_with_content(TokenType::Data, Some(ContentType::Text));
- text_inside(tokenizer, code)
- }
- }
-}
-
-/// Inside text.
-///
-/// ```markdown
-/// al|pha
-/// bra|vo
-/// ==
-/// ```
-fn text_inside(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- match code {
- Code::None => (State::Nok, None),
- Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
- tokenizer.exit(TokenType::Data);
- tokenizer.exit(TokenType::HeadingSetextText);
- tokenizer.attempt(underline_before, |ok| {
- Box::new(if ok { after } else { text_continue })
- })(tokenizer, code)
- }
- _ => {
- tokenizer.consume(code);
- (State::Fn(Box::new(text_inside)), None)
- }
- }
-}
-
-/// At a line ending, not at an underline.
-///
-/// ```markdown
-/// alpha
-/// |bravo
-/// ==
-/// ```
-fn text_continue(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- // Needed to connect the text.
- tokenizer.enter(TokenType::HeadingSetextText);
- tokenizer.events.pop();
- tokenizer.events.pop();
-
- match code {
- Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
- tokenizer.enter_with_content(TokenType::LineEnding, Some(ContentType::Text));
- let index = tokenizer.events.len() - 1;
- link(&mut tokenizer.events, index);
- tokenizer.consume(code);
- tokenizer.exit(TokenType::LineEnding);
-
- (
- State::Fn(Box::new(tokenizer.attempt_opt(
- space_or_tab_with_options(Options {
- kind: TokenType::SpaceOrTab,
- min: 1,
- max: usize::MAX,
- content_type: Some(ContentType::Text),
- connect: true,
- }),
- text_line_start,
- ))),
- None,
- )
- }
- _ => unreachable!("expected eol"),
- }
-}
-
-/// At a line ending after whitespace, not at an underline.
-///
-/// ```markdown
-/// alpha
-/// |bravo
-/// ==
-/// ```
-fn text_line_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- match code {
- // Blank lines not allowed.
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => (State::Nok, None),
- _ => {
- tokenizer.enter_with_content(TokenType::Data, Some(ContentType::Text));
- let index = tokenizer.events.len() - 1;
- link(&mut tokenizer.events, index);
- text_inside(tokenizer, code)
- }
- }
-}
-
-/// After a heading (setext).
-///
-/// ```markdown
-/// alpha
-/// ==|
-/// ```
-fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- tokenizer.exit(TokenType::HeadingSetext);
- (State::Ok, Some(vec![code]))
-}
-
/// At a line ending, presumably an underline.
///
/// ```markdown
/// alpha|
/// ==
/// ```
-fn underline_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- match code {
- Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
- tokenizer.enter(TokenType::LineEnding);
- tokenizer.consume(code);
- tokenizer.exit(TokenType::LineEnding);
- (
- State::Fn(Box::new(
- tokenizer.attempt_opt(space_or_tab(), underline_sequence_start),
- )),
- None,
- )
- }
- _ => unreachable!("expected eol"),
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ let index = tokenizer.events.len();
+ let paragraph_before = index > 3
+ && tokenizer.events[index - 1].token_type == TokenType::LineEnding
+ && tokenizer.events[index - 3].token_type == TokenType::Paragraph;
+
+ if paragraph_before {
+ // To do: allow arbitrary when code (indented) is turned off.
+ tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), before)(tokenizer, code)
+ } else {
+ (State::Nok, None)
}
}
@@ -262,26 +134,11 @@ fn underline_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// alpha
/// |==
/// ```
-fn underline_sequence_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- let tail = tokenizer.events.last();
- let mut prefix = 0;
-
- if let Some(event) = tail {
- if event.token_type == TokenType::SpaceOrTab {
- let span = from_exit_event(&tokenizer.events, tokenizer.events.len() - 1);
- prefix = span.end_index - span.start_index;
- }
- }
-
- // To do: 4+ should be okay if code (indented) is turned off!
- if prefix >= TAB_SIZE {
- return (State::Nok, None);
- }
-
+fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::Char(char) if char == '-' || char == '=' => {
tokenizer.enter(TokenType::HeadingSetextUnderline);
- underline_sequence_inside(tokenizer, code, Kind::from_char(char))
+ inside(tokenizer, code, Kind::from_char(char))
}
_ => (State::Nok, None),
}
@@ -293,16 +150,13 @@ fn underline_sequence_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// alpha
/// =|=
/// ```
-fn underline_sequence_inside(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
+fn inside(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
match code {
Code::Char(char) if char == kind.as_char() => {
tokenizer.consume(code);
- (
- State::Fn(Box::new(move |t, c| underline_sequence_inside(t, c, kind))),
- None,
- )
+ (State::Fn(Box::new(move |t, c| inside(t, c, kind))), None)
}
- _ => tokenizer.attempt_opt(space_or_tab(), underline_after)(tokenizer, code),
+ _ => tokenizer.attempt_opt(space_or_tab(), after)(tokenizer, code),
}
}
@@ -312,12 +166,59 @@ fn underline_sequence_inside(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
/// alpha
/// ==|
/// ```
-fn underline_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
tokenizer.exit(TokenType::HeadingSetextUnderline);
+ // Feel free to interrupt.
+ tokenizer.interrupt = false;
+ tokenizer.register_resolver("heading_setext".to_string(), Box::new(resolve));
(State::Ok, Some(vec![code]))
}
_ => (State::Nok, None),
}
}
+
+/// To do.
+pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> {
+ let mut edit_map = EditMap::new();
+ let mut index = 0;
+ let mut paragraph_enter: Option<usize> = None;
+ let mut paragraph_exit: Option<usize> = None;
+
+ while index < tokenizer.events.len() {
+ let event = &tokenizer.events[index];
+
+ // Find paragraphs.
+ if event.event_type == EventType::Enter {
+ if event.token_type == TokenType::Paragraph {
+ paragraph_enter = Some(index);
+ }
+ } else if event.token_type == TokenType::Paragraph {
+ paragraph_exit = Some(index);
+ }
+ // We know this is preceded by a paragraph.
+ // Otherwise we don’t parse.
+ else if event.token_type == TokenType::HeadingSetextUnderline {
+ let enter = paragraph_enter.take().unwrap();
+ let exit = paragraph_exit.take().unwrap();
+
+ // Change types of Enter:Paragraph, Exit:Paragraph.
+ tokenizer.events[enter].token_type = TokenType::HeadingSetextText;
+ tokenizer.events[exit].token_type = TokenType::HeadingSetextText;
+
+ // Add of Enter:HeadingSetext, Exit:HeadingSetext.
+ let mut heading_enter = tokenizer.events[enter].clone();
+ heading_enter.token_type = TokenType::HeadingSetext;
+ let mut heading_exit = tokenizer.events[index].clone();
+ heading_exit.token_type = TokenType::HeadingSetext;
+
+ edit_map.add(enter, 0, vec![heading_enter]);
+ edit_map.add(index + 1, 0, vec![heading_exit]);
+ }
+
+ index += 1;
+ }
+
+ edit_map.consume(&mut tokenizer.events)
+}
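
With this rewrite a setext heading is first tokenized as a plain paragraph followed by an underline, and the resolver registered above retypes the paragraph events and wraps them, together with the underline, in a `HeadingSetext`. A standalone simulation of that event rewrite (illustrative types; the real code goes through `EditMap` and carries position info, which is omitted here):

// Standalone simulation of the setext resolver: retype the preceding
// paragraph into heading text and wrap it, plus the underline, in a
// heading. Types are illustrative, not the crate's event model.
#[derive(Debug, PartialEq, Clone, Copy)]
enum Kind {
    Enter,
    Exit,
}

#[derive(Debug, PartialEq, Clone, Copy)]
enum Token {
    Paragraph,
    LineEnding,
    HeadingSetext,
    HeadingSetextText,
    HeadingSetextUnderline,
}

fn resolve(events: &mut Vec<(Kind, Token)>) {
    let mut enter: Option<usize> = None;
    let mut index = 0;
    while index < events.len() {
        let event = events[index];
        if event == (Kind::Enter, Token::Paragraph) {
            enter = Some(index);
        } else if event == (Kind::Exit, Token::HeadingSetextUnderline) {
            if let Some(start) = enter.take() {
                // Retype Enter/Exit:Paragraph into HeadingSetextText…
                for item in &mut events[start..=index] {
                    if item.1 == Token::Paragraph {
                        item.1 = Token::HeadingSetextText;
                    }
                }
                // …then wrap paragraph + underline in Enter/Exit:HeadingSetext.
                events.insert(start, (Kind::Enter, Token::HeadingSetext));
                events.insert(index + 2, (Kind::Exit, Token::HeadingSetext));
                index += 2;
            }
        }
        index += 1;
    }
}

fn main() {
    let mut events = vec![
        (Kind::Enter, Token::Paragraph),
        (Kind::Exit, Token::Paragraph),
        (Kind::Enter, Token::LineEnding),
        (Kind::Exit, Token::LineEnding),
        (Kind::Enter, Token::HeadingSetextUnderline),
        (Kind::Exit, Token::HeadingSetextUnderline),
    ];
    resolve(&mut events);
    assert_eq!(events.first(), Some(&(Kind::Enter, Token::HeadingSetext)));
    assert_eq!(events.last(), Some(&(Kind::Exit, Token::HeadingSetext)));
}
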
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index d0e0558..a1bddad 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -98,8 +98,10 @@
//! [html_block_names]: crate::constant::HTML_BLOCK_NAMES
//! [html-parsing]: https://html.spec.whatwg.org/multipage/parsing.html#parsing
-use crate::constant::{HTML_BLOCK_NAMES, HTML_RAW_NAMES, HTML_RAW_SIZE_MAX};
-use crate::construct::{blank_line::start as blank_line, partial_space_or_tab::space_or_tab};
+use crate::constant::{HTML_BLOCK_NAMES, HTML_RAW_NAMES, HTML_RAW_SIZE_MAX, TAB_SIZE};
+use crate::construct::{
+ blank_line::start as blank_line, partial_space_or_tab::space_or_tab_min_max,
+};
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
/// Kind of HTML (flow).
@@ -191,7 +193,8 @@ struct Info {
pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.enter(TokenType::HtmlFlow);
tokenizer.enter(TokenType::HtmlFlowData);
- tokenizer.attempt_opt(space_or_tab(), before)(tokenizer, code)
+ // To do: allow arbitrary when code (indented) is turned off.
+ tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), before)(tokenizer, code)
}
/// After optional whitespace, before `<`.
@@ -400,8 +403,10 @@ fn tag_name(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
} else {
info.kind = Kind::Complete;
- // To do: do not support complete HTML when interrupting.
- if info.start_tag {
+ // Do not support complete HTML when interrupting.
+ if tokenizer.interrupt {
+ (State::Nok, None)
+ } else if info.start_tag {
complete_attribute_name_before(tokenizer, code, info)
} else {
complete_closing_tag_after(tokenizer, code, info)
@@ -784,6 +789,8 @@ fn html_continue_start(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
match code {
Code::None => {
tokenizer.exit(TokenType::HtmlFlow);
+ // Feel free to interrupt.
+ tokenizer.interrupt = false;
(State::Ok, Some(vec![code]))
}
// To do: do not allow lazy lines.
@@ -949,6 +956,8 @@ fn continuation_close(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
tokenizer.exit(TokenType::HtmlFlowData);
tokenizer.exit(TokenType::HtmlFlow);
+ // Feel free to interrupt.
+ tokenizer.interrupt = false;
(State::Ok, Some(vec![code]))
}
_ => {
diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs
index fea7052..ae2f4de 100644
--- a/src/construct/paragraph.rs
+++ b/src/construct/paragraph.rs
@@ -32,14 +32,10 @@
//! [code_text]: crate::construct::code_text
//! [html]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-p-element
-use crate::constant::TAB_SIZE;
-use crate::construct::{
- blank_line::start as blank_line, code_fenced::start as code_fenced,
- heading_atx::start as heading_atx, html_flow::start as html_flow,
- partial_space_or_tab::space_or_tab_min_max, thematic_break::start as thematic_break,
+use crate::tokenizer::{
+ Code, ContentType, Event, EventType, State, StateFnResult, TokenType, Tokenizer,
};
-use crate::subtokenize::link;
-use crate::tokenizer::{Code, ContentType, State, StateFnResult, TokenType, Tokenizer};
+use crate::util::edit_map::EditMap;
/// Before a paragraph.
///
@@ -66,11 +62,14 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// ```
fn inside(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
- Code::None => end(tokenizer, code),
- Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => tokenizer
- .check(interrupt, |ok| {
- Box::new(if ok { at_line_ending } else { end })
- })(tokenizer, code),
+ Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ tokenizer.exit(TokenType::Data);
+ tokenizer.exit(TokenType::Paragraph);
+ tokenizer.register_resolver_before("paragraph".to_string(), Box::new(resolve));
+ // You’d be interrupting.
+ tokenizer.interrupt = true;
+ (State::Ok, Some(vec![code]))
+ }
_ => {
tokenizer.consume(code);
(State::Fn(Box::new(inside)), None)
@@ -78,90 +77,55 @@ fn inside(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
}
-/// At a line ending, not interrupting.
-///
-/// ```markdown
-/// alpha|
-/// bravo.
-/// ```
-fn at_line_ending(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- tokenizer.consume(code);
- tokenizer.exit(TokenType::Data);
- tokenizer.enter_with_content(TokenType::Data, Some(ContentType::Text));
- let index = tokenizer.events.len() - 1;
- link(&mut tokenizer.events, index);
- (State::Fn(Box::new(inside)), None)
-}
+/// Merge “`Paragraph`”s, which currently span a single line, into actual
+/// `Paragraph`s that span multiple lines.
+pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> {
+ let mut edit_map = EditMap::new();
+ let len = tokenizer.events.len();
+ let mut index = 0;
-/// At a line ending, done.
-///
-/// ```markdown
-/// alpha|
-/// ***
-/// ```
-fn end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- tokenizer.exit(TokenType::Data);
- tokenizer.exit(TokenType::Paragraph);
- (State::Ok, Some(vec![code]))
-}
+ while index < len {
+ let event = &tokenizer.events[index];
-/// Before a potential interruption.
-///
-/// ```markdown
-/// alpha|
-/// ***
-/// ```
-fn interrupt(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- match code {
- Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
- tokenizer.enter(TokenType::LineEnding);
- tokenizer.consume(code);
- tokenizer.exit(TokenType::LineEnding);
- (State::Fn(Box::new(interrupt_start)), None)
- }
- _ => unreachable!("expected eol"),
- }
-}
+ if event.event_type == EventType::Enter && event.token_type == TokenType::Paragraph {
+ // Exit:Paragraph
+ let mut exit_index = index + 3;
+ // Enter:Paragraph
+ let mut enter_next_index = exit_index + 3;
-/// After a line ending.
-///
-/// ```markdown
-/// alpha
-/// |~~~js
-/// ~~~
-/// ```
-fn interrupt_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- // To do: If code is disabled, indented lines are allowed to interrupt.
- tokenizer.attempt(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), |ok| {
- Box::new(if ok { interrupt_indent } else { interrupt_cont })
- })(tokenizer, code)
-}
+ // To do: assert that `LineEnding` between?
+ while enter_next_index < len
+ && tokenizer.events[enter_next_index].token_type == TokenType::Paragraph
+ {
+ // Remove Exit:Paragraph, Enter:LineEnding, Exit:LineEnding, Enter:Paragraph.
+ edit_map.add(exit_index, 4, vec![]);
+ println!("rm {:?} {:?}", exit_index, exit_index + 4);
-/// At an indent.
-///
-/// ```markdown
-/// alpha
-/// |
-/// ```
-fn interrupt_indent(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- (State::Ok, Some(vec![code]))
-}
+ // Add Exit:LineEnding position info to Exit:Data.
+ let line_ending_exit = &tokenizer.events[enter_next_index - 1];
+ let line_ending_point = line_ending_exit.point.clone();
+ let line_ending_index = line_ending_exit.index;
+ let data_exit = &mut tokenizer.events[exit_index - 1];
+ data_exit.point = line_ending_point;
+ data_exit.index = line_ending_index;
-/// Not at an indented line.
-///
-/// ```markdown
-/// alpha
-/// |<div>
-/// ```
-fn interrupt_cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- tokenizer.attempt_n(
- vec![
- Box::new(blank_line),
- Box::new(code_fenced),
- Box::new(html_flow),
- Box::new(heading_atx),
- Box::new(thematic_break),
- ],
- |ok| Box::new(move |_t, code| (if ok { State::Nok } else { State::Ok }, Some(vec![code]))),
- )(tokenizer, code)
+ // Link Enter:Data on the previous line to Enter:Data on this line.
+ let data_enter_prev = &mut tokenizer.events[exit_index - 2];
+ data_enter_prev.next = Some(enter_next_index + 1);
+ let data_enter_next = &mut tokenizer.events[enter_next_index + 1];
+ data_enter_next.previous = Some(exit_index - 2);
+
+ // Potential next start.
+ exit_index = enter_next_index + 3;
+ enter_next_index = exit_index + 3;
+ }
+
+ // Move to `Exit:Paragraph`.
+ index = exit_index;
+ }
+
+ index += 1;
+ }
+
+ edit_map.consume(&mut tokenizer.events)
}
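
Paragraphs are now tokenized one line at a time, and this resolver glues adjacent single-line paragraphs back together by dropping the `Exit:Paragraph`, `Enter:LineEnding`, `Exit:LineEnding`, `Enter:Paragraph` run between them. A standalone sketch of the merge (illustrative event type; the real resolver also stretches the previous `Data` exit over the line ending and links the `Data` events, which is left out here):

// Standalone sketch of the paragraph merge: whenever one paragraph ends
// and the next begins across a single line ending, drop the four events
// in between so the two lines become one paragraph.
#[derive(Debug, PartialEq, Clone, Copy)]
enum Event {
    EnterParagraph,
    ExitParagraph,
    EnterLineEnding,
    ExitLineEnding,
    Data(&'static str),
}

fn merge_paragraphs(events: &[Event]) -> Vec<Event> {
    let mut out = Vec::new();
    let mut index = 0;
    while index < events.len() {
        // Exit:Paragraph, Enter:LineEnding, Exit:LineEnding, Enter:Paragraph?
        if events[index..].starts_with(&[
            Event::ExitParagraph,
            Event::EnterLineEnding,
            Event::ExitLineEnding,
            Event::EnterParagraph,
        ]) {
            // Skip the four events: the previous paragraph keeps going.
            index += 4;
        } else {
            out.push(events[index]);
            index += 1;
        }
    }
    out
}

fn main() {
    let events = [
        Event::EnterParagraph,
        Event::Data("alpha"),
        Event::ExitParagraph,
        Event::EnterLineEnding,
        Event::ExitLineEnding,
        Event::EnterParagraph,
        Event::Data("bravo"),
        Event::ExitParagraph,
    ];
    assert_eq!(
        merge_paragraphs(&events),
        vec![
            Event::EnterParagraph,
            Event::Data("alpha"),
            Event::Data("bravo"),
            Event::ExitParagraph,
        ]
    );
}
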
diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs
index 9978ee0..8d29157 100644
--- a/src/construct/thematic_break.rs
+++ b/src/construct/thematic_break.rs
@@ -49,8 +49,8 @@
//!
//! <!-- To do: link `lists` -->
-use super::partial_space_or_tab::space_or_tab;
-use crate::constant::THEMATIC_BREAK_MARKER_COUNT_MIN;
+use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
+use crate::constant::{TAB_SIZE, THEMATIC_BREAK_MARKER_COUNT_MIN};
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
/// Type of thematic break.
@@ -122,7 +122,8 @@ struct Info {
/// ```
pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.enter(TokenType::ThematicBreak);
- tokenizer.attempt_opt(space_or_tab(), before)(tokenizer, code)
+ // To do: allow arbitrary when code (indented) is turned off.
+ tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), before)(tokenizer, code)
}
/// Start of a thematic break, after whitespace.
@@ -157,6 +158,8 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
if info.size >= THEMATIC_BREAK_MARKER_COUNT_MIN =>
{
tokenizer.exit(TokenType::ThematicBreak);
+ // Feel free to interrupt.
+ tokenizer.interrupt = false;
(State::Ok, Some(vec![code]))
}
Code::Char(char) if char == info.kind.as_char() => {
diff --git a/src/content/flow.rs b/src/content/flow.rs
index 0d3ede0..3ff948d 100644
--- a/src/content/flow.rs
+++ b/src/content/flow.rs
@@ -92,26 +92,6 @@ fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
}
-/// After a blank line.
-///
-/// Move to `start` afterwards.
-///
-/// ```markdown
-/// ␠␠|
-/// ```
-fn blank_line_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- match code {
- Code::None => (State::Ok, None),
- Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
- tokenizer.enter(TokenType::BlankLineEnding);
- tokenizer.consume(code);
- tokenizer.exit(TokenType::BlankLineEnding);
- (State::Fn(Box::new(start)), None)
- }
- _ => unreachable!("expected eol/eof after blank line `{:?}`", code),
- }
-}
-
/// Before flow (initial).
///
/// “Initial” flow means unprefixed flow, so right at the start of a line.
@@ -133,16 +113,38 @@ fn initial_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
Box::new(code_fenced),
Box::new(html_flow),
Box::new(heading_atx),
+ Box::new(heading_setext),
Box::new(thematic_break),
Box::new(definition),
- Box::new(heading_setext),
],
|ok| Box::new(if ok { after } else { before_paragraph }),
)(tokenizer, code),
}
}
-/// After a flow construct.
+/// After a blank line.
+///
+/// Move to `start` afterwards.
+///
+/// ```markdown
+/// ␠␠|
+/// ```
+fn blank_line_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::None => (State::Ok, None),
+ Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ tokenizer.enter(TokenType::BlankLineEnding);
+ tokenizer.consume(code);
+ tokenizer.exit(TokenType::BlankLineEnding);
+ // Feel free to interrupt.
+ tokenizer.interrupt = false;
+ (State::Fn(Box::new(start)), None)
+ }
+ _ => unreachable!("expected eol/eof after blank line `{:?}`", code),
+ }
+}
+
+/// After something.
///
/// ```markdown
/// ## alpha|
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 817c1de..b70e706 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1760,6 +1760,8 @@ pub struct Tokenizer<'a> {
/// To do.
pub label_start_list_loose: Vec<LabelStart>,
/// To do.
+ pub interrupt: bool,
+ /// To do.
pub media_list: Vec<Media>,
/// To do.
resolvers: Vec<Box<Resolver>>,
@@ -1783,6 +1785,7 @@ impl<'a> Tokenizer<'a> {
label_start_stack: vec![],
label_start_list_loose: vec![],
media_list: vec![],
+ interrupt: false,
resolvers: vec![],
resolver_ids: vec![],
}