about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--  src/compiler.rs       4
-rw-r--r--  src/content/flow.rs   164
-rw-r--r--  src/tokenizer.rs      58
3 files changed, 154 insertions(+), 72 deletions(-)
diff --git a/src/compiler.rs b/src/compiler.rs
index 166950e..4f362b8 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -62,7 +62,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
ignore_encode = true;
}
}
- TokenType::ContentPhrasing
+ TokenType::ContentChunk
| TokenType::AtxHeading
| TokenType::AtxHeadingSequence
| TokenType::AtxHeadingWhitespace
@@ -280,7 +280,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
}
// To do: `ContentPhrasing` should be parsed as phrasing first.
// This branch below currently acts as the resulting `data` tokens.
- TokenType::ContentPhrasing
+ TokenType::ContentChunk
// To do: `ChunkString` does not belong here. Remove it when subtokenization is supported.
| TokenType::ChunkString
| TokenType::Data
diff --git a/src/content/flow.rs b/src/content/flow.rs
index 21c5721..6c47a10 100644
--- a/src/content/flow.rs
+++ b/src/content/flow.rs
@@ -96,16 +96,14 @@ fn blank_line_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// ```markdown
/// |qwe
/// | asd
+/// |~~~js
+/// |<div>
/// ```
fn initial_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::None => (State::Ok, None),
- _ => tokenizer.attempt(code_indented, |ok| {
- Box::new(if ok {
- after
- } else {
- initial_before_not_code_indented
- })
+ _ => tokenizer.attempt_3(code_indented, code_fenced, html_flow, |ok| {
+ Box::new(if ok { after } else { before })
})(tokenizer, code),
}
}
@@ -132,38 +130,6 @@ fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
}
-/// Before flow (initial), but not at code (indented).
-///
-/// ```markdown
-/// |qwe
-/// ```
-fn initial_before_not_code_indented(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- match code {
- Code::None => (State::Ok, None),
- _ => tokenizer.attempt(code_fenced, |ok| {
- Box::new(if ok {
- after
- } else {
- initial_before_not_code_fenced
- })
- })(tokenizer, code),
- }
-}
-
-/// Before flow (initial), but not at code (fenced).
-///
-/// ```markdown
-/// |qwe
-/// ```
-fn initial_before_not_code_fenced(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- match code {
- Code::None => (State::Ok, None),
- _ => tokenizer.attempt(html_flow, |ok| Box::new(if ok { after } else { before }))(
- tokenizer, code,
- ),
- }
-}
-
/// Before flow, but not at code (indented) or code (fenced).
///
/// Compared to flow (initial), normal flow can be arbitrarily prefixed.
@@ -181,32 +147,11 @@ pub fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// Before flow, after potential whitespace.
///
/// ```markdown
-/// |qwe
+/// |# asd
+/// |***
/// ```
pub fn before_after_prefix(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- tokenizer.attempt(heading_atx, |ok| {
- Box::new(if ok { after } else { before_not_heading_atx })
- })(tokenizer, code)
-}
-
-/// Before flow, but not before a heading (atx)
-///
-/// ```markdown
-/// |qwe
-/// ```
-pub fn before_not_heading_atx(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- tokenizer.attempt(thematic_break, |ok| {
- Box::new(if ok { after } else { before_not_thematic_break })
- })(tokenizer, code)
-}
-
-/// Before flow, but not before a heading (atx) or thematic break.
-///
-/// ```markdown
-/// |qwe
-/// ```
-pub fn before_not_thematic_break(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- tokenizer.attempt(html_flow, |ok| {
+ tokenizer.attempt_2(heading_atx, thematic_break, |ok| {
Box::new(if ok { after } else { content_before })
})(tokenizer, code)
}
@@ -231,9 +176,8 @@ fn content_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
_ => {
tokenizer.enter(TokenType::Content);
- tokenizer.enter(TokenType::ContentPhrasing);
- tokenizer.consume(code);
- (State::Fn(Box::new(content)), None)
+ tokenizer.enter(TokenType::ContentChunk);
+ content(tokenizer, code)
}
}
}
@@ -245,10 +189,15 @@ fn content_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
// To do: lift limitations as documented above.
fn content(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
- Code::None | Code::Char('\n' | '\r') => {
- tokenizer.exit(TokenType::ContentPhrasing);
- tokenizer.exit(TokenType::Content);
- after(tokenizer, code)
+ Code::None => {
+ tokenizer.exit(TokenType::ContentChunk);
+ content_end(tokenizer, code)
+ }
+ Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ tokenizer.exit(TokenType::ContentChunk);
+ tokenizer.check(continuation_construct, |ok| {
+ Box::new(if ok { content_continue } else { content_end })
+ })(tokenizer, code)
}
_ => {
tokenizer.consume(code);
@@ -256,3 +205,80 @@ fn content(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
}
}
+
+fn continuation_construct(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ tokenizer.enter(TokenType::LineEnding);
+ tokenizer.consume(code);
+ tokenizer.exit(TokenType::LineEnding);
+ (
+ State::Fn(Box::new(continuation_construct_initial_before)),
+ None,
+ )
+ }
+ _ => unreachable!("expected eol"),
+ }
+}
+
+fn continuation_construct_initial_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ tokenizer.attempt_2(code_fenced, html_flow, |ok| {
+ if ok {
+ Box::new(|_tokenizer, _code| (State::Nok, None))
+ } else {
+ Box::new(|tokenizer, code| {
+ tokenizer.attempt(
+ |tokenizer, code| whitespace(tokenizer, code, TokenType::Whitespace),
+ |_ok| Box::new(continuation_construct_after_prefix),
+ )(tokenizer, code)
+ })
+ }
+ })(tokenizer, code)
+}
+
+fn continuation_construct_after_prefix(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ // let tail = tokenizer.events.last();
+ // let mut prefix = 0;
+
+ // if let Some(event) = tail {
+ // if event.token_type == TokenType::Whitespace {
+ // let span = get_span(&tokenizer.events, tokenizer.events.len() - 1);
+ // prefix = span.end_index - span.start_index;
+ // }
+ // }
+
+ match code {
+ // Blank lines are not allowed in content.
+ Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => (State::Nok, None),
+ // If code is disabled, indented lines are part of the content.
+ // _ if prefix >= TAB_SIZE => {
+ // (State::Ok, None)
+ // }
+ _ => {
+ println!("to do: check if flow interrupts, assuming it can’t");
+ tokenizer.attempt_2(heading_atx, thematic_break, |ok| {
+ let result = if ok {
+ (State::Nok, None)
+ } else {
+ (State::Ok, None)
+ };
+ Box::new(|_t, _c| result)
+ })(tokenizer, code)
+ }
+ }
+}
+
+fn content_continue(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ // To do: should this be part of the content chunk?
+ // That’s what `micromark-js` does.
+ tokenizer.enter(TokenType::LineEnding);
+ tokenizer.consume(code);
+ tokenizer.exit(TokenType::LineEnding);
+ tokenizer.enter(TokenType::ContentChunk);
+ (State::Fn(Box::new(content)), None)
+}
+
+fn content_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ tokenizer.exit(TokenType::Content);
+ after(tokenizer, code)
+}
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index c8b1440..4239520 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -62,7 +62,8 @@ pub enum TokenType {
BlankLineWhitespace,
Content,
- ContentPhrasing,
+ ContentChunk,
+
ChunkString,
}
@@ -377,6 +378,61 @@ impl Tokenizer {
)
}
+ // To do: lifetimes, boxes, lmao.
+ pub fn attempt_2(
+ &mut self,
+ a: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
+ b: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
+ done: impl FnOnce(bool) -> Box<StateFn> + 'static,
+ ) -> Box<StateFn> {
+ self.call_multiple(false, Some(Box::new(a)), Some(Box::new(b)), None, done)
+ }
+
+ pub fn attempt_3(
+ &mut self,
+ a: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
+ b: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
+ c: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
+ done: impl FnOnce(bool) -> Box<StateFn> + 'static,
+ ) -> Box<StateFn> {
+ self.call_multiple(
+ false,
+ Some(Box::new(a)),
+ Some(Box::new(b)),
+ Some(Box::new(c)),
+ done,
+ )
+ }
+
+ pub fn call_multiple(
+ &mut self,
+ check: bool,
+ a: Option<Box<StateFn>>,
+ b: Option<Box<StateFn>>,
+ c: Option<Box<StateFn>>,
+ done: impl FnOnce(bool) -> Box<StateFn> + 'static,
+ ) -> Box<StateFn> {
+ if let Some(head) = a {
+ let callback = move |ok| {
+ if ok {
+ done(ok)
+ } else {
+ Box::new(move |tokenizer: &mut Tokenizer, code| {
+ tokenizer.call_multiple(check, b, c, None, done)(tokenizer, code)
+ })
+ }
+ };
+
+ if check {
+ self.check(head, callback)
+ } else {
+ self.attempt(head, callback)
+ }
+ } else {
+ done(false)
+ }
+ }
+
/// Feed a list of `codes` into `start`.
///
/// This is set up to support repeatedly calling `feed`, and thus streaming