Add peeking to unindented flow lines

author: Titus Wormer <tituswormer@gmail.com> 2022-08-09 17:32:01 +0200
committer: Titus Wormer <tituswormer@gmail.com> 2022-08-09 17:32:01 +0200
commit: 9bab2951cd3e9d492f3aa9f127531643323a1de1 (patch)
tree: 012036d88b010dbfe940c77b8735a9a0686e5fd0 /src/content
parent: 8f8d72a749a39845fd03ae259533abe73dc7dcdf (diff)
download: markdown-rs-9bab2951cd3e9d492f3aa9f127531643323a1de1.tar.gz
markdown-rs-9bab2951cd3e9d492f3aa9f127531643323a1de1.tar.bz2
markdown-rs-9bab2951cd3e9d492f3aa9f127531643323a1de1.zip
2 files changed, 48 insertions, 12 deletions
diff --git a/src/content/document.rs b/src/content/document.rs
index d02021a..cc83415 100644
--- a/src/content/document.rs
+++ b/src/content/document.rs
@@ -66,7 +66,7 @@ pub fn document(parse_state: &mut ParseState, point: Point) -> Vec<Event> {
         let event = &tokenizer.events[index];
 
         if event.event_type == EventType::Exit && event.token_type == Token::DefinitionLabelString {
-            // Note: we don‘t care about virtual spaces, so `as_str` is fine.
+            // Note: we don’t care about virtual spaces, so `as_str` is fine.
             let id = normalize_identifier(
                 Slice::from_position(
                     tokenizer.parse_state.bytes,
diff --git a/src/content/flow.rs b/src/content/flow.rs
index bfaf5e9..b3fb866 100644
--- a/src/content/flow.rs
+++ b/src/content/flow.rs
@@ -34,15 +34,56 @@ use crate::tokenizer::{State, StateName, Tokenizer};
 /// ```
 pub fn start(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        None => State::Ok,
-        _ => tokenizer.attempt(
-            StateName::BlankLineStart,
-            State::Fn(StateName::FlowBlankLineAfter),
-            State::Fn(StateName::FlowBefore),
+        Some(b'`' | b'~') => tokenizer.attempt(
+            StateName::CodeFencedStart,
+            State::Fn(StateName::FlowAfter),
+            State::Fn(StateName::FlowBeforeParagraph),
+        ),
+        Some(b'<') => tokenizer.attempt(
+            StateName::HtmlFlowStart,
+            State::Fn(StateName::FlowAfter),
+            State::Fn(StateName::FlowBeforeParagraph),
+        ),
+        Some(b'#') => tokenizer.attempt(
+            StateName::HeadingAtxStart,
+            State::Fn(StateName::FlowAfter),
+            State::Fn(StateName::FlowBeforeParagraph),
+        ),
+        // Note: `-` is also used in thematic breaks, so it’s not included here.
+        Some(b'=') => tokenizer.attempt(
+            StateName::HeadingSetextStart,
+            State::Fn(StateName::FlowAfter),
+            State::Fn(StateName::FlowBeforeParagraph),
+        ),
+        Some(b'*' | b'_') => tokenizer.attempt(
+            StateName::ThematicBreakStart,
+            State::Fn(StateName::FlowAfter),
+            State::Fn(StateName::FlowBeforeParagraph),
+        ),
+        Some(b'[') => tokenizer.attempt(
+            StateName::DefinitionStart,
+            State::Fn(StateName::FlowAfter),
+            State::Fn(StateName::FlowBeforeParagraph),
+        ),
+        // Actual parsing: blank line? Indented code? Indented anything?
+        // Also includes `-` which can be a setext heading underline or a thematic break.
+        None | Some(b'\t' | b'\n' | b' ' | b'-') => before_blank_line(tokenizer),
+        Some(_) => tokenizer.attempt(
+            StateName::ParagraphStart,
+            State::Fn(StateName::FlowAfter),
+            State::Nok,
         ),
     }
 }
 
+pub fn before_blank_line(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.attempt(
+        StateName::BlankLineStart,
+        State::Fn(StateName::FlowBlankLineAfter),
+        State::Fn(StateName::FlowBeforeCodeIndented),
+    )
+}
+
 /// Before flow (initial).
 ///
 /// “Initial” flow means unprefixed flow, so right at the start of a line.
@@ -55,17 +96,12 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 /// |~~~js
 /// |<div>
 /// ```
-pub fn before(tokenizer: &mut Tokenizer) -> State {
-    // match tokenizer.current {
-    //     None => State::Ok,
-    //     _ => {
+pub fn before_code_indented(tokenizer: &mut Tokenizer) -> State {
     tokenizer.attempt(
         StateName::CodeIndentedStart,
         State::Fn(StateName::FlowAfter),
         State::Fn(StateName::FlowBeforeCodeFenced),
     )
-    //     }
-    // }
 }
 
 pub fn before_code_fenced(tokenizer: &mut Tokenizer) -> State {
author	Titus Wormer <tituswormer@gmail.com>	2022-08-09 17:32:01 +0200
committer	Titus Wormer <tituswormer@gmail.com>	2022-08-09 17:32:01 +0200
commit	9bab2951cd3e9d492f3aa9f127531643323a1de1 (patch)
tree	012036d88b010dbfe940c77b8735a9a0686e5fd0 /src/content
parent	8f8d72a749a39845fd03ae259533abe73dc7dcdf (diff)
download	markdown-rs-9bab2951cd3e9d492f3aa9f127531643323a1de1.tar.gz markdown-rs-9bab2951cd3e9d492f3aa9f127531643323a1de1.tar.bz2 markdown-rs-9bab2951cd3e9d492f3aa9f127531643323a1de1.zip