Add basic support for block quotes

author: Titus Wormer <tituswormer@gmail.com> 2022-07-07 17:21:38 +0200
committer: Titus Wormer <tituswormer@gmail.com> 2022-07-07 17:36:35 +0200
commit: 4806864e5377a5fef937b3fa02542e620c547969 (patch)
tree: c91ae2bbd1dc2037f425efd24d62d05e706e3e60 /src
parent: c2b4402223e53498078fc33dd55aabc0a48cdb56 (diff)
download: markdown-rs-4806864e5377a5fef937b3fa02542e620c547969.tar.gz
markdown-rs-4806864e5377a5fef937b3fa02542e620c547969.tar.bz2
markdown-rs-4806864e5377a5fef937b3fa02542e620c547969.zip
13 files changed, 645 insertions, 80 deletions
diff --git a/src/compiler.rs b/src/compiler.rs
index 7e47f95..f27c0de 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -193,9 +193,7 @@ pub struct Options {
     /// // micromark is safe by default:
     /// assert_eq!(
     ///     micromark("> a"),
-    ///     // To do: block quote
-    ///     // "<blockquote>\n<p>a</p>\n</blockquote>"
-    ///     "<p>&gt; a</p>"
+    ///     "<blockquote>\n<p>a</p>\n</blockquote>"
     /// );
     ///
     /// // Define `default_line_ending` to configure the default:
@@ -209,9 +207,7 @@ pub struct Options {
     ///
     ///         }
     ///     ),
-    ///     // To do: block quote
-    ///     // "<blockquote>\r\n<p>a</p>\r\n</blockquote>"
-    ///     "<p>&gt; a</p>"
+    ///     "<blockquote>\r\n<p>a</p>\r\n</blockquote>"
     /// );
     /// ```
     pub default_line_ending: Option<LineEnding>,
@@ -418,6 +414,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
     enter_map.insert(TokenType::HeadingSetextText, on_enter_buffer);
     enter_map.insert(TokenType::Label, on_enter_buffer);
     enter_map.insert(TokenType::ResourceTitleString, on_enter_buffer);
+    enter_map.insert(TokenType::BlockQuote, on_enter_block_quote);
     enter_map.insert(TokenType::CodeIndented, on_enter_code_indented);
     enter_map.insert(TokenType::CodeFenced, on_enter_code_fenced);
     enter_map.insert(TokenType::CodeText, on_enter_code_text);
@@ -491,6 +488,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
     exit_map.insert(TokenType::CodeFlowChunk, on_exit_code_flow_chunk);
     exit_map.insert(TokenType::CodeText, on_exit_code_text);
     exit_map.insert(TokenType::CodeTextLineEnding, on_exit_code_text_line_ending);
+    exit_map.insert(TokenType::BlockQuote, on_exit_block_quote);
     exit_map.insert(TokenType::HardBreakEscape, on_exit_break);
     exit_map.insert(TokenType::HardBreakTrailing, on_exit_break);
     exit_map.insert(TokenType::HeadingAtx, on_exit_heading_atx);
@@ -607,6 +605,13 @@ fn on_enter_buffer(context: &mut CompileContext) {
     context.buffer();
 }
 
+/// Handle [`Enter`][EventType::Enter]:[`BlockQuote`][TokenType::BlockQuote].
+fn on_enter_block_quote(context: &mut CompileContext) {
+    // To do (not ported yet): `tightStack.push(false)`.
+    context.line_ending_if_needed();
+    context.tag(String::from("<blockquote>"));
+}
+
 /// Handle [`Enter`][EventType::Enter]:[`CodeIndented`][TokenType::CodeIndented].
 fn on_enter_code_indented(context: &mut CompileContext) {
     context.code_flow_seen_data = Some(false);
@@ -695,6 +700,7 @@ fn on_enter_link(context: &mut CompileContext) {
 
 /// Handle [`Enter`][EventType::Enter]:[`Paragraph`][TokenType::Paragraph].
 fn on_enter_paragraph(context: &mut CompileContext) {
+    context.line_ending_if_needed();
     context.tag("<p>".to_string());
 }
 
@@ -756,6 +762,14 @@ fn on_exit_break(context: &mut CompileContext) {
     context.tag("<br />".to_string());
 }
 
+/// Handle [`Exit`][EventType::Exit]:[`BlockQuote`][TokenType::BlockQuote].
+fn on_exit_block_quote(context: &mut CompileContext) {
+    // To do (not ported yet): `tightStack.pop()`.
+    context.line_ending_if_needed();
+    context.tag(String::from("</blockquote>"));
+    // To do (not ported yet): `let mut slurp_all_line_endings = false;`.
+}
+
 /// Handle [`Exit`][EventType::Exit]:[`CharacterReferenceMarker`][TokenType::CharacterReferenceMarker].
 fn on_exit_character_reference_marker(context: &mut CompileContext) {
     context.character_reference_kind = Some(CharacterReferenceKind::Named);
@@ -971,6 +985,7 @@ fn on_exit_heading_atx_sequence(context: &mut CompileContext) {
             false,
         )
         .len();
+        context.line_ending_if_needed();
         context.atx_opening_sequence_size = Some(rank);
         context.tag(format!("<h{}>", rank));
     }
@@ -1001,6 +1016,7 @@ fn on_exit_heading_setext_underline(context: &mut CompileContext) {
     )[0];
     let level: usize = if head == Code::Char('-') { 2 } else { 1 };
 
+    context.line_ending_if_needed();
     context.tag(format!("<h{}>", level));
     context.push(text);
     context.tag(format!("</h{}>", level));
@@ -1157,5 +1173,6 @@ fn on_exit_strong(context: &mut CompileContext) {
 
 /// Handle [`Exit`][EventType::Exit]:[`ThematicBreak`][TokenType::ThematicBreak].
 fn on_exit_thematic_break(context: &mut CompileContext) {
+    context.line_ending_if_needed();
     context.tag("<hr />".to_string());
 }
diff --git a/src/construct/block_quote.rs b/src/construct/block_quote.rs
new file mode 100644
index 0000000..cd5b872
--- /dev/null
+++ b/src/construct/block_quote.rs
@@ -0,0 +1,74 @@
+//! Block quote is a container construct whose lines start with a `>` marker.
+//!
+//! To do: document the grammar, the tokens, and references.
+
+use crate::constant::TAB_SIZE;
+use crate::construct::partial_space_or_tab::space_or_tab_min_max;
+use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+
+/// Start of a block quote.
+///
+/// Takes at most `TAB_SIZE - 1` of `space_or_tab`, then moves on to `before`.
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    // To do: allow arbitrary when code (indented) is turned off.
+    let indent = tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), before);
+    indent(tokenizer, code)
+}
+
+/// After optional whitespace, at what could be the first `>` of a block quote.
+///
+/// `BlockQuote` is only entered when the marker is there; the prefix state
+/// that follows decides whether the attempt is ok.
+fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    if code == Code::Char('>') {
+        tokenizer.enter(TokenType::BlockQuote);
+    }
+
+    cont_before(tokenizer, code)
+}
+
+/// Start of a continuation line of a block quote.
+///
+/// Takes at most `TAB_SIZE - 1` of `space_or_tab`, then expects the prefix.
+pub fn cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    // To do: allow arbitrary when code (indented) is turned off.
+    let indent = tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), cont_before);
+    indent(tokenizer, code)
+}
+
+/// At the `>` marker of a block quote prefix.
+///
+/// Anything other than `>` is not ok.
+fn cont_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    if code != Code::Char('>') {
+        return (State::Nok, None);
+    }
+
+    tokenizer.enter(TokenType::BlockQuotePrefix);
+    tokenizer.enter(TokenType::BlockQuoteMarker);
+    tokenizer.consume(code);
+    tokenizer.exit(TokenType::BlockQuoteMarker);
+    (State::Fn(Box::new(cont_after)), None)
+}
+
+/// After the `>` marker: one optional space or tab is part of the prefix.
+///
+/// Any other code is handed back unconsumed.
+fn cont_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    let remainder = if matches!(code, Code::VirtualSpace | Code::Char('\t' | ' ')) {
+        tokenizer.enter(TokenType::BlockQuotePrefixWhitespace);
+        tokenizer.consume(code);
+        tokenizer.exit(TokenType::BlockQuotePrefixWhitespace);
+        None
+    } else {
+        Some(vec![code])
+    };
+
+    tokenizer.exit(TokenType::BlockQuotePrefix);
+    (State::Ok, remainder)
+}
+
+/// Token types to exit when a block quote is closed.
+pub fn end() -> Vec<TokenType> {
+    Vec::from([TokenType::BlockQuote])
+}
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index 211434f..440baa8 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -60,7 +60,7 @@
 use crate::constant::TAB_SIZE;
 use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
 use crate::tokenizer::{Code, Event, EventType, State, StateFnResult, TokenType, Tokenizer};
-use crate::util::edit_map::EditMap;
+use crate::util::{edit_map::EditMap, skip::opt_back as skip_opt_back};
 
 /// Kind of underline.
 #[derive(Debug, Clone, PartialEq)]
@@ -116,11 +116,26 @@ impl Kind {
 /// ```
 pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     let index = tokenizer.events.len();
-    let paragraph_before = index > 3
-        && tokenizer.events[index - 1].token_type == TokenType::LineEnding
-        && tokenizer.events[index - 3].token_type == TokenType::Paragraph;
-
-    if paragraph_before {
+    let previous = if index > 1 {
+        skip_opt_back(
+            &tokenizer.events,
+            index - 1,
+            &[TokenType::SpaceOrTab, TokenType::BlockQuotePrefix],
+        )
+    } else {
+        0
+    };
+    let previous = skip_opt_back(&tokenizer.events, previous, &[TokenType::LineEnding]);
+    let paragraph_before =
+        previous > 1 && tokenizer.events[previous].token_type == TokenType::Paragraph;
+
+    println!(
+        "setext-start: {:?} {:?} {:?}",
+        tokenizer.interrupt, tokenizer.lazy, paragraph_before
+    );
+
+    // Require a paragraph before and do not allow on a lazy line.
+    if paragraph_before && !tokenizer.lazy {
         // To do: allow arbitrary when code (indented) is turned off.
         tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), before)(tokenizer, code)
     } else {
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index 66b2a3c..936ecf6 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -64,6 +64,7 @@
 pub mod attention;
 pub mod autolink;
 pub mod blank_line;
+pub mod block_quote;
 pub mod character_escape;
 pub mod character_reference;
 pub mod code_fenced;
diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs
index 4f5e662..ace174f 100644
--- a/src/construct/paragraph.rs
+++ b/src/construct/paragraph.rs
@@ -35,7 +35,7 @@
 use crate::tokenizer::{
     Code, ContentType, Event, EventType, State, StateFnResult, TokenType, Tokenizer,
 };
-use crate::util::edit_map::EditMap;
+use crate::util::{edit_map::EditMap, skip::opt as skip_opt};
 
 /// Before a paragraph.
 ///
@@ -90,19 +90,27 @@ pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> {
         if event.event_type == EventType::Enter && event.token_type == TokenType::Paragraph {
             // Exit:Paragraph
             let mut exit_index = index + 3;
+            let mut enter_next_index =
+                skip_opt(&tokenizer.events, exit_index + 1, &[TokenType::LineEnding]);
             // Enter:Paragraph
-            let mut enter_next_index = exit_index + 3;
+            enter_next_index = skip_opt(
+                &tokenizer.events,
+                enter_next_index,
+                &[TokenType::SpaceOrTab, TokenType::BlockQuotePrefix],
+            );
 
             // Find future `Paragraphs`.
-            // There will be `LineEnding` between.
-            while enter_next_index < len
+            while enter_next_index < tokenizer.events.len()
                 && tokenizer.events[enter_next_index].token_type == TokenType::Paragraph
             {
                 // Remove Exit:Paragraph, Enter:LineEnding, Exit:LineEnding, Enter:Paragraph.
-                edit_map.add(exit_index, 4, vec![]);
+                edit_map.add(exit_index, 3, vec![]);
+
+                // Remove Enter:Paragraph.
+                edit_map.add(enter_next_index, 1, vec![]);
 
                 // Add Exit:LineEnding position info to Exit:Data.
-                let line_ending_exit = &tokenizer.events[enter_next_index - 1];
+                let line_ending_exit = &tokenizer.events[exit_index + 2];
                 let line_ending_point = line_ending_exit.point.clone();
                 let line_ending_index = line_ending_exit.index;
                 let data_exit = &mut tokenizer.events[exit_index - 1];
@@ -117,7 +125,13 @@ pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> {
 
                 // Potential next start.
                 exit_index = enter_next_index + 3;
-                enter_next_index = exit_index + 3;
+                enter_next_index =
+                    skip_opt(&tokenizer.events, exit_index + 1, &[TokenType::LineEnding]);
+                enter_next_index = skip_opt(
+                    &tokenizer.events,
+                    enter_next_index,
+                    &[TokenType::SpaceOrTab, TokenType::BlockQuotePrefix],
+                );
             }
 
             // Move to `Exit:Paragraph`.
diff --git a/src/content/document.rs b/src/content/document.rs
new file mode 100644
index 0000000..dd5038f
--- /dev/null
+++ b/src/content/document.rs
@@ -0,0 +1,439 @@
+//! The document content type.
+//!
+//! **Document** represents the containers, such as block quotes and lists,
+//! which structure the document and contain other sections.
+//!
+//! The constructs found in flow are:
+//!
+//! *   [Block quote][crate::construct::block_quote]
+//! *   List
+
+use crate::construct::block_quote::{
+    cont as block_quote_cont, end as block_quote_end, start as block_quote,
+};
+use crate::content::flow::start as flow;
+use crate::parser::ParseState;
+use crate::subtokenize::subtokenize;
+use crate::tokenizer::{
+    Code, Event, EventType, Point, State, StateFn, StateFnResult, TokenType, Tokenizer,
+};
+use crate::util::edit_map::EditMap;
+use crate::util::{
+    normalize_identifier::normalize_identifier,
+    span::{from_exit_event, serialize},
+};
+use std::collections::HashSet;
+
+struct DocumentInfo {
+    continued: usize, // Number of open containers matched so far on the current line.
+    stack: Vec<String>, // Names of the open containers, in the order they were opened.
+    next: Box<StateFn>, // State to run flow with on the current line.
+    last_line_ending_index: Option<usize>, // Second-to-last event index, set when a line ended in `LineEnding`.
+    map: EditMap, // Queued container exits, applied when flow is done.
+}
+
+/// Turn `codes` as the document content type into events.
+pub fn document(parse_state: &mut ParseState, point: Point, index: usize) -> Vec<Event> {
+    let mut tokenizer = Tokenizer::new(point, index, parse_state);
+    // Feed all codes to the tokenizer, starting at the `start` state.
+    tokenizer.push(&parse_state.codes, Box::new(start), true);
+
+    let mut index = 0;
+    let mut next_definitions: HashSet<String> = HashSet::new();
+    // Collect the normalized identifier of every definition label.
+    while index < tokenizer.events.len() {
+        let event = &tokenizer.events[index];
+
+        if event.event_type == EventType::Exit
+            && event.token_type == TokenType::DefinitionLabelString
+        {
+            next_definitions.insert(normalize_identifier(
+                serialize(
+                    &parse_state.codes,
+                    &from_exit_event(&tokenizer.events, index),
+                    false,
+                )
+                .as_str(),
+            ));
+        }
+
+        index += 1;
+    }
+
+    let mut result = (tokenizer.events, false);
+    // Store the definitions on the parse state before subtokenizing.
+    parse_state.definitions = next_definitions;
+    // Repeat `subtokenize` until the flag it returns is set.
+    while !result.1 {
+        result = subtokenize(result.0, parse_state);
+    }
+
+    result.0
+}
+
+fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    let fresh = DocumentInfo {
+        map: EditMap::new(),
+        last_line_ending_index: None,
+        next: Box::new(flow), // The first line is handled from the start of flow.
+        stack: Vec::new(), // No containers are open yet.
+        continued: 0,
+    };
+    before(tokenizer, code, fresh)
+}
+
+/// Before a line: see which of the open containers continue on it.
+fn before(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnResult {
+    // First we iterate through the open blocks, starting with the root
+    // document, and descending through last children down to the last open
+    // block.
+    // Each block imposes a condition that the line must satisfy if the block
+    // is to remain open.
+    // For example, a block quote requires a `>` character.
+    // A paragraph requires a non-blank line.
+    // In this phase we may match all or just some of the open blocks.
+    // But we cannot close unmatched blocks yet, because we may have a lazy
+    // continuation line.
+    if info.continued < info.stack.len() {
+        let name = &info.stack[info.continued];
+        // To do: list.
+        let cont = if name == "blockquote" {
+            block_quote_cont
+        } else {
+            unreachable!("todo: cont construct {:?}", name)
+        };
+
+        // To do: state?
+
+        tokenizer.attempt(cont, move |ok| {
+            if ok {
+                Box::new(|t, c| document_continue(t, c, info))
+            } else {
+                Box::new(|t, c| check_new_containers(t, c, info))
+            }
+        })(tokenizer, code)
+    } else {
+        // Done.
+        check_new_containers(tokenizer, code, info)
+    }
+}
+
+/// After an open container was found to continue on this line.
+fn document_continue(
+    tokenizer: &mut Tokenizer,
+    code: Code,
+    mut info: DocumentInfo,
+) -> StateFnResult {
+    info.continued += 1;
+
+    // To do: close flow sometimes? (The reference below only exits containers then.)
+    // // Note: this field is called `_closeFlow` but it also closes containers.
+    // // Perhaps a good idea to rename it but it’s already used in the wild by
+    // // extensions.
+    // if (self.containerState._closeFlow) {
+    //   self.containerState._closeFlow = undefined
+
+    //   if (childFlow) {
+    //     closeFlow()
+    //   }
+
+    //   // Note: this algorithm for moving events around is similar to the
+    //   // algorithm when dealing with lazy lines in `writeToChild`.
+    //   const indexBeforeExits = self.events.length
+    //   let indexBeforeFlow = indexBeforeExits
+    //   /** @type {Point|undefined} */
+    //   let point
+
+    //   // Find the flow chunk.
+    //   while (indexBeforeFlow--) {
+    //     if (
+    //       self.events[indexBeforeFlow][0] === 'exit' &&
+    //       self.events[indexBeforeFlow][1].type === types.chunkFlow
+    //     ) {
+    //       point = self.events[indexBeforeFlow][1].end
+    //       break
+    //     }
+    //   }
+
+    //   assert(point, 'could not find previous flow chunk')
+
+    let size = info.continued;
+    exit_containers(tokenizer, &mut info, size);
+
+    //   // Fix positions.
+    //   let index = indexBeforeExits
+
+    //   while (index < self.events.length) {
+    //     self.events[index][1].end = Object.assign({}, point)
+    //     index++
+    //   }
+
+    //   // Inject the exits earlier (they’re still also at the end).
+    //   splice(
+    //     self.events,
+    //     indexBeforeFlow + 1,
+    //     0,
+    //     self.events.slice(indexBeforeExits)
+    //   )
+
+    //   // Discard the duplicate exits.
+    //   self.events.length = index
+
+    //   return checkNewContainers(code)
+    // }
+
+    before(tokenizer, code, info)
+}
+// documentContinue
+
+/// After the open containers were checked: look for a new container on this line.
+fn check_new_containers(
+    tokenizer: &mut Tokenizer,
+    code: Code,
+    info: DocumentInfo,
+) -> StateFnResult {
+    // Next, after consuming the continuation markers for existing blocks, we
+    // look for new block starts (e.g. `>` for a block quote).
+    // If we encounter a new block start, we close any blocks unmatched in
+    // step 1 before creating the new block as a child of the last matched
+    // block.
+    if info.continued == info.stack.len() {
+        // To do: concrete? interrupt?
+        //   // No need to `check` whether there’s a container, of `exitContainers`
+        //   // would be moot.
+        //   // We can instead immediately `attempt` to parse one.
+        //   if (!childFlow) {
+        //     return documentContinued(code)
+        //   }
+
+        //   // If we have concrete content, such as block HTML or fenced code,
+        //   // we can’t have containers “pierce” into them, so we can immediately
+        //   // start.
+        //   if (childFlow.currentConstruct && childFlow.currentConstruct.concrete) {
+        //     return flowStart(code)
+        //   }
+
+        //   // If we do have flow, it could still be a blank line,
+        //   // but we’d be interrupting it w/ a new container if there’s a current
+        //   // construct.
+        //   self.interrupt = Boolean(
+        //     childFlow.currentConstruct && !childFlow._gfmTableDynamicInterruptHack
+        //   )
+    }
+
+    // Check if there is a new container.
+    // To do: list.
+    tokenizer.attempt(block_quote, move |ok| {
+        if ok {
+            Box::new(|t, c| there_is_a_new_container(t, c, info, "blockquote".to_string()))
+        } else {
+            Box::new(|t, c| there_is_no_new_container(t, c, info))
+        }
+    })(tokenizer, code)
+}
+
+/// After a new container was found: close unmatched containers and open `name`.
+fn there_is_a_new_container(
+    tokenizer: &mut Tokenizer,
+    code: Code,
+    mut info: DocumentInfo,
+    name: String,
+) -> StateFnResult {
+    // To do: close flow.
+    // if (childFlow) closeFlow()
+    let size = info.continued;
+    exit_containers(tokenizer, &mut info, size);
+    info.stack.push(name);
+    info.continued += 1;
+    document_continued(tokenizer, code, info)
+}
+
+/// Exit open containers.
+///
+/// Pops entries off `info.stack` until only `size` are left.
+/// For every popped container, an exit event per token type is queued in
+/// `info.map` and the matching entry is removed from `tokenizer.stack`.
+/// The exits are placed at the index, and get the position, of the last
+/// line ending when there is one, and of the tokenizer otherwise.
+///
+/// # Panics
+///
+/// Panics when `info.stack` has a name that is not supported yet, or when
+/// `tokenizer.stack` has no entry for a token type that must be exited.
+fn exit_containers(tokenizer: &mut Tokenizer, info: &mut DocumentInfo, size: usize) {
+    while info.stack.len() > size {
+        let name = info.stack.pop().unwrap();
+
+        // To do: list.
+        let token_types = match name.as_str() {
+            "blockquote" => block_quote_end(),
+            _ => unreachable!("todo: cont {:?}", name),
+        };
+
+        // To do: improve below code.
+        // Use the last line ending if there is one, and the current place of
+        // the tokenizer otherwise.
+        let (insert_index, eol_point, eol_index) = match info.last_line_ending_index {
+            Some(index) => {
+                let line_ending = &tokenizer.events[index];
+                (index, line_ending.point.clone(), line_ending.index)
+            }
+            None => (
+                tokenizer.events.len(),
+                tokenizer.point.clone(),
+                tokenizer.index,
+            ),
+        };
+
+        for token_type in &token_types {
+            info.map.add(
+                insert_index,
+                0,
+                vec![Event {
+                    event_type: EventType::Exit,
+                    token_type: token_type.clone(),
+                    point: eol_point.clone(),
+                    index: eol_index,
+                    previous: None,
+                    next: None,
+                    content_type: None,
+                }],
+            );
+
+            // Find the last entry on the tokenizer stack with this type.
+            // When there is none, index `0` is used, so that the assertion
+            // below reports the mismatch.
+            let stack_index = tokenizer
+                .stack
+                .iter()
+                .rposition(|open| open == token_type)
+                .unwrap_or(0);
+
+            assert_eq!(
+                tokenizer.stack[stack_index], *token_type,
+                "expected token type"
+            );
+            tokenizer.stack.remove(stack_index);
+        }
+    }
+}
+
+/// After checking for a new container, when there is none on this line.
+///
+/// The line is lazy when not all open containers were continued on it.
+fn there_is_no_new_container(
+    tokenizer: &mut Tokenizer,
+    code: Code,
+    info: DocumentInfo,
+) -> StateFnResult {
+    let lazy = info.continued != info.stack.len();
+    tokenizer.lazy = lazy;
+
+    // To do: this line may be lazy; depending on what is parsed now, do we
+    // need to close containers before?
+    // lineStartOffset = self.now().offset
+    flow_start(tokenizer, code, info)
+}
+
+/// After a new container was opened: try to open another one, and start flow
+/// when there is none.
+fn document_continued(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnResult {
+    // Try new containers.
+    // To do: list.
+    tokenizer.attempt(block_quote, |ok| {
+        if ok {
+            Box::new(|t, c| container_continue(t, c, info))
+        } else {
+            Box::new(|t, c| {
+                // To do: this looks like a bug?
+                t.lazy = false;
+                flow_start(t, c, info)
+            })
+        }
+    })(tokenizer, code)
+}
+
+/// After yet another new container was opened on this line: count it and try again.
+fn container_continue(
+    tokenizer: &mut Tokenizer,
+    code: Code,
+    mut info: DocumentInfo,
+) -> StateFnResult {
+    // assert(
+    //   self.currentConstruct,
+    //   'expected `currentConstruct` to be defined on tokenizer'
+    // )
+    // assert(
+    //   self.containerState,
+    //   'expected `containerState` to be defined on tokenizer'
+    // )
+    info.continued += 1;
+    // To do: add to stack?
+    // stack.push([self.currentConstruct, self.containerState])
+    // Try another.
+    document_continued(tokenizer, code, info)
+}
+
+/// Start (or resume) flow on the rest of the line, after containers are handled.
+fn flow_start(tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo) -> StateFnResult {
+    let next = info.next;
+    info.next = Box::new(flow); // This is weird but Rust needs a function there.
+
+    let size = info.continued;
+    exit_containers(tokenizer, &mut info, size);
+
+    tokenizer.go_until(next, eof_eol, move |(state, remainder)| {
+        (
+            State::Fn(Box::new(move |t, c| flow_end(t, c, info, state))),
+            remainder,
+        )
+    })(tokenizer, code)
+}
+
+/// After flow on a line: reset the per-line state, then finish or go on.
+///
+/// When `result` is `State::Ok`, all containers are exited and the queued
+/// edits are applied; when it is `State::Fn`, that state is kept to run flow
+/// with on the next line.
+fn flow_end(
+    tokenizer: &mut Tokenizer,
+    code: Code,
+    mut info: DocumentInfo,
+    result: State,
+) -> StateFnResult {
+    // To do: if this line was lazy (`tokenizer.lazy`), depending on what was
+    // parsed, do we need to exit containers after it?
+
+    info.continued = 0;
+
+    // To do: blank lines? Other things?
+    if tokenizer.events.len() > 2
+        && tokenizer.events[tokenizer.events.len() - 1].token_type == TokenType::LineEnding
+    {
+        info.last_line_ending_index = Some(tokenizer.events.len() - 2);
+    } else {
+        info.last_line_ending_index = None;
+    }
+
+    match result {
+        State::Ok => {
+            // Done: exit everything that is still open and apply the edits.
+            exit_containers(tokenizer, &mut info, 0);
+            tokenizer.events = info.map.consume(&mut tokenizer.events);
+            (State::Ok, Some(vec![code]))
+        }
+        State::Nok => unreachable!("handle nok in `flow`?"),
+        State::Fn(func) => {
+            // Flow is not done: keep its state and check the next line.
+            info.next = func;
+            before(tokenizer, code, info)
+        }
+    }
+}
+
+/// Whether `code` is the end of the file or an end of line.
+fn eof_eol(code: Code) -> bool {
+    let eof = matches!(code, Code::None);
+    let eol = matches!(code, Code::CarriageReturnLineFeed | Code::Char('\n' | '\r'));
+    eof || eol
+}
diff --git a/src/content/flow.rs b/src/content/flow.rs
index 74c6a62..f406685 100644
--- a/src/content/flow.rs
+++ b/src/content/flow.rs
@@ -26,52 +26,7 @@ use crate::construct::{
     html_flow::start as html_flow, paragraph::start as paragraph,
     thematic_break::start as thematic_break,
 };
-use crate::parser::ParseState;
-use crate::subtokenize::subtokenize;
-use crate::tokenizer::{Code, Event, EventType, Point, State, StateFnResult, TokenType, Tokenizer};
-use crate::util::{
-    normalize_identifier::normalize_identifier,
-    span::{from_exit_event, serialize},
-};
-use std::collections::HashSet;
-
-/// Turn `codes` as the flow content type into events.
-pub fn flow(parse_state: &mut ParseState, point: Point, index: usize) -> Vec<Event> {
-    let mut tokenizer = Tokenizer::new(point, index, parse_state);
-    tokenizer.push(&parse_state.codes, Box::new(start), true);
-    let mut next_definitions: HashSet<String> = HashSet::new();
-
-    let mut index = 0;
-
-    while index < tokenizer.events.len() {
-        let event = &tokenizer.events[index];
-
-        if event.event_type == EventType::Exit
-            && event.token_type == TokenType::DefinitionLabelString
-        {
-            next_definitions.insert(normalize_identifier(
-                serialize(
-                    &parse_state.codes,
-                    &from_exit_event(&tokenizer.events, index),
-                    false,
-                )
-                .as_str(),
-            ));
-        }
-
-        index += 1;
-    }
-
-    let mut result = (tokenizer.events, false);
-
-    parse_state.definitions = next_definitions;
-
-    while !result.1 {
-        result = subtokenize(result.0, parse_state);
-    }
-
-    result.0
-}
+use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
 
 /// Before flow.
 ///
@@ -83,7 +38,7 @@ pub fn flow(parse_state: &mut ParseState, point: Point, index: usize) -> Vec<Eve
 /// |    bravo
 /// |***
 /// ```
-fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::None => (State::Ok, None),
         _ => tokenizer.attempt(blank_line, |ok| {
diff --git a/src/content/mod.rs b/src/content/mod.rs
index ae8ad83..af40cc0 100644
--- a/src/content/mod.rs
+++ b/src/content/mod.rs
@@ -1,5 +1,11 @@
 //! Content types found in markdown.
+//!
+//! *   [document][document]
+//! *   [flow][flow]
+//! *   [string][string]
+//! *   [text][text]
 
+pub mod document;
 pub mod flow;
 pub mod string;
 pub mod text;
diff --git a/src/parser.rs b/src/parser.rs
index 69dd355..b1fd4fd 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -2,7 +2,7 @@
 
 use std::collections::HashSet;
 // To do: this should start with `containers`, when they’re done.
-use crate::content::flow::flow;
+use crate::content::document::document;
 use crate::tokenizer::{Code, Event, Point};
 use crate::util::codes::parse as parse_codes;
 
@@ -27,7 +27,7 @@ pub fn parse(value: &str) -> (Vec<Event>, ParseState) {
         definitions: HashSet::new(),
     };
 
-    let events = flow(
+    let events = document(
         &mut parse_state,
         Point {
             line: 1,
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 8c11a68..cbcc464 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1702,6 +1702,10 @@ pub enum TokenType {
     ///
     /// > 👉 **Note**: this is used while parsing but compiled away.
     AttentionSequence,
+    BlockQuote,
+    BlockQuoteMarker,
+    BlockQuotePrefix,
+    BlockQuotePrefixWhitespace,
 }
 
 /// Embedded content type.
@@ -1841,6 +1845,7 @@ struct InternalState {
 // #[derive(Debug)]
 
 /// A tokenizer itself.
+#[allow(clippy::struct_excessive_bools)]
 pub struct Tokenizer<'a> {
     column_start: HashMap<usize, usize>,
     /// Track whether a character is expected to be consumed, and whether it’s
@@ -1855,15 +1860,15 @@ pub struct Tokenizer<'a> {
     /// Hierarchy of semantic labels.
     ///
     /// Tracked to make sure everything’s valid.
-    stack: Vec<TokenType>,
+    pub stack: Vec<TokenType>,
     /// Previous character code.
     pub previous: Code,
     /// Current character code.
     current: Code,
     /// `index` in codes of the current code.
-    index: usize,
+    pub index: usize,
     /// Current relative and absolute place in the file.
-    point: Point,
+    pub point: Point,
     /// List of attached resolvers, which will be called when done feeding,
     /// to clean events.
     resolvers: Vec<Box<Resolver>>,
@@ -1887,6 +1892,7 @@ pub struct Tokenizer<'a> {
     ///
     /// Used when tokenizing [flow content][crate::content::flow].
     pub interrupt: bool,
+    pub lazy: bool,
 }
 
 impl<'a> Tokenizer<'a> {
@@ -1907,6 +1913,7 @@ impl<'a> Tokenizer<'a> {
             label_start_list_loose: vec![],
             media_list: vec![],
             interrupt: false,
+            lazy: false,
             resolvers: vec![],
             resolver_ids: vec![],
         }
@@ -2120,7 +2127,8 @@ impl<'a> Tokenizer<'a> {
             state_fn,
             until,
             vec![],
-            |result: (Vec<Code>, Vec<Code>), _ok, _tokenizer: &mut Tokenizer, state| {
+            |result: (Vec<Code>, Vec<Code>), _ok, tokenizer: &mut Tokenizer, state| {
+                tokenizer.consumed = true;
                 done(check_statefn_result((state, Some(result.1))))
             },
         )
@@ -2262,6 +2270,20 @@ fn attempt_impl(
     done: impl FnOnce((Vec<Code>, Vec<Code>), bool, &mut Tokenizer, State) -> StateFnResult + 'static,
 ) -> Box<StateFn> {
     Box::new(|tokenizer, code| {
+        // To do: `pause` is currently used after the code.
+        // Should it be before?
+        // How to match `eof`?
+        if !codes.is_empty() && pause(tokenizer.previous) {
+            tokenizer.consumed = true;
+            println!("pause!: {:?}", (codes.clone(), vec![code]));
+            return done(
+                (codes, vec![code]),
+                false,
+                tokenizer,
+                State::Fn(Box::new(state)),
+            );
+        }
+
         let (next, remainder) = check_statefn_result(state(tokenizer, code));
 
         match code {
@@ -2278,14 +2300,6 @@ fn attempt_impl(
             );
         }
 
-        // To do: `pause` is currently used after the code.
-        // Should it be before?
-        if pause(code) {
-            tokenizer.consumed = true;
-            let remaining = if let Some(x) = remainder { x } else { vec![] };
-            return done((codes, remaining), false, tokenizer, next);
-        }
-
         match next {
             State::Ok => {
                 let remaining = if let Some(x) = remainder { x } else { vec![] };
diff --git a/src/util/edit_map.rs b/src/util/edit_map.rs
index ae627c1..f67a8b9 100644
--- a/src/util/edit_map.rs
+++ b/src/util/edit_map.rs
@@ -48,6 +48,7 @@ fn shift_links(events: &mut [Event], jumps: &[(usize, isize)]) {
 
 /// Make it easy to insert and remove things while being performant and keeping
 /// links in check.
+#[derive(Debug)]
 pub struct EditMap {
     /// Whether this map was consumed already.
     consumed: bool,
diff --git a/src/util/mod.rs b/src/util/mod.rs
index d1a0e01..ae1add6 100644
--- a/src/util/mod.rs
+++ b/src/util/mod.rs
@@ -6,4 +6,5 @@ pub mod edit_map;
 pub mod encode;
 pub mod normalize_identifier;
 pub mod sanitize_uri;
+pub mod skip;
 pub mod span;
diff --git a/src/util/skip.rs b/src/util/skip.rs
new file mode 100644
index 0000000..2c4198a
--- /dev/null
+++ b/src/util/skip.rs
@@ -0,0 +1,82 @@
+//! Utilities to skip over whole tokens in a list of events.
+
+use crate::tokenizer::{Event, TokenType};
+
+/// Skip forward over tokens of the given types.
+///
+/// Starting at `index`, as long as the event there has one of `token_types`,
+/// moves past that event and on past the next event of the same type (its
+/// other boundary).
+/// Returns the index of the first event that was not skipped, or
+/// `events.len()` when the end was reached.
+pub fn opt(events: &[Event], index: usize, token_types: &[TokenType]) -> usize {
+    skip_opt_with_direction(events, index, token_types, true)
+}
+
+/// Skip backward over tokens of the given types.
+///
+/// Like [`opt`], but walks toward the start of `events`.
+/// Returns `0` when the walk would move before the first event.
+pub fn opt_back(events: &[Event], index: usize, token_types: &[TokenType]) -> usize {
+    skip_opt_with_direction(events, index, token_types, false)
+}
+
+/// Shared implementation of [`opt`] and [`opt_back`].
+///
+/// Stays inside `events` at all times: running off the end yields
+/// `events.len()` and walking before the first event yields `0`, instead of
+/// panicking on an out-of-bounds index or on `usize` underflow.
+fn skip_opt_with_direction(
+    events: &[Event],
+    index: usize,
+    token_types: &[TokenType],
+    forward: bool,
+) -> usize {
+    // One event further in the walking direction; `None` when that would be
+    // before the first event.
+    let step = |at: usize| {
+        if forward {
+            Some(at + 1)
+        } else {
+            at.checked_sub(1)
+        }
+    };
+    let mut index = index;
+
+    while let Some(event) = events.get(index) {
+        let current = &event.token_type;
+
+        if !token_types.contains(current) {
+            break;
+        }
+
+        // Step off this boundary, then look for the other boundary of the
+        // same token.
+        let mut other = match step(index) {
+            Some(at) => at,
+            None => return 0,
+        };
+
+        loop {
+            match events.get(other) {
+                Some(event) if event.token_type == *current => break,
+                Some(_) => {}
+                // Ran off the end: the token has no other boundary.
+                None => return events.len(),
+            }
+
+            other = match step(other) {
+                Some(at) => at,
+                None => return 0,
+            };
+        }
+
+        // Continue after (or, going backward, before) the other boundary.
+        index = match step(other) {
+            Some(at) => at,
+            None => return 0,
+        };
+    }
+
+    index
+}
author	Titus Wormer <tituswormer@gmail.com>	2022-07-07 17:21:38 +0200
committer	Titus Wormer <tituswormer@gmail.com>	2022-07-07 17:36:35 +0200
commit	4806864e5377a5fef937b3fa02542e620c547969 (patch)
tree	c91ae2bbd1dc2037f425efd24d62d05e706e3e60 /src
parent	c2b4402223e53498078fc33dd55aabc0a48cdb56 (diff)
download	markdown-rs-4806864e5377a5fef937b3fa02542e620c547969.tar.gz markdown-rs-4806864e5377a5fef937b3fa02542e620c547969.tar.bz2 markdown-rs-4806864e5377a5fef937b3fa02542e620c547969.zip