author | Titus Wormer <tituswormer@gmail.com> | 2022-06-28 14:18:17 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-06-28 14:18:17 +0200 |
commit | dfd11b1bc155ae1fba9975a90c2dc83dc07697b4 (patch) | |
tree | 0dd150365a6ae1df4c4845518efafe02ab61cb77 /src/subtokenize.rs | |
parent | a3dd207e3b1ebcbcb6cec0f703a695e51ae4ece0 (diff) | |
Fix jumps in `edit_map`
* Use resolve more often (e.g., heading (atx, setext))
* Fix to link whole phrasing (e.g., one big chunk of text in heading (atx,
setext), titles, labels)
* Replace `ChunkText` and `ChunkString` with
  `event.content_type: Option<ContentType>` (see the sketch after this list)
* Refactor to externalize `edit_map` from `label`
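
The heart of the change is the third bullet: instead of marking chunks with dedicated `ChunkText`/`ChunkString` token types, an event now carries an optional content type, and linked chunks chain together through `previous`/`next` indices. The following is a minimal, self-contained sketch of that shape; it is illustrative only and omits most fields of the real `Event` struct (such as `point`, `index`, and `token_type`):

```rust
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ContentType {
    String,
    Text,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum EventType {
    Enter,
    Exit,
}

#[derive(Debug)]
struct Event {
    event_type: EventType,
    // `Some(…)` marks an event whose span must be retokenized as string or
    // text content; `None` means a plain, already-final token.
    content_type: Option<ContentType>,
    // Indices of the previous/next linked chunk in the event list, if any.
    previous: Option<usize>,
    next: Option<usize>,
}

fn main() {
    // Two linked text chunks (enter at 0 links forward to the enter at 2,
    // which links back to 0), plus one stand-alone string chunk at 4.
    let events = vec![
        Event { event_type: EventType::Enter, content_type: Some(ContentType::Text), previous: None, next: Some(2) },
        Event { event_type: EventType::Exit, content_type: None, previous: None, next: None },
        Event { event_type: EventType::Enter, content_type: Some(ContentType::Text), previous: Some(0), next: None },
        Event { event_type: EventType::Exit, content_type: None, previous: None, next: None },
        Event { event_type: EventType::Enter, content_type: Some(ContentType::String), previous: None, next: None },
        Event { event_type: EventType::Exit, content_type: None, previous: None, next: None },
    ];

    // Only heads (linked enters without a `previous`) start a subtokenize pass.
    for (index, event) in events.iter().enumerate() {
        if event.content_type.is_some()
            && event.event_type == EventType::Enter
            && event.previous.is_none()
        {
            println!("head of linked content at {}: {:?}", index, event.content_type);
        }
    }
}
```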
Diffstat (limited to 'src/subtokenize.rs')
-rw-r--r-- | src/subtokenize.rs | 200 |
1 file changed, 99 insertions, 101 deletions
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index 58db3c6..92ada04 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -9,8 +9,7 @@
 //! * …must occur on [`Enter`][EventType::Enter] events only
 //! * …must occur on void events (they are followed by their corresponding
 //!   [`Exit`][EventType::Exit] event)
-//! * …must be headed by a [`ChunkString`][TokenType::ChunkString] or
-//!   [`ChunkText`][TokenType::ChunkText] event
+//! * …must have `content_type` field to define the kind of subcontent
 //!
 //! Links will then be passed through a tokenizer for the corresponding content
 //! type by `subtokenize`.
@@ -21,15 +20,13 @@
 //! us from doing so due to definitions, which can occur after references, and
 //! thus the whole document needs to be parsed up to the level of definitions,
 //! before any level that can include references can be parsed.
-//!
-//! <!-- To do: `ChunkFlow` when it exists. -->

 /// To do: could we do without `HashMap`, so we don’t need `std`?
 use std::collections::HashMap;

 use crate::content::{string::start as string, text::start as text};
 use crate::parser::ParseState;
-use crate::tokenizer::{Event, EventType, State, StateFn, StateFnResult, TokenType, Tokenizer};
+use crate::tokenizer::{ContentType, Event, EventType, State, StateFn, StateFnResult, Tokenizer};
 use crate::util::span;

 /// Create a link between two [`Event`][]s.
@@ -44,19 +41,19 @@ pub fn link(events: &mut [Event], index: usize) {
 /// To do
 pub fn link_to(events: &mut [Event], pevious: usize, next: usize) {
     let prev = &mut events[pevious];
-    // To do: force chunks?
-    // assert!(
-    //     prev.token_type == TokenType::ChunkString || prev.token_type == TokenType::ChunkText,
-    //     "{:?}",
-    //     prev.token_type.to_owned()
-    // );
+    assert!(
+        prev.content_type.is_some(),
+        "expected `content_type` on previous"
+    );
     assert_eq!(prev.event_type, EventType::Enter);
     prev.next = Some(next);

     let prev_ref = &events[pevious];
     let prev_exit_ref = &events[pevious + 1];
+    let curr_ref = &events[next];
     assert_eq!(prev_exit_ref.event_type, EventType::Exit);
     assert_eq!(prev_exit_ref.token_type, prev_ref.token_type);
+    assert_eq!(curr_ref.content_type, prev_ref.content_type);

     let curr = &mut events[next];
     assert_eq!(curr.event_type, EventType::Enter);
@@ -83,103 +80,104 @@ pub fn subtokenize(mut events: Vec<Event>, parse_state: &ParseState) -> (Vec<Eve
         let event = &events[index];

         // Find each first opening chunk.
-        if (event.token_type == TokenType::ChunkString
-            || event.token_type == TokenType::ChunkText) &&
-            event.event_type == EventType::Enter &&
-            // No need to enter linked events again.
-            event.previous == None
-        {
-            done = false;
-            // Index into `events` pointing to a chunk.
-            let mut index_opt: Option<usize> = Some(index);
-            // Subtokenizer.
-            let mut tokenizer = Tokenizer::new(event.point.clone(), event.index, parse_state);
-            // Substate.
-            let mut result: StateFnResult = (
-                State::Fn(Box::new(if event.token_type == TokenType::ChunkString {
-                    string
-                } else {
-                    text
-                })),
-                None,
-            );
-            // Indices into `codes` of each end of chunk.
-            let mut ends: Vec<usize> = vec![];
-
-            // Loop through chunks to pass them in order to the subtokenizer.
-            while let Some(index_ptr) = index_opt {
-                let enter = &events[index_ptr];
-                assert_eq!(enter.event_type, EventType::Enter);
-                let span = span::Span {
-                    start_index: enter.index,
-                    end_index: events[index_ptr + 1].index,
-                };
-                ends.push(span.end_index);
-
-                if enter.previous != None {
-                    tokenizer.define_skip(&enter.point, span.start_index);
-                }
-
-                let func: Box<StateFn> = match result.0 {
-                    State::Fn(func) => func,
-                    _ => unreachable!("cannot be ok/nok"),
-                };
+        if let Some(ref content_type) = event.content_type {
+            assert_eq!(event.event_type, EventType::Enter);

-                result = tokenizer.push(
-                    span::codes(&parse_state.codes, &span),
-                    func,
-                    enter.next == None,
+            // No need to enter linked events again.
+            if event.previous == None {
+                done = false;
+                // Index into `events` pointing to a chunk.
+                let mut index_opt: Option<usize> = Some(index);
+                // Subtokenizer.
+                let mut tokenizer = Tokenizer::new(event.point.clone(), event.index, parse_state);
+                // Substate.
+                let mut result: StateFnResult = (
+                    State::Fn(Box::new(if *content_type == ContentType::String {
+                        string
+                    } else {
+                        text
+                    })),
+                    None,
                 );
-                assert!(result.1.is_none(), "expected no remainder");
-                index_opt = enter.next;
-            }
-
-            // Now, loop through all subevents (and `ends`), to figure out
-            // which parts belong where.
-            // Current index.
-            let mut subindex = 0;
-            // Index into subevents that starts the current slice.
-            let mut last_start = 0;
-            // Counter into `ends`: the linked token we are at.
-            let mut end_index = 0;
-            let mut index_opt: Option<usize> = Some(index);
-
-            while subindex < tokenizer.events.len() {
-                let subevent = &mut tokenizer.events[subindex];
-
-                // Find the first event that starts after the end we’re looking
-                // for.
-                // To do: is this logic correct?
-                if subevent.event_type == EventType::Enter && subevent.index >= ends[end_index] {
-                    let link = index_opt.unwrap();
-                    link_to_info.insert(link, (index, last_start, subindex));
-
-                    last_start = subindex;
-                    end_index += 1;
-                    index_opt = events[link].next;
+                // Indices into `codes` of each end of chunk.
+                let mut ends: Vec<usize> = vec![];
+
+                // Loop through chunks to pass them in order to the subtokenizer.
+                while let Some(index_ptr) = index_opt {
+                    let enter = &events[index_ptr];
+                    assert_eq!(enter.event_type, EventType::Enter);
+                    let span = span::Span {
+                        start_index: enter.index,
+                        end_index: events[index_ptr + 1].index,
+                    };
+                    ends.push(span.end_index);
+
+                    if enter.previous != None {
+                        tokenizer.define_skip(&enter.point, span.start_index);
+                    }
+
+                    let func: Box<StateFn> = match result.0 {
+                        State::Fn(func) => func,
+                        _ => unreachable!("cannot be ok/nok"),
+                    };
+
+                    result = tokenizer.push(
+                        span::codes(&parse_state.codes, &span),
+                        func,
+                        enter.next == None,
+                    );
+                    assert!(result.1.is_none(), "expected no remainder");
+                    index_opt = enter.next;
                 }

-                // If there is a `next` link in the subevents, we have to change
-                // its index to account for the shifted events.
-                // If it points to a next event, we also change the next event’s
-                // reference back to *this* event.
-                if let Some(next) = subevent.next {
-                    // The `index` in `events` where the current link is,
-                    // minus 2 events (the enter and exit) for each removed
-                    // link.
-                    let shift = index_opt.unwrap() - (end_index * 2);
-
-                    subevent.next = Some(next + shift);
-                    let next_ev = &mut tokenizer.events[next];
-                    let previous = next_ev.previous.unwrap();
-                    next_ev.previous = Some(previous + shift);
+                // Now, loop through all subevents (and `ends`), to figure out
+                // which parts belong where.
+                // Current index.
+                let mut subindex = 0;
+                // Index into subevents that starts the current slice.
+                let mut last_start = 0;
+                // Counter into `ends`: the linked token we are at.
+                let mut end_index = 0;
+                let mut index_opt: Option<usize> = Some(index);
+
+                while subindex < tokenizer.events.len() {
+                    let subevent = &mut tokenizer.events[subindex];
+
+                    // Find the first event that starts after the end we’re looking
+                    // for.
+                    // To do: is this logic correct?
+                    if subevent.event_type == EventType::Enter && subevent.index >= ends[end_index]
+                    {
+                        let link = index_opt.unwrap();
+                        link_to_info.insert(link, (index, last_start, subindex));
+
+                        last_start = subindex;
+                        end_index += 1;
+                        index_opt = events[link].next;
+                    }
+
+                    // If there is a `next` link in the subevents, we have to change
+                    // its index to account for the shifted events.
+                    // If it points to a next event, we also change the next event’s
+                    // reference back to *this* event.
+                    if let Some(next) = subevent.next {
+                        // The `index` in `events` where the current link is,
+                        // minus 2 events (the enter and exit) for each removed
+                        // link.
+                        let shift = index_opt.unwrap() - (end_index * 2);
+
+                        subevent.next = Some(next + shift);
+                        let next_ev = &mut tokenizer.events[next];
+                        let previous = next_ev.previous.unwrap();
+                        next_ev.previous = Some(previous + shift);
+                    }
+
+                    subindex += 1;
                 }

-                subindex += 1;
+                link_to_info.insert(index_opt.unwrap(), (index, last_start, subindex));
+                head_to_tokenizer.insert(index, tokenizer);
             }
-
-            link_to_info.insert(index_opt.unwrap(), (index, last_start, subindex));
-            head_to_tokenizer.insert(index, tokenizer);
         }

         index += 1;
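
As an aside, the chunk-walking half of `subtokenize` above boils down to following `next` links from a head event until the chain runs out. Below is a stripped-down, hypothetical stand-in for that traversal, using plain tuples instead of the crate's `Event` and `Tokenizer` types:

```rust
// Each entry is `(start_index, next)`: a chunk's start position and the index
// of the next linked chunk in the outer event list, if any. The loop mirrors
// `while let Some(index_ptr) = index_opt { …; index_opt = enter.next; }`
// in the diff above.
fn walk_chain(chunks: &[(usize, Option<usize>)], head: usize) -> Vec<usize> {
    let mut starts = vec![];
    let mut index_opt = Some(head);

    while let Some(index) = index_opt {
        let (start, next) = chunks[index];
        starts.push(start);
        index_opt = next;
    }

    starts
}

fn main() {
    // A chain 0 -> 2 -> 3; entry 1 is unrelated (say, an exit event).
    let chunks = [(0, Some(2)), (4, None), (5, Some(3)), (9, None)];
    assert_eq!(walk_chain(&chunks, 0), vec![0, 5, 9]);
}
```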