author    Titus Wormer <tituswormer@gmail.com>  2022-06-28 14:18:17 +0200
committer Titus Wormer <tituswormer@gmail.com>  2022-06-28 14:18:17 +0200
commit    dfd11b1bc155ae1fba9975a90c2dc83dc07697b4 (patch)
tree      0dd150365a6ae1df4c4845518efafe02ab61cb77 /src
parent    a3dd207e3b1ebcbcb6cec0f703a695e51ae4ece0 (diff)
Fix jumps in `edit_map`
* Use resolve more often (e.g., heading (atx, setext))
* Fix to link whole phrasing (e.g., one big chunk of text in heading (atx, setext), titles, labels)
* Replace `ChunkText`, `ChunkString` with `event.content_type: Option<ContentType>`
* Refactor to externalize `edit_map` from `label`
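In miniature, the `ChunkText`/`ChunkString` replacement looks like this (a sketch based on the `code_fenced.rs` hunks below; the surrounding state functions and character consumption are elided):

```rust
// Before: a dedicated chunk token type marked subcontent.
tokenizer.enter(TokenType::ChunkString);

// After: a generic `Data` token plus an optional content type on the event.
// `subtokenize` later parses linked `Data` spans as string (or text) content.
tokenizer.enter_with_content(TokenType::Data, Some(ContentType::String));
// …consume characters…
tokenizer.exit(TokenType::Data);
```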
Diffstat (limited to 'src')
-rw-r--r--  src/compiler.rs                         37
-rw-r--r--  src/construct/code_fenced.rs            12
-rw-r--r--  src/construct/heading_atx.rs           107
-rw-r--r--  src/construct/heading_setext.rs         32
-rw-r--r--  src/construct/label_end.rs             159
-rw-r--r--  src/construct/paragraph.rs              10
-rw-r--r--  src/construct/partial_destination.rs    12
-rw-r--r--  src/construct/partial_label.rs          54
-rw-r--r--  src/construct/partial_space_or_tab.rs  161
-rw-r--r--  src/construct/partial_title.rs          67
-rw-r--r--  src/subtokenize.rs                     200
-rw-r--r--  src/tokenizer.rs                        49
-rw-r--r--  src/util/edit_map.rs                   144
-rw-r--r--  src/util/mod.rs                          1
14 files changed, 580 insertions, 465 deletions
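The other half of the change is the new `src/util/edit_map.rs`. A rough sketch of the resolver pattern it enables, following `heading_atx::resolve` in the diff below; `heading_text_enter` and `heading_text_exit` are hypothetical stand-ins for the `Event` values built there:

```rust
use crate::util::edit_map::EditMap;

// Queue edits by event index; nothing is moved until `consume`.
let mut edit_map = EditMap::new();
// Insert an enter event before the first data event…
edit_map.add(start, 0, vec![heading_text_enter]);
// …drop every event between the first and the last data event…
edit_map.add(start + 1, end - start - 1, vec![]);
// …and insert an exit event after the last one.
edit_map.add(end + 1, 0, vec![heading_text_exit]);
// Splice everything in one pass; `previous`/`next` links of surviving
// events are shifted by the accumulated jumps (see `shift_links`).
let next_events = edit_map.consume(&mut tokenizer.events);
```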
diff --git a/src/compiler.rs b/src/compiler.rs
index 11dea29..019a53a 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -173,7 +173,6 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
// let mut last_was_tag = false;
let buffers: &mut Vec<Vec<String>> = &mut vec![vec![]];
let mut atx_opening_sequence_size: Option<usize> = None;
- let mut atx_heading_buffer: Option<String> = None;
let mut heading_setext_buffer: Option<String> = None;
let mut code_flow_seen_data: Option<bool> = None;
let mut code_fenced_fences_count: Option<usize> = None;
@@ -265,7 +264,6 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
| TokenType::HardBreakTrailingSpace
| TokenType::HeadingAtx
| TokenType::HeadingAtxSequence
- | TokenType::HeadingAtxSpaceOrTab
| TokenType::HeadingSetext
| TokenType::HeadingSetextUnderline
| TokenType::HtmlFlowData
@@ -628,25 +626,8 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
.expect("`atx_opening_sequence_size` must be set in headings");
buf_tail_mut(buffers).push(format!("</h{}>", rank));
atx_opening_sequence_size = None;
- atx_heading_buffer = None;
- }
- // `HeadingAtxSpaceOrTab` is ignored after the opening sequence,
- // before the closing sequence, and after the closing sequence.
- // But it is used around intermediate sequences.
- // `atx_heading_buffer` is set to `Some` by the first `HeadingAtxText`.
- // `HeadingAtxSequence` is ignored as the opening and closing sequence,
- // but not when intermediate.
- TokenType::HeadingAtxSequence | TokenType::HeadingAtxSpaceOrTab => {
- if let Some(buf) = atx_heading_buffer {
- atx_heading_buffer = Some(
- buf.to_string()
- + &encode_opt(
- &serialize(codes, &from_exit_event(events, index), false),
- ignore_encode,
- ),
- );
- }
-
+ }
+ TokenType::HeadingAtxSequence => {
// First fence we see.
if None == atx_opening_sequence_size {
let rank = serialize(codes, &from_exit_event(events, index), false).len();
@@ -655,18 +636,8 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
}
}
TokenType::HeadingAtxText => {
- let result = resume(buffers);
-
- if let Some(ref buf) = atx_heading_buffer {
- if !buf.is_empty() {
- buf_tail_mut(buffers).push(encode_opt(buf, ignore_encode));
- atx_heading_buffer = Some("".to_string());
- }
- } else {
- atx_heading_buffer = Some("".to_string());
- }
-
- buf_tail_mut(buffers).push(encode_opt(&result, ignore_encode));
+ let value = resume(buffers);
+ buf_tail_mut(buffers).push(value);
}
TokenType::HeadingSetextText => {
heading_setext_buffer = Some(resume(buffers));
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index 5b1426c..1602aad 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -103,7 +103,7 @@
use crate::constant::{CODE_FENCED_SEQUENCE_SIZE_MIN, TAB_SIZE};
use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
-use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+use crate::tokenizer::{Code, ContentType, State, StateFnResult, TokenType, Tokenizer};
use crate::util::span::from_exit_event;
/// Kind of fences.
@@ -259,7 +259,7 @@ fn info_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResu
}
_ => {
tokenizer.enter(TokenType::CodeFencedFenceInfo);
- tokenizer.enter(TokenType::ChunkString);
+ tokenizer.enter_with_content(TokenType::Data, Some(ContentType::String));
info_inside(tokenizer, code, info, vec![])
}
}
@@ -280,13 +280,13 @@ fn info_inside(
) -> StateFnResult {
match code {
Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
- tokenizer.exit(TokenType::ChunkString);
+ tokenizer.exit(TokenType::Data);
tokenizer.exit(TokenType::CodeFencedFenceInfo);
tokenizer.exit(TokenType::CodeFencedFence);
at_break(tokenizer, code, info)
}
Code::VirtualSpace | Code::Char('\t' | ' ') => {
- tokenizer.exit(TokenType::ChunkString);
+ tokenizer.exit(TokenType::Data);
tokenizer.exit(TokenType::CodeFencedFenceInfo);
tokenizer.attempt_opt(space_or_tab(), |t, c| meta_before(t, c, info))(tokenizer, code)
}
@@ -317,7 +317,7 @@ fn meta_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResu
}
_ => {
tokenizer.enter(TokenType::CodeFencedFenceMeta);
- tokenizer.enter(TokenType::ChunkString);
+ tokenizer.enter_with_content(TokenType::Data, Some(ContentType::String));
meta(tokenizer, code, info)
}
}
@@ -333,7 +333,7 @@ fn meta_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResu
fn meta(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
match code {
Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
- tokenizer.exit(TokenType::ChunkString);
+ tokenizer.exit(TokenType::Data);
tokenizer.exit(TokenType::CodeFencedFenceMeta);
tokenizer.exit(TokenType::CodeFencedFence);
at_break(tokenizer, code, info)
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index 1e5fe3d..2811894 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -40,7 +40,7 @@
//! * [`HeadingAtx`][TokenType::HeadingAtx]
//! * [`HeadingAtxSequence`][TokenType::HeadingAtxSequence]
//! * [`HeadingAtxText`][TokenType::HeadingAtxText]
-//! * [`HeadingAtxSpaceOrTab`][TokenType::HeadingAtxSpaceOrTab]
+//! * [`SpaceOrTab`][TokenType::SpaceOrTab]
//!
//! ## References
//!
@@ -54,11 +54,12 @@
//! [wiki-setext]: https://en.wikipedia.org/wiki/Setext
//! [atx]: http://www.aaronsw.com/2002/atx/
-use super::partial_space_or_tab::{
- space_or_tab, space_or_tab_with_options, Options as SpaceOrTabOptions,
-};
+use super::partial_space_or_tab::space_or_tab;
use crate::constant::HEADING_ATX_OPENING_FENCE_SIZE_MAX;
-use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+use crate::tokenizer::{
+ Code, ContentType, Event, EventType, State, StateFnResult, TokenType, Tokenizer,
+};
+use crate::util::edit_map::EditMap;
/// Start of a heading (atx).
///
@@ -106,14 +107,7 @@ fn sequence_open(tokenizer: &mut Tokenizer, code: Code, rank: usize) -> StateFnR
}
_ if rank > 0 => {
tokenizer.exit(TokenType::HeadingAtxSequence);
- tokenizer.go(
- space_or_tab_with_options(SpaceOrTabOptions {
- kind: TokenType::HeadingAtxSpaceOrTab,
- min: 1,
- max: usize::MAX,
- }),
- at_break,
- )(tokenizer, code)
+ tokenizer.go(space_or_tab(), at_break)(tokenizer, code)
}
_ => (State::Nok, None),
}
@@ -132,23 +126,18 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
tokenizer.exit(TokenType::HeadingAtx);
+ tokenizer.register_resolver("heading_atx".to_string(), Box::new(resolve));
(State::Ok, Some(vec![code]))
}
- Code::VirtualSpace | Code::Char('\t' | ' ') => tokenizer.go(
- space_or_tab_with_options(SpaceOrTabOptions {
- kind: TokenType::HeadingAtxSpaceOrTab,
- min: 1,
- max: usize::MAX,
- }),
- at_break,
- )(tokenizer, code),
+ Code::VirtualSpace | Code::Char('\t' | ' ') => {
+ tokenizer.go(space_or_tab(), at_break)(tokenizer, code)
+ }
Code::Char('#') => {
tokenizer.enter(TokenType::HeadingAtxSequence);
further_sequence(tokenizer, code)
}
Code::Char(_) => {
- tokenizer.enter(TokenType::HeadingAtxText);
- tokenizer.enter(TokenType::ChunkText);
+ tokenizer.enter_with_content(TokenType::Data, Some(ContentType::Text));
data(tokenizer, code)
}
}
@@ -179,8 +168,7 @@ fn data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
// Note: `#` for closing sequence must be preceded by whitespace, otherwise it’s just text.
Code::None | Code::CarriageReturnLineFeed | Code::Char('\t' | '\n' | '\r' | ' ') => {
- tokenizer.exit(TokenType::ChunkText);
- tokenizer.exit(TokenType::HeadingAtxText);
+ tokenizer.exit(TokenType::Data);
at_break(tokenizer, code)
}
_ => {
@@ -189,3 +177,72 @@ fn data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
}
}
+
+/// Resolve heading (atx): wrap the events between the first and last `Data`
+/// event in a single `HeadingAtxText` group.
+pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> {
+ let mut edit_map = EditMap::new();
+ let mut index = 0;
+ let mut heading_start: Option<usize> = None;
+ let mut data_start: Option<usize> = None;
+ let mut data_end: Option<usize> = None;
+
+ while index < tokenizer.events.len() {
+ let event = &tokenizer.events[index];
+
+ if event.token_type == TokenType::HeadingAtx {
+ if event.event_type == EventType::Enter {
+ heading_start = Some(index);
+ } else if let Some(start) = data_start {
+ // If `start` is some, `end` is too.
+ let end = data_end.unwrap();
+
+ edit_map.add(
+ start,
+ 0,
+ vec![Event {
+ event_type: EventType::Enter,
+ token_type: TokenType::HeadingAtxText,
+ point: tokenizer.events[start].point.clone(),
+ index: tokenizer.events[start].index,
+ previous: None,
+ next: None,
+ content_type: None,
+ }],
+ );
+
+ // Remove everything between the start and the end.
+ edit_map.add(start + 1, end - start - 1, vec![]);
+
+ edit_map.add(
+ end + 1,
+ 0,
+ vec![Event {
+ event_type: EventType::Exit,
+ token_type: TokenType::HeadingAtxText,
+ point: tokenizer.events[end].point.clone(),
+ index: tokenizer.events[end].index,
+ previous: None,
+ next: None,
+ content_type: None,
+ }],
+ );
+
+ heading_start = None;
+ data_start = None;
+ data_end = None;
+ }
+ } else if heading_start.is_some() && event.token_type == TokenType::Data {
+ if event.event_type == EventType::Enter {
+ if data_start.is_none() {
+ data_start = Some(index);
+ }
+ } else {
+ data_end = Some(index);
+ }
+ }
+
+ index += 1;
+ }
+
+ edit_map.consume(&mut tokenizer.events)
+}
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index 06ce481..63f3c30 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -56,9 +56,9 @@
//! [atx]: http://www.aaronsw.com/2002/atx/
use crate::constant::TAB_SIZE;
-use crate::construct::partial_space_or_tab::space_or_tab;
+use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_with_options, Options};
use crate::subtokenize::link;
-use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+use crate::tokenizer::{Code, ContentType, State, StateFnResult, TokenType, Tokenizer};
use crate::util::span::from_exit_event;
/// Kind of underline.
@@ -131,7 +131,7 @@ fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
_ => {
tokenizer.enter(TokenType::HeadingSetextText);
- tokenizer.enter(TokenType::ChunkText);
+ tokenizer.enter_with_content(TokenType::Data, Some(ContentType::Text));
text_inside(tokenizer, code)
}
}
@@ -148,7 +148,7 @@ fn text_inside(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::None => (State::Nok, None),
Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
- tokenizer.exit(TokenType::ChunkText);
+ tokenizer.exit(TokenType::Data);
tokenizer.exit(TokenType::HeadingSetextText);
tokenizer.attempt(underline_before, |ok| {
Box::new(if ok { after } else { text_continue })
@@ -176,16 +176,23 @@ fn text_continue(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
- tokenizer.enter(TokenType::LineEnding);
+ tokenizer.enter_with_content(TokenType::LineEnding, Some(ContentType::Text));
let index = tokenizer.events.len() - 1;
link(&mut tokenizer.events, index);
tokenizer.consume(code);
tokenizer.exit(TokenType::LineEnding);
(
- State::Fn(Box::new(
- tokenizer.attempt_opt(space_or_tab(), text_line_start),
- )),
+ State::Fn(Box::new(tokenizer.attempt_opt(
+ space_or_tab_with_options(Options {
+ kind: TokenType::SpaceOrTab,
+ min: 1,
+ max: usize::MAX,
+ content_type: Some(ContentType::Text),
+ connect: true,
+ }),
+ text_line_start,
+ ))),
None,
)
}
@@ -201,18 +208,11 @@ fn text_continue(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// ==
/// ```
fn text_line_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- let index = tokenizer.events.len() - 2;
-
- // Link the whitespace, if it exists.
- if tokenizer.events[index].token_type == TokenType::SpaceOrTab {
- link(&mut tokenizer.events, index);
- }
-
match code {
// Blank lines not allowed.
Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => (State::Nok, None),
_ => {
- tokenizer.enter(TokenType::ChunkText);
+ tokenizer.enter_with_content(TokenType::Data, Some(ContentType::Text));
let index = tokenizer.events.len() - 1;
link(&mut tokenizer.events, index);
text_inside(tokenizer, code)
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index 405858d..6e8e476 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -11,11 +11,10 @@ use crate::tokenizer::{
Code, Event, EventType, LabelStart, Media, State, StateFnResult, TokenType, Tokenizer,
};
use crate::util::{
+ edit_map::EditMap,
normalize_identifier::normalize_identifier,
span::{serialize, Span},
};
-/// To do: could we do without `HashMap`, so we don’t need `std`?
-use std::collections::HashMap;
#[derive(Debug)]
struct Info {
@@ -32,43 +31,45 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) -> Vec<Event> {
let media: Vec<Media> = tokenizer.media_list.drain(..).collect();
left.append(&mut left_2);
- let mut map: HashMap<usize, (usize, Vec<Event>)> = HashMap::new();
+ let mut edit_map = EditMap::new();
let events = &tokenizer.events;
+ // Remove loose label starts.
let mut index = 0;
while index < left.len() {
let label_start = &left[index];
let data_enter_index = label_start.start.0;
let data_exit_index = label_start.start.1;
- map.insert(
+ edit_map.add(
data_enter_index,
- (
- data_exit_index - data_enter_index,
- vec![
- Event {
- event_type: EventType::Enter,
- token_type: TokenType::Data,
- point: events[data_enter_index].point.clone(),
- index: events[data_enter_index].index,
- previous: None,
- next: None,
- },
- Event {
- event_type: EventType::Exit,
- token_type: TokenType::Data,
- point: events[data_exit_index].point.clone(),
- index: events[data_exit_index].index,
- previous: None,
- next: None,
- },
- ],
- ),
+ data_exit_index - data_enter_index,
+ vec![
+ Event {
+ event_type: EventType::Enter,
+ token_type: TokenType::Data,
+ point: events[data_enter_index].point.clone(),
+ index: events[data_enter_index].index,
+ previous: None,
+ next: None,
+ content_type: None,
+ },
+ Event {
+ event_type: EventType::Exit,
+ token_type: TokenType::Data,
+ point: events[data_exit_index].point.clone(),
+ index: events[data_exit_index].index,
+ previous: None,
+ next: None,
+ content_type: None,
+ },
+ ],
);
index += 1;
}
+ // Add grouping events.
let mut index = 0;
while index < media.len() {
let media = &media[index];
@@ -90,8 +91,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) -> Vec<Event> {
let group_end_index = media.end.1;
// Insert a group enter and label enter.
- add(
- &mut map,
+ edit_map.add(
group_enter_index,
0,
vec![
@@ -106,6 +106,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) -> Vec<Event> {
index: group_enter_event.index,
previous: None,
next: None,
+ content_type: None,
},
Event {
event_type: EventType::Enter,
@@ -114,6 +115,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) -> Vec<Event> {
index: group_enter_event.index,
previous: None,
next: None,
+ content_type: None,
},
],
);
@@ -121,8 +123,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) -> Vec<Event> {
// Empty events not allowed.
if text_enter_index != text_exit_index {
// Insert a text enter.
- add(
- &mut map,
+ edit_map.add(
text_enter_index,
0,
vec![Event {
@@ -132,12 +133,12 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) -> Vec<Event> {
index: events[text_enter_index].index,
previous: None,
next: None,
+ content_type: None,
}],
);
// Insert a text exit.
- add(
- &mut map,
+ edit_map.add(
text_exit_index,
0,
vec![Event {
@@ -147,13 +148,13 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) -> Vec<Event> {
index: events[text_exit_index].index,
previous: None,
next: None,
+ content_type: None,
}],
);
}
// Insert a label exit.
- add(
- &mut map,
+ edit_map.add(
label_exit_index + 1,
0,
vec![Event {
@@ -163,12 +164,12 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) -> Vec<Event> {
index: events[label_exit_index].index,
previous: None,
next: None,
+ content_type: None,
}],
);
// Insert a group exit.
- add(
- &mut map,
+ edit_map.add(
group_end_index + 1,
0,
vec![Event {
@@ -178,81 +179,14 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) -> Vec<Event> {
index: events[group_end_index].index,
previous: None,
next: None,
+ content_type: None,
}],
);
index += 1;
}
- let mut indices: Vec<&usize> = map.keys().collect();
- indices.sort_unstable();
- let mut next_events: Vec<Event> = vec![];
- let mut index_into_indices = 0;
- let mut start = 0;
- let events = &mut tokenizer.events;
- let mut shift: i32 = 0;
-
- while index_into_indices < indices.len() {
- let index = *indices[index_into_indices];
-
- if start < index {
- let append = &mut events[start..index].to_vec();
- let mut index = 0;
-
- while index < append.len() {
- let ev = &mut append[index];
-
- if let Some(x) = ev.previous {
- let next = (x as i32 + shift) as usize;
- ev.previous = Some(next);
- println!("todo: y: previous {:?} {:?} {:?}", x, shift, start);
- }
-
- if let Some(x) = ev.next {
- let next = (x as i32 + shift) as usize;
- ev.next = Some(next);
- println!("todo: y: next {:?} {:?} {:?}", x, shift, start);
- }
-
- index += 1;
- }
-
- next_events.append(append);
- }
-
- let (remove, add) = map.get(&index).unwrap();
- shift += (add.len() as i32) - (*remove as i32);
-
- if !add.is_empty() {
- let append = &mut add.clone();
- let mut index = 0;
-
- while index < append.len() {
- let ev = &mut append[index];
-
- if let Some(x) = ev.previous {
- println!("todo: x: previous {:?} {:?} {:?}", x, shift, start);
- }
-
- if let Some(x) = ev.next {
- println!("todo: x: next {:?} {:?} {:?}", x, shift, start);
- }
-
- index += 1;
- }
-
- next_events.append(append);
- }
-
- start = index + remove;
- index_into_indices += 1;
- }
-
- if start < events.len() {
- next_events.append(&mut events[start..].to_vec());
- }
-
- next_events
+ edit_map.consume(&mut tokenizer.events)
}
/// Start of label end.
@@ -693,20 +627,3 @@ fn collapsed_reference_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnRes
_ => (State::Nok, None),
}
}
-
-pub fn add(
- map: &mut HashMap<usize, (usize, Vec<Event>)>,
- index: usize,
- mut remove: usize,
- mut add: Vec<Event>,
-) {
- let curr = map.remove(&index);
-
- if let Some((curr_rm, mut curr_add)) = curr {
- remove += curr_rm;
- curr_add.append(&mut add);
- add = curr_add;
- }
-
- map.insert(index, (remove, add));
-}
diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs
index 13bd5aa..fea7052 100644
--- a/src/construct/paragraph.rs
+++ b/src/construct/paragraph.rs
@@ -39,7 +39,7 @@ use crate::construct::{
partial_space_or_tab::space_or_tab_min_max, thematic_break::start as thematic_break,
};
use crate::subtokenize::link;
-use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+use crate::tokenizer::{Code, ContentType, State, StateFnResult, TokenType, Tokenizer};
/// Before a paragraph.
///
@@ -53,7 +53,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
_ => {
tokenizer.enter(TokenType::Paragraph);
- tokenizer.enter(TokenType::ChunkText);
+ tokenizer.enter_with_content(TokenType::Data, Some(ContentType::Text));
inside(tokenizer, code)
}
}
@@ -86,8 +86,8 @@ fn inside(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// ```
fn at_line_ending(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.consume(code);
- tokenizer.exit(TokenType::ChunkText);
- tokenizer.enter(TokenType::ChunkText);
+ tokenizer.exit(TokenType::Data);
+ tokenizer.enter_with_content(TokenType::Data, Some(ContentType::Text));
let index = tokenizer.events.len() - 1;
link(&mut tokenizer.events, index);
(State::Fn(Box::new(inside)), None)
@@ -100,7 +100,7 @@ fn at_line_ending(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// ***
/// ```
fn end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- tokenizer.exit(TokenType::ChunkText);
+ tokenizer.exit(TokenType::Data);
tokenizer.exit(TokenType::Paragraph);
(State::Ok, Some(vec![code]))
}
diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs
index 7887a44..05f5060 100644
--- a/src/construct/partial_destination.rs
+++ b/src/construct/partial_destination.rs
@@ -72,7 +72,7 @@
//!
//! <!-- To do: link label end. -->
-use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+use crate::tokenizer::{Code, ContentType, State, StateFnResult, TokenType, Tokenizer};
/// Configuration.
///
@@ -134,7 +134,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code, options: Options) -> StateFn
tokenizer.enter(info.options.destination.clone());
tokenizer.enter(info.options.raw.clone());
tokenizer.enter(info.options.string.clone());
- tokenizer.enter(TokenType::ChunkString);
+ tokenizer.enter_with_content(TokenType::Data, Some(ContentType::String));
raw(tokenizer, code, info)
}
}
@@ -155,7 +155,7 @@ fn enclosed_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFn
(State::Ok, None)
} else {
tokenizer.enter(info.options.string.clone());
- tokenizer.enter(TokenType::ChunkString);
+ tokenizer.enter_with_content(TokenType::Data, Some(ContentType::String));
enclosed(tokenizer, code, info)
}
}
@@ -168,7 +168,7 @@ fn enclosed_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFn
fn enclosed(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
match code {
Code::Char('>') => {
- tokenizer.exit(TokenType::ChunkString);
+ tokenizer.exit(TokenType::Data);
tokenizer.exit(info.options.string.clone());
enclosed_before(tokenizer, code, info)
}
@@ -222,7 +222,7 @@ fn raw(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
}
Code::Char(')') => {
if info.balance == 0 {
- tokenizer.exit(TokenType::ChunkString);
+ tokenizer.exit(TokenType::Data);
tokenizer.exit(info.options.string.clone());
tokenizer.exit(info.options.raw.clone());
tokenizer.exit(info.options.destination);
@@ -240,7 +240,7 @@ fn raw(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
if info.balance > 0 {
(State::Nok, None)
} else {
- tokenizer.exit(TokenType::ChunkString);
+ tokenizer.exit(TokenType::Data);
tokenizer.exit(info.options.string.clone());
tokenizer.exit(info.options.raw.clone());
tokenizer.exit(info.options.destination);
diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs
index 1cb7d4b..dd8ee84 100644
--- a/src/construct/partial_label.rs
+++ b/src/construct/partial_label.rs
@@ -55,10 +55,12 @@
// To do: pass token types in.
+use super::partial_space_or_tab::{
+ space_or_tab_one_line_ending_with_options, OneLineEndingOptions,
+};
use crate::constant::LINK_REFERENCE_SIZE_MAX;
-use crate::construct::partial_space_or_tab::space_or_tab;
use crate::subtokenize::link;
-use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+use crate::tokenizer::{Code, ContentType, State, StateFnResult, TokenType, Tokenizer};
/// Configuration.
///
@@ -130,8 +132,18 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes
tokenizer.exit(info.options.label);
(State::Ok, None)
}
+ Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => tokenizer.go(
+ space_or_tab_one_line_ending_with_options(OneLineEndingOptions {
+ content_type: Some(ContentType::String),
+ connect: info.connect,
+ }),
+ |t, c| {
+ info.connect = true;
+ at_break(t, c, info)
+ },
+ )(tokenizer, code),
_ => {
- tokenizer.enter(TokenType::ChunkString);
+ tokenizer.enter_with_content(TokenType::Data, Some(ContentType::String));
if info.connect {
let index = tokenizer.events.len() - 1;
@@ -145,30 +157,6 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes
}
}
-/// After a line ending.
-///
-/// ```markdown
-/// [a
-/// |b]
-/// ```
-fn line_start(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
- tokenizer.attempt_opt(space_or_tab(), |t, c| line_begin(t, c, info))(tokenizer, code)
-}
-
-/// After a line ending, after optional whitespace.
-///
-/// ```markdown
-/// [a
-/// |b]
-/// ```
-fn line_begin(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
- match code {
- // Blank line not allowed.
- Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => (State::Nok, None),
- _ => at_break(tokenizer, code, info),
- }
-}
-
/// In a label, in text.
///
/// ```markdown
@@ -176,20 +164,14 @@ fn line_begin(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResul
/// ```
fn label(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
match code {
- Code::None | Code::Char('[' | ']') => {
- tokenizer.exit(TokenType::ChunkString);
+ Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n' | '[' | ']') => {
+ tokenizer.exit(TokenType::Data);
at_break(tokenizer, code, info)
}
_ if info.size > LINK_REFERENCE_SIZE_MAX => {
- tokenizer.exit(TokenType::ChunkString);
+ tokenizer.exit(TokenType::Data);
at_break(tokenizer, code, info)
}
- Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
- tokenizer.consume(code);
- info.size += 1;
- tokenizer.exit(TokenType::ChunkString);
- (State::Fn(Box::new(|t, c| line_start(t, c, info))), None)
- }
Code::VirtualSpace | Code::Char('\t' | ' ') => {
tokenizer.consume(code);
info.size += 1;
diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs
index 43bdc53..8df7601 100644
--- a/src/construct/partial_space_or_tab.rs
+++ b/src/construct/partial_space_or_tab.rs
@@ -4,7 +4,8 @@
//!
//! * [`micromark-factory-space/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-space/dev/index.js)
-use crate::tokenizer::{Code, State, StateFn, StateFnResult, TokenType, Tokenizer};
+use crate::subtokenize::link;
+use crate::tokenizer::{Code, ContentType, State, StateFn, StateFnResult, TokenType, Tokenizer};
/// Options to parse whitespace.
#[derive(Debug)]
@@ -15,6 +16,25 @@ pub struct Options {
pub max: usize,
/// Token type to use for whitespace events.
pub kind: TokenType,
+    /// Content type to attach to the whitespace events, if any.
+ pub content_type: Option<ContentType>,
+ pub connect: bool,
+}
+
+#[derive(Debug)]
+pub struct OneLineEndingOptions {
+    /// Content type to attach to the whitespace and line ending events, if any.
+ pub content_type: Option<ContentType>,
+ pub connect: bool,
+}
+
+/// State needed to parse whitespace with at most one line ending.
+#[derive(Debug)]
+struct OneLineInfo {
+ /// Whether something was seen.
+ connect: bool,
+ /// Configuration.
+ options: OneLineEndingOptions,
}
/// Options to parse whitespace.
@@ -35,45 +55,6 @@ pub fn space_or_tab() -> Box<StateFn> {
space_or_tab_min_max(1, usize::MAX)
}
-pub fn space_or_tab_one_line_ending() -> Box<StateFn> {
- Box::new(|tokenizer, code| {
- tokenizer.attempt(space_or_tab(), move |ok| {
- Box::new(move |tokenizer, code| match code {
- Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
- tokenizer.enter(TokenType::LineEnding);
- tokenizer.consume(code);
- tokenizer.exit(TokenType::LineEnding);
- (
- State::Fn(Box::new(tokenizer.attempt_opt(
- space_or_tab(),
- move |_t, code| {
- if !matches!(
- code,
- Code::None
- | Code::CarriageReturnLineFeed
- | Code::Char('\r' | '\n')
- ) {
- (State::Ok, Some(vec![code]))
- } else {
- (State::Nok, None)
- }
- },
- ))),
- None,
- )
- }
- _ => {
- if ok {
- (State::Ok, Some(vec![code]))
- } else {
- (State::Nok, None)
- }
- }
- })
- })(tokenizer, code)
- })
-}
-
/// Between `x` and `y` `space_or_tab`
///
/// ```bnf
@@ -84,6 +65,8 @@ pub fn space_or_tab_min_max(min: usize, max: usize) -> Box<StateFn> {
kind: TokenType::SpaceOrTab,
min,
max,
+ content_type: None,
+ connect: false,
})
}
@@ -104,7 +87,13 @@ pub fn space_or_tab_with_options(options: Options) -> Box<StateFn> {
fn start(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
match code {
Code::VirtualSpace | Code::Char('\t' | ' ') if info.options.max > 0 => {
- tokenizer.enter(info.options.kind.clone());
+ tokenizer.enter_with_content(info.options.kind.clone(), info.options.content_type);
+
+ if info.options.content_type.is_some() {
+ let index = tokenizer.events.len() - 1;
+ link(&mut tokenizer.events, index);
+ }
+
tokenizer.consume(code);
info.size += 1;
(State::Fn(Box::new(|t, c| inside(t, c, info))), None)
@@ -146,3 +135,93 @@ fn inside(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResul
}
}
}
+
+pub fn space_or_tab_one_line_ending() -> Box<StateFn> {
+ space_or_tab_one_line_ending_with_options(OneLineEndingOptions {
+ content_type: None,
+ connect: false,
+ })
+}
+
+pub fn space_or_tab_one_line_ending_with_options(options: OneLineEndingOptions) -> Box<StateFn> {
+ Box::new(move |tokenizer, code| {
+ let mut info = OneLineInfo {
+ connect: false,
+ options,
+ };
+
+ tokenizer.attempt(
+ space_or_tab_with_options(Options {
+ kind: TokenType::SpaceOrTab,
+ min: 1,
+ max: usize::MAX,
+ content_type: info.options.content_type,
+ connect: info.options.connect,
+ }),
+ move |ok| {
+ if ok && info.options.content_type.is_some() {
+ info.connect = true;
+ }
+
+ Box::new(move |tokenizer, code| match code {
+ Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
+ at_eol(tokenizer, code, info)
+ }
+ _ => {
+ if ok {
+ (State::Ok, Some(vec![code]))
+ } else {
+ (State::Nok, None)
+ }
+ }
+ })
+ },
+ )(tokenizer, code)
+ })
+}
+
+fn at_eol(tokenizer: &mut Tokenizer, code: Code, mut info: OneLineInfo) -> StateFnResult {
+ match code {
+ Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
+ tokenizer.enter_with_content(TokenType::LineEnding, info.options.content_type);
+
+ if info.options.content_type.is_some() {
+ if info.connect {
+ let index = tokenizer.events.len() - 1;
+ link(&mut tokenizer.events, index);
+ } else {
+ info.connect = true;
+ }
+ }
+
+ tokenizer.consume(code);
+ tokenizer.exit(TokenType::LineEnding);
+ (
+ State::Fn(Box::new(tokenizer.attempt_opt(
+ space_or_tab_with_options(Options {
+ kind: TokenType::SpaceOrTab,
+ min: 1,
+ max: usize::MAX,
+ content_type: info.options.content_type,
+ connect: info.connect,
+ }),
+ after_eol,
+ ))),
+ None,
+ )
+ }
+ _ => unreachable!("expected eol"),
+ }
+}
+
+fn after_eol(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ // Blank line not allowed.
+ if matches!(
+ code,
+ Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n')
+ ) {
+ (State::Nok, None)
+ } else {
+ (State::Ok, Some(vec![code]))
+ }
+}
diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs
index 78ae311..b102f7e 100644
--- a/src/construct/partial_title.rs
+++ b/src/construct/partial_title.rs
@@ -31,9 +31,11 @@
//!
//! <!-- To do: link label end. -->
-use crate::construct::partial_space_or_tab::space_or_tab;
-use crate::subtokenize::link_to;
-use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+use super::partial_space_or_tab::{
+ space_or_tab_one_line_ending_with_options, OneLineEndingOptions,
+};
+use crate::subtokenize::link;
+use crate::tokenizer::{Code, ContentType, State, StateFnResult, TokenType, Tokenizer};
/// Configuration.
///
@@ -108,8 +110,8 @@ impl Kind {
/// State needed to parse titles.
#[derive(Debug)]
struct Info {
- /// Whether we’ve seen our first `ChunkString`.
- connect_index: Option<usize>,
+ /// Whether we’ve seen data.
+ connect: bool,
/// Kind of title.
kind: Kind,
/// Configuration.
@@ -127,7 +129,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code, options: Options) -> StateFn
match code {
Code::Char(char) if char == '"' || char == '\'' || char == '(' => {
let info = Info {
- connect_index: None,
+ connect: false,
kind: Kind::from_char(char),
options,
};
@@ -181,14 +183,24 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes
begin(tokenizer, code, info)
}
Code::None => (State::Nok, None),
+ Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => tokenizer.go(
+ space_or_tab_one_line_ending_with_options(OneLineEndingOptions {
+ content_type: Some(ContentType::String),
+ connect: info.connect,
+ }),
+ |t, c| {
+ info.connect = true;
+ at_break(t, c, info)
+ },
+ )(tokenizer, code),
_ => {
- tokenizer.enter(TokenType::ChunkString);
+ tokenizer.enter_with_content(TokenType::Data, Some(ContentType::String));
- if let Some(connect_index) = info.connect_index {
+ if info.connect {
let index = tokenizer.events.len() - 1;
- link_to(&mut tokenizer.events, connect_index, index);
+ link(&mut tokenizer.events, index);
} else {
- info.connect_index = Some(tokenizer.events.len() - 1);
+ info.connect = true;
}
title(tokenizer, code, info)
@@ -196,30 +208,6 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes
}
}
-/// After a line ending.
-///
-/// ```markdown
-/// "a
-/// |b"
-/// ```
-fn line_start(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
- tokenizer.attempt_opt(space_or_tab(), |t, c| line_begin(t, c, info))(tokenizer, code)
-}
-
-/// After a line ending, after optional whitespace.
-///
-/// ```markdown
-/// "a
-/// |b"
-/// ```
-fn line_begin(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
- match code {
- // Blank line not allowed.
- Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => (State::Nok, None),
- _ => at_break(tokenizer, code, info),
- }
-}
-
/// In title text.
///
/// ```markdown
@@ -228,18 +216,13 @@ fn line_begin(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResul
fn title(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
match code {
Code::Char(char) if char == info.kind.as_char() => {
- tokenizer.exit(TokenType::ChunkString);
+ tokenizer.exit(TokenType::Data);
at_break(tokenizer, code, info)
}
- Code::None => {
- tokenizer.exit(TokenType::ChunkString);
+ Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
+ tokenizer.exit(TokenType::Data);
at_break(tokenizer, code, info)
}
- Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
- tokenizer.consume(code);
- tokenizer.exit(TokenType::ChunkString);
- (State::Fn(Box::new(|t, c| line_start(t, c, info))), None)
- }
Code::Char('\\') => {
tokenizer.consume(code);
(State::Fn(Box::new(|t, c| escape(t, c, info))), None)
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index 58db3c6..92ada04 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -9,8 +9,7 @@
//! * …must occur on [`Enter`][EventType::Enter] events only
//! * …must occur on void events (they are followed by their corresponding
//! [`Exit`][EventType::Exit] event)
-//! * …must be headed by a [`ChunkString`][TokenType::ChunkString] or
-//! [`ChunkText`][TokenType::ChunkText] event
+//! * …must have `content_type` field to define the kind of subcontent
//!
//! Links will then be passed through a tokenizer for the corresponding content
//! type by `subtokenize`.
@@ -21,15 +20,13 @@
//! us from doing so due to definitions, which can occur after references, and
//! thus the whole document needs to be parsed up to the level of definitions,
//! before any level that can include references can be parsed.
-//!
-//! <!-- To do: `ChunkFlow` when it exists. -->
/// To do: could we do without `HashMap`, so we don’t need `std`?
use std::collections::HashMap;
use crate::content::{string::start as string, text::start as text};
use crate::parser::ParseState;
-use crate::tokenizer::{Event, EventType, State, StateFn, StateFnResult, TokenType, Tokenizer};
+use crate::tokenizer::{ContentType, Event, EventType, State, StateFn, StateFnResult, Tokenizer};
use crate::util::span;
/// Create a link between two [`Event`][]s.
@@ -44,19 +41,19 @@ pub fn link(events: &mut [Event], index: usize) {
/// To do
pub fn link_to(events: &mut [Event], pevious: usize, next: usize) {
let prev = &mut events[pevious];
- // To do: force chunks?
- // assert!(
- // prev.token_type == TokenType::ChunkString || prev.token_type == TokenType::ChunkText,
- // "{:?}",
- // prev.token_type.to_owned()
- // );
+ assert!(
+ prev.content_type.is_some(),
+ "expected `content_type` on previous"
+ );
assert_eq!(prev.event_type, EventType::Enter);
prev.next = Some(next);
let prev_ref = &events[pevious];
let prev_exit_ref = &events[pevious + 1];
+ let curr_ref = &events[next];
assert_eq!(prev_exit_ref.event_type, EventType::Exit);
assert_eq!(prev_exit_ref.token_type, prev_ref.token_type);
+ assert_eq!(curr_ref.content_type, prev_ref.content_type);
let curr = &mut events[next];
assert_eq!(curr.event_type, EventType::Enter);
@@ -83,103 +80,104 @@ pub fn subtokenize(mut events: Vec<Event>, parse_state: &ParseState) -> (Vec<Eve
let event = &events[index];
// Find each first opening chunk.
- if (event.token_type == TokenType::ChunkString
- || event.token_type == TokenType::ChunkText) &&
- event.event_type == EventType::Enter &&
- // No need to enter linked events again.
- event.previous == None
- {
- done = false;
- // Index into `events` pointing to a chunk.
- let mut index_opt: Option<usize> = Some(index);
- // Subtokenizer.
- let mut tokenizer = Tokenizer::new(event.point.clone(), event.index, parse_state);
- // Substate.
- let mut result: StateFnResult = (
- State::Fn(Box::new(if event.token_type == TokenType::ChunkString {
- string
- } else {
- text
- })),
- None,
- );
- // Indices into `codes` of each end of chunk.
- let mut ends: Vec<usize> = vec![];
-
- // Loop through chunks to pass them in order to the subtokenizer.
- while let Some(index_ptr) = index_opt {
- let enter = &events[index_ptr];
- assert_eq!(enter.event_type, EventType::Enter);
- let span = span::Span {
- start_index: enter.index,
- end_index: events[index_ptr + 1].index,
- };
- ends.push(span.end_index);
-
- if enter.previous != None {
- tokenizer.define_skip(&enter.point, span.start_index);
- }
-
- let func: Box<StateFn> = match result.0 {
- State::Fn(func) => func,
- _ => unreachable!("cannot be ok/nok"),
- };
+ if let Some(ref content_type) = event.content_type {
+ assert_eq!(event.event_type, EventType::Enter);
- result = tokenizer.push(
- span::codes(&parse_state.codes, &span),
- func,
- enter.next == None,
+ // No need to enter linked events again.
+ if event.previous == None {
+ done = false;
+ // Index into `events` pointing to a chunk.
+ let mut index_opt: Option<usize> = Some(index);
+ // Subtokenizer.
+ let mut tokenizer = Tokenizer::new(event.point.clone(), event.index, parse_state);
+ // Substate.
+ let mut result: StateFnResult = (
+ State::Fn(Box::new(if *content_type == ContentType::String {
+ string
+ } else {
+ text
+ })),
+ None,
);
- assert!(result.1.is_none(), "expected no remainder");
- index_opt = enter.next;
- }
-
- // Now, loop through all subevents (and `ends`), to figure out
- // which parts belong where.
- // Current index.
- let mut subindex = 0;
- // Index into subevents that starts the current slice.
- let mut last_start = 0;
- // Counter into `ends`: the linked token we are at.
- let mut end_index = 0;
- let mut index_opt: Option<usize> = Some(index);
-
- while subindex < tokenizer.events.len() {
- let subevent = &mut tokenizer.events[subindex];
-
- // Find the first event that starts after the end we’re looking
- // for.
- // To do: is this logic correct?
- if subevent.event_type == EventType::Enter && subevent.index >= ends[end_index] {
- let link = index_opt.unwrap();
- link_to_info.insert(link, (index, last_start, subindex));
-
- last_start = subindex;
- end_index += 1;
- index_opt = events[link].next;
+ // Indices into `codes` of each end of chunk.
+ let mut ends: Vec<usize> = vec![];
+
+ // Loop through chunks to pass them in order to the subtokenizer.
+ while let Some(index_ptr) = index_opt {
+ let enter = &events[index_ptr];
+ assert_eq!(enter.event_type, EventType::Enter);
+ let span = span::Span {
+ start_index: enter.index,
+ end_index: events[index_ptr + 1].index,
+ };
+ ends.push(span.end_index);
+
+ if enter.previous != None {
+ tokenizer.define_skip(&enter.point, span.start_index);
+ }
+
+ let func: Box<StateFn> = match result.0 {
+ State::Fn(func) => func,
+ _ => unreachable!("cannot be ok/nok"),
+ };
+
+ result = tokenizer.push(
+ span::codes(&parse_state.codes, &span),
+ func,
+ enter.next == None,
+ );
+ assert!(result.1.is_none(), "expected no remainder");
+ index_opt = enter.next;
}
- // If there is a `next` link in the subevents, we have to change
- // its index to account for the shifted events.
- // If it points to a next event, we also change the next event’s
- // reference back to *this* event.
- if let Some(next) = subevent.next {
- // The `index` in `events` where the current link is,
- // minus 2 events (the enter and exit) for each removed
- // link.
- let shift = index_opt.unwrap() - (end_index * 2);
-
- subevent.next = Some(next + shift);
- let next_ev = &mut tokenizer.events[next];
- let previous = next_ev.previous.unwrap();
- next_ev.previous = Some(previous + shift);
+ // Now, loop through all subevents (and `ends`), to figure out
+ // which parts belong where.
+ // Current index.
+ let mut subindex = 0;
+ // Index into subevents that starts the current slice.
+ let mut last_start = 0;
+ // Counter into `ends`: the linked token we are at.
+ let mut end_index = 0;
+ let mut index_opt: Option<usize> = Some(index);
+
+ while subindex < tokenizer.events.len() {
+ let subevent = &mut tokenizer.events[subindex];
+
+ // Find the first event that starts after the end we’re looking
+ // for.
+ // To do: is this logic correct?
+ if subevent.event_type == EventType::Enter && subevent.index >= ends[end_index]
+ {
+ let link = index_opt.unwrap();
+ link_to_info.insert(link, (index, last_start, subindex));
+
+ last_start = subindex;
+ end_index += 1;
+ index_opt = events[link].next;
+ }
+
+ // If there is a `next` link in the subevents, we have to change
+ // its index to account for the shifted events.
+ // If it points to a next event, we also change the next event’s
+ // reference back to *this* event.
+ if let Some(next) = subevent.next {
+ // The `index` in `events` where the current link is,
+ // minus 2 events (the enter and exit) for each removed
+ // link.
+ let shift = index_opt.unwrap() - (end_index * 2);
+
+ subevent.next = Some(next + shift);
+ let next_ev = &mut tokenizer.events[next];
+ let previous = next_ev.previous.unwrap();
+ next_ev.previous = Some(previous + shift);
+ }
+
+ subindex += 1;
}
- subindex += 1;
+ link_to_info.insert(index_opt.unwrap(), (index, last_start, subindex));
+ head_to_tokenizer.insert(index, tokenizer);
}
-
- link_to_info.insert(index_opt.unwrap(), (index, last_start, subindex));
- head_to_tokenizer.insert(index, tokenizer);
}
index += 1;
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index a692a4d..cba055d 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -871,7 +871,7 @@ pub enum TokenType {
/// * **Content model**:
/// [`HeadingAtxSequence`][TokenType::HeadingAtxSequence],
/// [`HeadingAtxText`][TokenType::HeadingAtxText],
- /// [`HeadingAtxSpaceOrTab`][TokenType::HeadingAtxSpaceOrTab]
+ /// [`SpaceOrTab`][TokenType::SpaceOrTab]
/// * **Construct**:
/// [`heading_atx`][crate::construct::heading_atx]
///
@@ -887,8 +887,7 @@ pub enum TokenType {
/// ## Info
///
/// * **Context**:
- /// [`HeadingAtx`][TokenType::HeadingAtx],
- /// [flow content][crate::content::flow]
+ /// [`HeadingAtx`][TokenType::HeadingAtx]
/// * **Content model**:
/// void
/// * **Construct**:
@@ -908,7 +907,7 @@ pub enum TokenType {
/// * **Context**:
/// [`HeadingAtx`][TokenType::HeadingAtx],
/// * **Content model**:
- /// [string content][crate::content::string]
+ /// [text content][crate::content::text]
/// * **Construct**:
/// [`heading_atx`][crate::construct::heading_atx]
///
@@ -919,24 +918,6 @@ pub enum TokenType {
/// ^^^^^
/// ```
HeadingAtxText,
- /// Heading (atx) spaces.
- ///
- /// ## Info
- ///
- /// * **Context**:
- /// [`HeadingAtx`][TokenType::HeadingAtx],
- /// * **Content model**:
- /// void
- /// * **Construct**:
- /// [`heading_atx`][crate::construct::heading_atx]
- ///
- /// ## Example
- ///
- /// ```markdown
- /// > | # alpha
- /// ^
- /// ```
- HeadingAtxSpaceOrTab,
/// Whole heading (setext).
///
/// ## Info
@@ -1194,18 +1175,13 @@ pub enum TokenType {
/// ^ ^ ^ ^
/// ```
SpaceOrTab,
+}
- /// Chunk (string).
- ///
- /// Tokenized where [string content][crate::content::string] can exist and
- /// unraveled by [`subtokenize`][crate::subtokenize].
- ChunkString,
-
- /// Chunk (text).
- ///
- /// Tokenized where [text content][crate::content::text] can exist and
- /// unraveled by [`subtokenize`][crate::subtokenize].
- ChunkText,
+/// Kind of content a chain of linked events is tokenized as.
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub enum ContentType {
+ Text,
+ String,
}
/// Enum representing a character code.
@@ -1259,6 +1235,7 @@ pub struct Event {
pub index: usize,
pub previous: Option<usize>,
pub next: Option<usize>,
+ pub content_type: Option<ContentType>,
}
/// The essence of the state machine are functions: `StateFn`.
@@ -1467,6 +1444,10 @@ impl<'a> Tokenizer<'a> {
/// Mark the start of a semantic label.
pub fn enter(&mut self, token_type: TokenType) {
+ self.enter_with_content(token_type, None);
+ }
+
+ pub fn enter_with_content(&mut self, token_type: TokenType, content_type: Option<ContentType>) {
log::debug!("enter `{:?}` ({:?})", token_type, self.point);
self.events.push(Event {
event_type: EventType::Enter,
@@ -1475,6 +1456,7 @@ impl<'a> Tokenizer<'a> {
index: self.index,
previous: None,
next: None,
+ content_type,
});
self.stack.push(token_type);
}
@@ -1504,6 +1486,7 @@ impl<'a> Tokenizer<'a> {
index: self.index,
previous: None,
next: None,
+ content_type: None,
});
}
diff --git a/src/util/edit_map.rs b/src/util/edit_map.rs
new file mode 100644
index 0000000..8136306
--- /dev/null
+++ b/src/util/edit_map.rs
@@ -0,0 +1,144 @@
+use crate::tokenizer::Event;
+
+/// To do: could we do without `HashMap`, so we don’t need `std`?
+use std::collections::HashMap;
+
+pub fn shift_links(events: &mut [Event], jumps: &[(usize, isize)]) {
+ let map = |before| {
+ let mut jump_index = 0;
+ let mut jump = 0;
+
+ while jump_index < jumps.len() {
+ if jumps[jump_index].0 > before {
+ break;
+ }
+
+ jump = jumps[jump_index].1;
+ jump_index += 1;
+ }
+
+ #[allow(clippy::pedantic)]
+ let next_i = (before as isize) + jump;
+ assert!(next_i >= 0, "cannot shift before `0`");
+ #[allow(clippy::pedantic)]
+ let next = next_i as usize;
+ next
+ };
+
+ let mut index = 0;
+
+ while index < events.len() {
+ let event = &mut events[index];
+ event.previous = event.previous.map(map);
+ event.next = event.next.map(map);
+ index += 1;
+ }
+}
+
+/// Make it easy to insert and remove things while being performant and keeping
+/// links in check.
+pub struct EditMap {
+ consumed: bool,
+ map: HashMap<usize, (usize, Vec<Event>)>,
+}
+
+impl EditMap {
+ /// Create a new edit map.
+ pub fn new() -> EditMap {
+ EditMap {
+ consumed: false,
+ map: HashMap::new(),
+ }
+ }
+ /// Create an edit: a remove and/or add at a certain place.
+ pub fn add(&mut self, index: usize, mut remove: usize, mut add: Vec<Event>) {
+ assert!(!self.consumed, "cannot add after consuming");
+
+ if let Some((curr_remove, mut curr_add)) = self.map.remove(&index) {
+ remove += curr_remove;
+ curr_add.append(&mut add);
+ add = curr_add;
+ }
+
+ self.map.insert(index, (remove, add));
+ }
+ /// Done, change the events.
+ pub fn consume(&mut self, events: &mut [Event]) -> Vec<Event> {
+ let mut indices: Vec<&usize> = self.map.keys().collect();
+ let mut next_events: Vec<Event> = vec![];
+ let mut start = 0;
+
+ assert!(!self.consumed, "cannot consume after consuming");
+ self.consumed = true;
+
+ let mut index = 0;
+
+ while index < events.len() {
+ let event = &events[index];
+ println!(
+ "ev: {:?} {:?} {:?} {:?} {:?} {:?}",
+ index,
+ event.event_type,
+ event.token_type,
+ event.content_type,
+ event.previous,
+ event.next
+ );
+ index += 1;
+ }
+
+ indices.sort_unstable();
+
+ let mut jumps: Vec<(usize, isize)> = vec![];
+ let mut index_into_indices = 0;
+ let mut shift: isize = 0;
+ while index_into_indices < indices.len() {
+ let index = *indices[index_into_indices];
+ let edit = self.map.get(&index).unwrap();
+ println!("?? {:?} {:?} {:?}", shift, edit.1.len(), edit.0);
+
+ #[allow(clippy::pedantic)]
+ let next = shift + (edit.1.len() as isize) - (edit.0 as isize);
+ shift = next;
+ jumps.push((index, shift));
+ index_into_indices += 1;
+ }
+
+ let mut index_into_indices = 0;
+
+ while index_into_indices < indices.len() {
+ let index = *indices[index_into_indices];
+
+ if start < index {
+ let append = &mut events[start..index].to_vec();
+ shift_links(append, &jumps);
+ next_events.append(append);
+ }
+
+ let (remove, add) = self.map.get(&index).unwrap();
+
+ if !add.is_empty() {
+ let append = &mut add.clone();
+ let mut index = 0;
+
+ while index < append.len() {
+ let event = &mut append[index];
+ assert!(event.previous.is_none(), "to do?");
+ assert!(event.next.is_none(), "to do?");
+ index += 1;
+ }
+
+ next_events.append(append);
+ }
+
+ start = index + remove;
+ index_into_indices += 1;
+ }
+
+ if start < events.len() {
+ next_events.append(&mut events[start..].to_vec());
+ }
+
+ next_events
+ }
+}
diff --git a/src/util/mod.rs b/src/util/mod.rs
index ee58518..68ef275 100644
--- a/src/util/mod.rs
+++ b/src/util/mod.rs
@@ -1,6 +1,7 @@
//! Utilities used when compiling markdown.
pub mod decode_character_reference;
+pub mod edit_map;
pub mod encode;
pub mod normalize_identifier;
pub mod sanitize_uri;