aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/construct/heading_setext.rs3
-rw-r--r--src/construct/paragraph.rs2
-rw-r--r--src/construct/partial_label.rs2
-rw-r--r--src/construct/partial_title.rs2
-rw-r--r--src/subtokenize.rs53
-rw-r--r--src/util/link.rs8
-rw-r--r--src/util/mod.rs1
7 files changed, 56 insertions, 15 deletions
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index 7c41855..d9ff96c 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -51,8 +51,9 @@
use crate::constant::TAB_SIZE;
use crate::construct::partial_space_or_tab::space_or_tab_opt;
+use crate::subtokenize::link;
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
-use crate::util::{link::link, span::from_exit_event};
+use crate::util::span::from_exit_event;
/// Kind of underline.
#[derive(Debug, Clone, PartialEq)]
diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs
index 624ee8e..b7ab919 100644
--- a/src/construct/paragraph.rs
+++ b/src/construct/paragraph.rs
@@ -34,8 +34,8 @@ use crate::construct::{
heading_atx::start as heading_atx, html_flow::start as html_flow,
partial_space_or_tab::space_or_tab_min_max, thematic_break::start as thematic_break,
};
+use crate::subtokenize::link;
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
-use crate::util::link::link;
/// Before a paragraph.
///
diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs
index c831eaf..c78278e 100644
--- a/src/construct/partial_label.rs
+++ b/src/construct/partial_label.rs
@@ -57,8 +57,8 @@
use crate::constant::LINK_REFERENCE_SIZE_MAX;
use crate::construct::partial_space_or_tab::space_or_tab_opt;
+use crate::subtokenize::link;
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
-use crate::util::link::link;
/// Configuration.
///
diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs
index d02ce60..f2278c0 100644
--- a/src/construct/partial_title.rs
+++ b/src/construct/partial_title.rs
@@ -32,8 +32,8 @@
//! <!-- To do: link label end. -->
use crate::construct::partial_space_or_tab::space_or_tab_opt;
+use crate::subtokenize::link;
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
-use crate::util::link::link;
/// Configuration.
///
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index 1188c61..690eb58 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -1,3 +1,29 @@
+//! Deal with content in other content.
+//!
+//! To deal with content in content, *you* (a `micromark-rs` contributor) add
+//! information on events.
+//! Events are a flat list, but they can be connected to each other by setting
+//! `previous` and `next` links.
+//! These links:
+//!
+//! * …must occur on [`Enter`][EventType::Enter] events only
+//! * …must occur on void events (they are followed by their corresponding
+//! [`Exit`][EventType::Exit] event)
+//! * …must be headed by a [`ChunkString`][TokenType::ChunkString] or
+//! [`ChunkText`][TokenType::ChunkText] event
+//!
+//! Links will then be passed through a tokenizer for the corresponding content
+//! type by `subtokenize`.
+//! The subevents they result in are split up into slots for each linked token
+//! and replace those links.
+//!
+//! Subevents are not immediately subtokenized again, because markdown does
+//! not allow that: definitions can occur after the references that use them,
+//! so the whole document needs to be parsed up to the level of definitions
+//! before any level that can include references can be parsed.
+//!
+//! <!-- To do: `ChunkFlow` when it exists. -->
+
use crate::content::{string::start as string, text::start as text};
use crate::tokenizer::{
Code, Event, EventType, State, StateFn, StateFnResult, TokenType, Tokenizer,
@@ -5,11 +31,34 @@ use crate::tokenizer::{
use crate::util::span;
use std::collections::HashMap;
-/// To do.
+/// Create a link between two void [`Event`][]s: the one entered at `index - 2` and the one at `index`.
+///
+/// Sets `next` on the earlier enter event and `previous` on the later one, which covers the common
+/// case where the token at `index` is connected to the previous void token.
+/// Panics if `index < 2`, if either is not an enter, or if `index - 1` is not the matching exit.
+pub fn link(events: &mut [Event], index: usize) {
+ let prev = &mut events[index - 2];
+ assert_eq!(prev.event_type, EventType::Enter);
+ prev.next = Some(index);
+
+ let prev_ref = &events[index - 2];
+ let prev_exit_ref = &events[index - 1];
+ assert_eq!(prev_exit_ref.event_type, EventType::Exit);
+ assert_eq!(prev_exit_ref.token_type, prev_ref.token_type);
+
+ let curr = &mut events[index];
+ assert_eq!(curr.event_type, EventType::Enter);
+ curr.previous = Some(index - 2);
+ // Note: the exit of the event at `index` may not exist, so that is deliberately not checked.
+}
+
+/// Parse linked events.
+///
+/// Meant to be called repeatedly: the `bool` at position `1` of the returned tuple is `true` when done.
pub fn subtokenize(events: Vec<Event>, codes: &[Code]) -> (Vec<Event>, bool) {
let mut events = events;
let mut index = 0;
- // Map of first chunks its tokenizer.
+ // Map of first chunks to their tokenizer.
let mut head_to_tokenizer: HashMap<usize, Tokenizer> = HashMap::new();
// Map of chunks to their head and corresponding range of events.
let mut link_to_info: HashMap<usize, (usize, usize, usize)> = HashMap::new();
diff --git a/src/util/link.rs b/src/util/link.rs
deleted file mode 100644
index 917ce4d..0000000
--- a/src/util/link.rs
+++ /dev/null
@@ -1,8 +0,0 @@
-//! To do.
-
-use crate::tokenizer::Event;
-
-pub fn link(events: &mut [Event], index: usize) {
- events[index - 2].next = Some(index);
- events[index].previous = Some(index - 2);
-}
diff --git a/src/util/mod.rs b/src/util/mod.rs
index 5439c62..c3db267 100644
--- a/src/util/mod.rs
+++ b/src/util/mod.rs
@@ -2,6 +2,5 @@
pub mod decode_character_reference;
pub mod encode;
-pub mod link;
pub mod sanitize_uri;
pub mod span;