diff options
Diffstat (limited to '')
-rw-r--r-- | readme.md | 4 | ||||
-rw-r--r-- | src/construct/heading_setext.rs | 3 | ||||
-rw-r--r-- | src/construct/paragraph.rs | 2 | ||||
-rw-r--r-- | src/construct/partial_label.rs | 2 | ||||
-rw-r--r-- | src/construct/partial_title.rs | 2 | ||||
-rw-r--r-- | src/subtokenize.rs | 53 | ||||
-rw-r--r-- | src/util/link.rs | 8 | ||||
-rw-r--r-- | src/util/mod.rs | 1 |
8 files changed, 58 insertions, 17 deletions
@@ -68,8 +68,6 @@ cargo doc --document-private-items #### Docs -- [ ] (1) Add docs to `subtokenize.rs` -- [ ] (1) Add docs for `link.rs` - [ ] (1) Add docs for token types - [ ] (1) Add docs for tokenizer (`go`, `define_skip`, `account_for_potential_skip`, `attempt_5`, `attempt_7`, `call_multiple`) @@ -234,6 +232,8 @@ cargo doc --document-private-items - [x] (1) Configurable limit (destination) - [x] (1) Add docs for `default_line_ending` - [x] (1) Add docs for virtual spaces +- [x] (1) Add docs to `subtokenize.rs` +- [x] (1) Add docs for `link.rs` ### Extensions diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs index 7c41855..d9ff96c 100644 --- a/src/construct/heading_setext.rs +++ b/src/construct/heading_setext.rs @@ -51,8 +51,9 @@ use crate::constant::TAB_SIZE; use crate::construct::partial_space_or_tab::space_or_tab_opt; +use crate::subtokenize::link; use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; -use crate::util::{link::link, span::from_exit_event}; +use crate::util::span::from_exit_event; /// Kind of underline. #[derive(Debug, Clone, PartialEq)] diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs index 624ee8e..b7ab919 100644 --- a/src/construct/paragraph.rs +++ b/src/construct/paragraph.rs @@ -34,8 +34,8 @@ use crate::construct::{ heading_atx::start as heading_atx, html_flow::start as html_flow, partial_space_or_tab::space_or_tab_min_max, thematic_break::start as thematic_break, }; +use crate::subtokenize::link; use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; -use crate::util::link::link; /// Before a paragraph. /// diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs index c831eaf..c78278e 100644 --- a/src/construct/partial_label.rs +++ b/src/construct/partial_label.rs @@ -57,8 +57,8 @@ use crate::constant::LINK_REFERENCE_SIZE_MAX; use crate::construct::partial_space_or_tab::space_or_tab_opt; +use crate::subtokenize::link; use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; -use crate::util::link::link; /// Configuration. /// diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs index d02ce60..f2278c0 100644 --- a/src/construct/partial_title.rs +++ b/src/construct/partial_title.rs @@ -32,8 +32,8 @@ //! <!-- To do: link label end. --> use crate::construct::partial_space_or_tab::space_or_tab_opt; +use crate::subtokenize::link; use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; -use crate::util::link::link; /// Configuration. /// diff --git a/src/subtokenize.rs b/src/subtokenize.rs index 1188c61..690eb58 100644 --- a/src/subtokenize.rs +++ b/src/subtokenize.rs @@ -1,3 +1,29 @@ +//! Deal with content in other content. +//! +//! To deal with content in content, *you* (a `micromark-rs` contributor) add +//! information on events. +//! Events are a flat list, but they can be connected to each other by setting +//! `previous` and `next` links. +//! These links: +//! +//! * …must occur on [`Enter`][EventType::Enter] events only +//! * …must occur on void events (they are followed by their corresponding +//! [`Exit`][EventType::Exit] event) +//! * …must be headed by a [`ChunkString`][TokenType::ChunkString] or +//! [`ChunkText`][TokenType::ChunkText] event +//! +//! Links will then be passed through a tokenizer for the corresponding content +//! type by `subtokenize`. +//! The subevents they result in are split up into slots for each linked token +//! and replace those links. +//! +//! Subevents are not immediately subtokenized again because markdown prevents +//! us from doing so due to definitions, which can occur after references, and +//! thus the whole document needs to be parsed up to the level of definitions, +//! before any level that can include references can be parsed. +//! +//! <!-- To do: `ChunkFlow` when it exists. --> + use crate::content::{string::start as string, text::start as text}; use crate::tokenizer::{ Code, Event, EventType, State, StateFn, StateFnResult, TokenType, Tokenizer, @@ -5,11 +31,34 @@ use crate::tokenizer::{ use crate::util::span; use std::collections::HashMap; -/// To do. +/// Create a link between two [`Event`][]s. +/// +/// Arbitrary (void) events can be linked together. +/// This optimizes for the common case where the token at `index` is connected +/// to the previous void token. +pub fn link(events: &mut [Event], index: usize) { + let prev = &mut events[index - 2]; + assert_eq!(prev.event_type, EventType::Enter); + prev.next = Some(index); + + let prev_ref = &events[index - 2]; + let prev_exit_ref = &events[index - 1]; + assert_eq!(prev_exit_ref.event_type, EventType::Exit); + assert_eq!(prev_exit_ref.token_type, prev_ref.token_type); + + let curr = &mut events[index]; + assert_eq!(curr.event_type, EventType::Enter); + curr.previous = Some(index - 2); + // Note: the exit of this event may not exist, so don’t check for that. +} + +/// Parse linked events. +/// +/// Supposed to be called repeatedly, returns `1: true` when done. pub fn subtokenize(events: Vec<Event>, codes: &[Code]) -> (Vec<Event>, bool) { let mut events = events; let mut index = 0; - // Map of first chunks its tokenizer. + // Map of first chunks to their tokenizer. let mut head_to_tokenizer: HashMap<usize, Tokenizer> = HashMap::new(); // Map of chunks to their head and corresponding range of events. let mut link_to_info: HashMap<usize, (usize, usize, usize)> = HashMap::new(); diff --git a/src/util/link.rs b/src/util/link.rs deleted file mode 100644 index 917ce4d..0000000 --- a/src/util/link.rs +++ /dev/null @@ -1,8 +0,0 @@ -//! To do. - -use crate::tokenizer::Event; - -pub fn link(events: &mut [Event], index: usize) { - events[index - 2].next = Some(index); - events[index].previous = Some(index - 2); -} diff --git a/src/util/mod.rs b/src/util/mod.rs index 5439c62..c3db267 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -2,6 +2,5 @@ pub mod decode_character_reference; pub mod encode; -pub mod link; pub mod sanitize_uri; pub mod span; |