about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Titus Wormer <tituswormer@gmail.com> 2022-07-22 18:46:33 +0200
committer: Titus Wormer <tituswormer@gmail.com> 2022-07-22 18:57:19 +0200
commit: bac358ee5c341729e50630f2569a69b4d580ce47 (patch)
tree: 9ea5f311dcad46e54dfaa55a2985c75925ff6c83
parent: 0525454e33ed6bcd7b43da1c0969c1d592e743d9 (diff)
download: markdown-rs-bac358ee5c341729e50630f2569a69b4d580ce47.tar.gz
markdown-rs-bac358ee5c341729e50630f2569a69b4d580ce47.tar.bz2
markdown-rs-bac358ee5c341729e50630f2569a69b4d580ce47.zip
Refactor to use a single shared edit map
-rw-r--r-- src/construct/attention.rs 14
-rw-r--r-- src/construct/heading_atx.rs 12
-rw-r--r-- src/construct/heading_setext.rs 11
-rw-r--r-- src/construct/label_end.rs 18
-rw-r--r-- src/construct/list.rs 10
-rw-r--r-- src/construct/paragraph.rs 11
-rw-r--r-- src/construct/partial_data.rs 10
-rw-r--r-- src/content/document.rs 8
-rw-r--r-- src/tokenizer.rs 23
-rw-r--r-- src/util/edit_map.rs 15
10 files changed, 50 insertions, 82 deletions
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
index b4265f0..1aa25c0 100644
--- a/src/construct/attention.rs
+++ b/src/construct/attention.rs
@@ -54,7 +54,6 @@
use crate::token::Token;
use crate::tokenizer::{Code, Event, EventType, Point, State, Tokenizer};
use crate::unicode::PUNCTUATION;
-use crate::util::edit_map::EditMap;
/// Character code kinds.
#[derive(Debug, PartialEq)]
@@ -201,7 +200,7 @@ fn inside(tokenizer: &mut Tokenizer, code: Code, marker: MarkerKind) -> State {
/// Resolve attention sequences.
#[allow(clippy::too_many_lines)]
-fn resolve_attention(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
+fn resolve_attention(tokenizer: &mut Tokenizer) {
let codes = &tokenizer.parse_state.codes;
let mut start = 0;
let mut balance = 0;
@@ -340,7 +339,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
// Remove closing sequence if fully used.
if sequence_close.size == 0 {
sequences.remove(close);
- map.add(close_event_index, 2, vec![]);
+ tokenizer.map.add(close_event_index, 2, vec![]);
} else {
// Shift remaining closing sequence forward.
// Do it here because a sequence can open and close different
@@ -362,7 +361,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
// Remove opening sequence if fully used.
if sequence_open.size == 0 {
sequences.remove(open);
- map.add(open_event_index, 2, vec![]);
+ tokenizer.map.add(open_event_index, 2, vec![]);
next_index -= 1;
} else {
// Shift remaining opening sequence backwards.
@@ -372,7 +371,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
}
// Opening.
- map.add_before(
+ tokenizer.map.add_before(
// Add after the current sequence (it might remain).
open_event_index + 2,
0,
@@ -420,7 +419,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
],
);
// Closing.
- map.add(
+ tokenizer.map.add(
close_event_index,
0,
vec![
@@ -484,8 +483,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
index += 1;
}
- // This resolver is needed.
- true
+ tokenizer.map.consume(&mut tokenizer.events);
}
/// Classify whether a character code represents whitespace, punctuation, or
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index 52eca54..1eabb56 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -58,7 +58,6 @@ use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
use crate::constant::{HEADING_ATX_OPENING_FENCE_SIZE_MAX, TAB_SIZE};
use crate::token::Token;
use crate::tokenizer::{Code, ContentType, Event, EventType, State, Tokenizer};
-use crate::util::edit_map::EditMap;
/// Start of a heading (atx).
///
@@ -190,7 +189,7 @@ fn data(tokenizer: &mut Tokenizer, code: Code) -> State {
}
/// Resolve heading (atx).
-pub fn resolve(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
+pub fn resolve(tokenizer: &mut Tokenizer) {
let mut index = 0;
let mut heading_start: Option<usize> = None;
let mut data_start: Option<usize> = None;
@@ -206,7 +205,7 @@ pub fn resolve(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
// If `start` is some, `end` is too.
let end = data_end.unwrap();
- map.add(
+ tokenizer.map.add(
start,
0,
vec![Event {
@@ -218,9 +217,9 @@ pub fn resolve(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
);
// Remove everything between the start and the end.
- map.add(start + 1, end - start - 1, vec![]);
+ tokenizer.map.add(start + 1, end - start - 1, vec![]);
- map.add(
+ tokenizer.map.add(
end + 1,
0,
vec![Event {
@@ -247,7 +246,4 @@ pub fn resolve(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
index += 1;
}
-
- // This resolver improves events, but is not needed by other resolvers.
- false
}
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index 828b7f4..7aa0054 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -61,7 +61,7 @@ use crate::constant::TAB_SIZE;
use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
use crate::token::Token;
use crate::tokenizer::{Code, EventType, State, Tokenizer};
-use crate::util::{edit_map::EditMap, skip::opt_back as skip_opt_back};
+use crate::util::skip::opt_back as skip_opt_back;
/// Kind of underline.
#[derive(Debug, Clone, PartialEq)]
@@ -196,7 +196,7 @@ fn after(tokenizer: &mut Tokenizer, code: Code) -> State {
}
/// Resolve heading (setext).
-pub fn resolve(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
+pub fn resolve(tokenizer: &mut Tokenizer) {
let mut index = 0;
let mut paragraph_enter = None;
let mut paragraph_exit = None;
@@ -228,13 +228,10 @@ pub fn resolve(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
let mut heading_exit = tokenizer.events[index].clone();
heading_exit.token_type = Token::HeadingSetext;
- map.add(enter, 0, vec![heading_enter]);
- map.add(index + 1, 0, vec![heading_exit]);
+ tokenizer.map.add(enter, 0, vec![heading_enter]);
+ tokenizer.map.add(index + 1, 0, vec![heading_exit]);
}
index += 1;
}
-
- // This resolver improves events, but is not needed by other resolvers.
- false
}
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index 0b9654d..35dfcdf 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -156,7 +156,6 @@ use crate::construct::{
use crate::token::Token;
use crate::tokenizer::{Code, Event, EventType, Media, State, Tokenizer};
use crate::util::{
- edit_map::EditMap,
normalize_identifier::normalize_identifier,
span::{serialize, Span},
};
@@ -612,7 +611,7 @@ fn collapsed_reference_open(tokenizer: &mut Tokenizer, code: Code) -> State {
/// This turns correct label start (image, link) and label end into links and
/// images, or turns them back into data.
#[allow(clippy::too_many_lines)]
-pub fn resolve_media(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
+pub fn resolve_media(tokenizer: &mut Tokenizer) {
let mut left = tokenizer.label_start_list_loose.split_off(0);
let mut left_2 = tokenizer.label_start_stack.split_off(0);
let media = tokenizer.media_list.split_off(0);
@@ -627,7 +626,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
let data_enter_index = label_start.start.0;
let data_exit_index = label_start.start.1;
- map.add(
+ tokenizer.map.add(
data_enter_index,
data_exit_index - data_enter_index + 1,
vec![
@@ -671,7 +670,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
let group_end_index = media.end.1;
// Insert a group enter and label enter.
- map.add(
+ tokenizer.map.add(
group_enter_index,
0,
vec![
@@ -697,7 +696,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
// Empty events not allowed.
if text_enter_index != text_exit_index {
// Insert a text enter.
- map.add(
+ tokenizer.map.add(
text_enter_index,
0,
vec![Event {
@@ -709,7 +708,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
);
// Insert a text exit.
- map.add(
+ tokenizer.map.add(
text_exit_index,
0,
vec![Event {
@@ -722,7 +721,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
}
// Insert a label exit.
- map.add(
+ tokenizer.map.add(
label_exit_index + 1,
0,
vec![Event {
@@ -734,7 +733,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
);
// Insert a group exit.
- map.add(
+ tokenizer.map.add(
group_end_index + 1,
0,
vec![Event {
@@ -748,6 +747,5 @@ pub fn resolve_media(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
index += 1;
}
- // This resolver is needed to figure out interleaving with attention.
- true
+ tokenizer.map.consume(&mut tokenizer.events);
}
diff --git a/src/construct/list.rs b/src/construct/list.rs
index 96113e6..7437d4a 100644
--- a/src/construct/list.rs
+++ b/src/construct/list.rs
@@ -52,7 +52,6 @@ use crate::construct::{
use crate::token::Token;
use crate::tokenizer::{Code, EventType, State, Tokenizer};
use crate::util::{
- edit_map::EditMap,
skip,
span::{codes as codes_from_span, from_exit_event},
};
@@ -388,7 +387,7 @@ fn nok(_tokenizer: &mut Tokenizer, _code: Code) -> State {
}
/// Find adjacent list items with the same marker.
-pub fn resolve_list_item(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
+pub fn resolve_list_item(tokenizer: &mut Tokenizer) {
let mut index = 0;
let mut balance = 0;
let mut lists_wip: Vec<(Kind, usize, usize, usize)> = vec![];
@@ -483,12 +482,9 @@ pub fn resolve_list_item(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
list_start.token_type = token_type.clone();
list_end.token_type = token_type;
- map.add(list_item.2, 0, vec![list_start]);
- map.add(list_item.3 + 1, 0, vec![list_end]);
+ tokenizer.map.add(list_item.2, 0, vec![list_start]);
+ tokenizer.map.add(list_item.3 + 1, 0, vec![list_end]);
index += 1;
}
-
- // This resolver improves events, but is not needed by other resolvers.
- false
}
diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs
index 811bc75..5409532 100644
--- a/src/construct/paragraph.rs
+++ b/src/construct/paragraph.rs
@@ -34,7 +34,7 @@
use crate::token::Token;
use crate::tokenizer::{Code, ContentType, EventType, State, Tokenizer};
-use crate::util::{edit_map::EditMap, skip::opt as skip_opt};
+use crate::util::skip::opt as skip_opt;
/// Before a paragraph.
///
@@ -80,7 +80,7 @@ fn inside(tokenizer: &mut Tokenizer, code: Code) -> State {
/// Merge “`Paragraph`”s, which currently span a single line, into actual
/// `Paragraph`s that span multiple lines.
-pub fn resolve(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
+pub fn resolve(tokenizer: &mut Tokenizer) {
let len = tokenizer.events.len();
let mut index = 0;
@@ -104,10 +104,10 @@ pub fn resolve(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
&& tokenizer.events[enter_next_index].token_type == Token::Paragraph
{
// Remove Exit:Paragraph, Enter:LineEnding, Exit:LineEnding, Enter:Paragraph.
- map.add(exit_index, 3, vec![]);
+ tokenizer.map.add(exit_index, 3, vec![]);
// Remove Enter:Paragraph.
- map.add(enter_next_index, 1, vec![]);
+ tokenizer.map.add(enter_next_index, 1, vec![]);
// Add Exit:LineEnding position info to Exit:Data.
let line_ending_exit = &tokenizer.events[exit_index + 2];
@@ -141,6 +141,5 @@ pub fn resolve(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
index += 1;
}
- // This resolver is needed by setext headings.
- true
+ tokenizer.map.consume(&mut tokenizer.events);
}
diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs
index 11064e6..d60ef36 100644
--- a/src/construct/partial_data.rs
+++ b/src/construct/partial_data.rs
@@ -8,7 +8,6 @@
use crate::token::Token;
use crate::tokenizer::{Code, EventType, State, Tokenizer};
-use crate::util::edit_map::EditMap;
/// At the beginning of data.
///
@@ -75,7 +74,7 @@ fn data(tokenizer: &mut Tokenizer, code: Code, stop: &'static [Code]) -> State {
}
/// Merge adjacent data events.
-pub fn resolve_data(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
+pub fn resolve_data(tokenizer: &mut Tokenizer) {
let len = tokenizer.events.len();
let mut index = 0;
@@ -95,7 +94,9 @@ pub fn resolve_data(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
}
if exit_far_index > exit_index {
- map.add(exit_index, exit_far_index - exit_index, vec![]);
+ tokenizer
+ .map
+ .add(exit_index, exit_far_index - exit_index, vec![]);
// Change positional info.
let exit_far = &tokenizer.events[exit_far_index];
@@ -108,7 +109,4 @@ pub fn resolve_data(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
index += 1;
}
-
- // This resolver helps, but is not required for other resolvers.
- false
}
diff --git a/src/content/document.rs b/src/content/document.rs
index f2c73e4..c1017a7 100644
--- a/src/content/document.rs
+++ b/src/content/document.rs
@@ -19,7 +19,6 @@ use crate::token::Token;
use crate::tokenizer::{
Code, Container, ContainerState, Event, EventType, Point, State, StateFn, Tokenizer,
};
-use crate::util::edit_map::EditMap;
use crate::util::{
normalize_identifier::normalize_identifier,
skip,
@@ -454,7 +453,6 @@ fn exit_containers(
// Inject the container events.
fn resolve(tokenizer: &mut Tokenizer, info: &mut DocumentInfo) {
- let mut map = EditMap::new();
let mut index = 0;
let mut inject = info.inject.split_off(0);
inject.reverse();
@@ -463,7 +461,7 @@ fn resolve(tokenizer: &mut Tokenizer, info: &mut DocumentInfo) {
while let Some((before, mut after)) = inject.pop() {
if !before.is_empty() {
first_line_ending_in_run = None;
- map.add(index, 0, before);
+ tokenizer.map.add(index, 0, before);
}
while index < tokenizer.events.len() {
@@ -499,8 +497,8 @@ fn resolve(tokenizer: &mut Tokenizer, info: &mut DocumentInfo) {
subevent_index += 1;
}
- map.add(close_index, 0, after);
+ tokenizer.map.add(close_index, 0, after);
}
- map.consume(&mut tokenizer.events);
+ tokenizer.map.consume(&mut tokenizer.events);
}
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 685198e..eb8bac4 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -95,7 +95,7 @@ pub type StateFn = dyn FnOnce(&mut Tokenizer, Code) -> State;
/// Resolvers are supposed to change the list of events, because parsing is
/// sometimes messy, and they help expose a cleaner interface of events to
/// the compiler and other users.
-pub type Resolver = dyn FnOnce(&mut Tokenizer, &mut EditMap) -> bool;
+pub type Resolver = dyn FnOnce(&mut Tokenizer);
/// The result of a state.
pub enum State {
@@ -191,6 +191,8 @@ pub struct Tokenizer<'a> {
pub stack: Vec<Token>,
/// Previous character code.
pub previous: Code,
+ /// To do.
+ pub map: EditMap,
/// Current character code.
current: Code,
/// Current relative and absolute place in the file.
@@ -246,6 +248,7 @@ impl<'a> Tokenizer<'a> {
stack: vec![],
events: vec![],
parse_state,
+ map: EditMap::new(),
label_start_stack: vec![],
label_start_list_loose: vec![],
media_list: vec![],
@@ -255,7 +258,7 @@ impl<'a> Tokenizer<'a> {
container: None,
// Assume about 10 resolvers.
resolvers: Vec::with_capacity(10),
- resolver_ids: Vec::with_capacity(10)
+ resolver_ids: Vec::with_capacity(10),
}
}
@@ -635,25 +638,13 @@ impl<'a> Tokenizer<'a> {
result = flush_impl(self, func);
self.drained = true;
- let mut map = EditMap::new();
- let mut consumed = false;
while !self.resolvers.is_empty() {
let resolver = self.resolvers.remove(0);
- let consume = resolver(self, &mut map);
-
- if consume {
- map.consume(&mut self.events);
- consumed = true;
- map = EditMap::new();
- } else {
- consumed = false;
- }
+ resolver(self);
}
- if !consumed {
- map.consume(&mut self.events);
- }
+ self.map.consume(&mut self.events);
}
result
diff --git a/src/util/edit_map.rs b/src/util/edit_map.rs
index 3bcef48..4d9f557 100644
--- a/src/util/edit_map.rs
+++ b/src/util/edit_map.rs
@@ -57,8 +57,6 @@ fn shift_links(events: &mut [Event], jumps: &[(usize, usize, usize)]) {
/// links in check.
#[derive(Debug)]
pub struct EditMap {
- /// Whether this map was consumed already.
- consumed: bool,
/// Record of changes.
map: Vec<(usize, usize, Vec<Event>)>,
}
@@ -66,10 +64,7 @@ pub struct EditMap {
impl EditMap {
/// Create a new edit map.
pub fn new() -> EditMap {
- EditMap {
- consumed: false,
- map: vec![],
- }
+ EditMap { map: vec![] }
}
/// Create an edit: a remove and/or add at a certain place.
pub fn add(&mut self, index: usize, remove: usize, add: Vec<Event>) {
@@ -84,8 +79,9 @@ impl EditMap {
self.map
.sort_unstable_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
- assert!(!self.consumed, "cannot consume after consuming");
- self.consumed = true;
+ if self.map.is_empty() {
+ return;
+ }
// Calculate jumps: where items in the current list move to.
let mut jumps = Vec::with_capacity(self.map.len());
@@ -118,12 +114,13 @@ impl EditMap {
while let Some(mut slice) = vecs.pop() {
events.append(&mut slice);
}
+
+ self.map.truncate(0);
}
}
/// Create an edit.
fn add_impl(edit_map: &mut EditMap, at: usize, remove: usize, mut add: Vec<Event>, before: bool) {
- assert!(!edit_map.consumed, "cannot add after consuming");
let mut index = 0;
if remove == 0 && add.is_empty() {