about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Titus Wormer <tituswormer@gmail.com>, 2022-07-20 18:00:52 +0200
committer: Titus Wormer <tituswormer@gmail.com>, 2022-07-20 18:00:52 +0200
commit: 75c2109c6051009b220436bd823970a374f4f9fd (patch)
tree: c8123e6e4a0a71e2daec77d76c0dc4485bad2240
parent: 86801cdb3b114b30a14d4b8c01c8fb70b2bcee82 (diff)
download: markdown-rs-75c2109c6051009b220436bd823970a374f4f9fd.tar.gz
markdown-rs-75c2109c6051009b220436bd823970a374f4f9fd.tar.bz2
markdown-rs-75c2109c6051009b220436bd823970a374f4f9fd.zip
Refactor to share edit map
-rw-r--r-- src/construct/attention.rs 14
-rw-r--r-- src/construct/heading_atx.rs 12
-rw-r--r-- src/construct/heading_setext.rs 10
-rw-r--r-- src/construct/label_end.rs 18
-rw-r--r-- src/construct/list.rs 10
-rw-r--r-- src/construct/paragraph.rs 10
-rw-r--r-- src/construct/partial_data.rs 8
-rw-r--r-- src/subtokenize.rs 6
-rw-r--r-- src/tokenizer.rs 19
9 files changed, 61 insertions, 46 deletions
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
index 2cbc563..f0de63f 100644
--- a/src/construct/attention.rs
+++ b/src/construct/attention.rs
@@ -205,9 +205,8 @@ fn inside(tokenizer: &mut Tokenizer, code: Code, marker: MarkerKind) -> StateFnR
/// Resolve attention sequences.
#[allow(clippy::too_many_lines)]
-fn resolve_attention(tokenizer: &mut Tokenizer) {
+fn resolve_attention(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
let codes = &tokenizer.parse_state.codes;
- let mut edit_map = EditMap::new();
let mut start = 0;
let mut balance = 0;
let mut sequences: Vec<Sequence> = vec![];
@@ -353,7 +352,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer) {
// Remove closing sequence if fully used.
if sequence_close.size == 0 {
sequences.remove(close);
- edit_map.add(close_event_index, 2, vec![]);
+ map.add(close_event_index, 2, vec![]);
} else {
// Shift remaining closing sequence forward.
// Do it here because a sequence can open and close different
@@ -376,7 +375,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer) {
// Remove opening sequence if fully used.
if sequence_open.size == 0 {
sequences.remove(open);
- edit_map.add(open_event_index, 2, vec![]);
+ map.add(open_event_index, 2, vec![]);
next_index -= 1;
} else {
// Shift remaining opening sequence backwards.
@@ -387,7 +386,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer) {
}
// Opening.
- edit_map.add_before(
+ map.add_before(
// Add after the current sequence (it might remain).
open_event_index + 2,
0,
@@ -447,7 +446,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer) {
],
);
// Closing.
- edit_map.add(
+ map.add(
close_event_index,
0,
vec![
@@ -523,7 +522,8 @@ fn resolve_attention(tokenizer: &mut Tokenizer) {
index += 1;
}
- edit_map.consume(&mut tokenizer.events);
+ // This resolver is needed.
+ true
}
/// Classify whether a character code represents whitespace, punctuation, or
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index feb1e9d..bf3de23 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -193,8 +193,7 @@ fn data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
/// Resolve heading (atx).
-pub fn resolve(tokenizer: &mut Tokenizer) {
- let mut edit_map = EditMap::new();
+pub fn resolve(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
let mut index = 0;
let mut heading_start: Option<usize> = None;
let mut data_start: Option<usize> = None;
@@ -210,7 +209,7 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
// If `start` is some, `end` is too.
let end = data_end.unwrap();
- edit_map.add(
+ map.add(
start,
0,
vec![Event {
@@ -225,9 +224,9 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
);
// Remove everything between the start and the end.
- edit_map.add(start + 1, end - start - 1, vec![]);
+ map.add(start + 1, end - start - 1, vec![]);
- edit_map.add(
+ map.add(
end + 1,
0,
vec![Event {
@@ -258,5 +257,6 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
index += 1;
}
- edit_map.consume(&mut tokenizer.events);
+ // This resolver improves events, but is not needed by other resolvers.
+ false
}
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index d1e7d57..ee6c23c 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -196,8 +196,7 @@ fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
/// Resolve heading (setext).
-pub fn resolve(tokenizer: &mut Tokenizer) {
- let mut edit_map = EditMap::new();
+pub fn resolve(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
let mut index = 0;
let mut paragraph_enter: Option<usize> = None;
let mut paragraph_exit: Option<usize> = None;
@@ -229,12 +228,13 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
let mut heading_exit = tokenizer.events[index].clone();
heading_exit.token_type = Token::HeadingSetext;
- edit_map.add(enter, 0, vec![heading_enter]);
- edit_map.add(index + 1, 0, vec![heading_exit]);
+ map.add(enter, 0, vec![heading_enter]);
+ map.add(index + 1, 0, vec![heading_exit]);
}
index += 1;
}
- edit_map.consume(&mut tokenizer.events);
+ // This resolver improves events, but is not needed by other resolvers.
+ false
}
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index 6bd634f..bcfe343 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -612,13 +612,12 @@ fn collapsed_reference_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnRes
/// This turns correct label start (image, link) and label end into links and
/// images, or turns them back into data.
#[allow(clippy::too_many_lines)]
-pub fn resolve_media(tokenizer: &mut Tokenizer) {
+pub fn resolve_media(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
let mut left = tokenizer.label_start_list_loose.split_off(0);
let mut left_2 = tokenizer.label_start_stack.split_off(0);
let media = tokenizer.media_list.split_off(0);
left.append(&mut left_2);
- let mut edit_map = EditMap::new();
let events = &tokenizer.events;
// Remove loose label starts.
@@ -628,7 +627,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) {
let data_enter_index = label_start.start.0;
let data_exit_index = label_start.start.1;
- edit_map.add(
+ map.add(
data_enter_index,
data_exit_index - data_enter_index + 1,
vec![
@@ -678,7 +677,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) {
let group_end_index = media.end.1;
// Insert a group enter and label enter.
- edit_map.add(
+ map.add(
group_enter_index,
0,
vec![
@@ -710,7 +709,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) {
// Empty events not allowed.
if text_enter_index != text_exit_index {
// Insert a text enter.
- edit_map.add(
+ map.add(
text_enter_index,
0,
vec![Event {
@@ -725,7 +724,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) {
);
// Insert a text exit.
- edit_map.add(
+ map.add(
text_exit_index,
0,
vec![Event {
@@ -741,7 +740,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) {
}
// Insert a label exit.
- edit_map.add(
+ map.add(
label_exit_index + 1,
0,
vec![Event {
@@ -756,7 +755,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) {
);
// Insert a group exit.
- edit_map.add(
+ map.add(
group_end_index + 1,
0,
vec![Event {
@@ -773,5 +772,6 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) {
index += 1;
}
- edit_map.consume(&mut tokenizer.events);
+ // This resolver is needed to figure out interleaving with attention.
+ true
}
diff --git a/src/construct/list.rs b/src/construct/list.rs
index 48ed291..12c666b 100644
--- a/src/construct/list.rs
+++ b/src/construct/list.rs
@@ -390,8 +390,7 @@ fn nok(_tokenizer: &mut Tokenizer, _code: Code) -> StateFnResult {
}
/// Find adjacent list items with the same marker.
-pub fn resolve_list_item(tokenizer: &mut Tokenizer) {
- let mut edit_map = EditMap::new();
+pub fn resolve_list_item(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
let mut index = 0;
let mut balance = 0;
let mut lists_wip: Vec<(Kind, usize, usize, usize)> = vec![];
@@ -486,11 +485,12 @@ pub fn resolve_list_item(tokenizer: &mut Tokenizer) {
list_start.token_type = token_type.clone();
list_end.token_type = token_type;
- edit_map.add(list_item.2, 0, vec![list_start]);
- edit_map.add(list_item.3 + 1, 0, vec![list_end]);
+ map.add(list_item.2, 0, vec![list_start]);
+ map.add(list_item.3 + 1, 0, vec![list_end]);
index += 1;
}
- edit_map.consume(&mut tokenizer.events);
+ // This resolver improves events, but is not needed by other resolvers.
+ false
}
diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs
index 1b186e3..134a4b5 100644
--- a/src/construct/paragraph.rs
+++ b/src/construct/paragraph.rs
@@ -80,8 +80,7 @@ fn inside(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// Merge “`Paragraph`”s, which currently span a single line, into actual
/// `Paragraph`s that span multiple lines.
-pub fn resolve(tokenizer: &mut Tokenizer) {
- let mut edit_map = EditMap::new();
+pub fn resolve(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
let len = tokenizer.events.len();
let mut index = 0;
@@ -105,10 +104,10 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
&& tokenizer.events[enter_next_index].token_type == Token::Paragraph
{
// Remove Exit:Paragraph, Enter:LineEnding, Exit:LineEnding, Enter:Paragraph.
- edit_map.add(exit_index, 3, vec![]);
+ map.add(exit_index, 3, vec![]);
// Remove Enter:Paragraph.
- edit_map.add(enter_next_index, 1, vec![]);
+ map.add(enter_next_index, 1, vec![]);
// Add Exit:LineEnding position info to Exit:Data.
let line_ending_exit = &tokenizer.events[exit_index + 2];
@@ -142,5 +141,6 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
index += 1;
}
- edit_map.consume(&mut tokenizer.events);
+ // This resolver is needed by setext headings.
+ true
}
diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs
index b56efd2..ea71bcf 100644
--- a/src/construct/partial_data.rs
+++ b/src/construct/partial_data.rs
@@ -75,8 +75,7 @@ fn data(tokenizer: &mut Tokenizer, code: Code, stop: Vec<Code>) -> StateFnResult
}
/// Merge adjacent data events.
-pub fn resolve_data(tokenizer: &mut Tokenizer) {
- let mut edit_map = EditMap::new();
+pub fn resolve_data(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
let len = tokenizer.events.len();
let mut index = 0;
@@ -96,7 +95,7 @@ pub fn resolve_data(tokenizer: &mut Tokenizer) {
}
if exit_far_index > exit_index {
- edit_map.add(exit_index, exit_far_index - exit_index, vec![]);
+ map.add(exit_index, exit_far_index - exit_index, vec![]);
// Change positional info.
let exit_far = &tokenizer.events[exit_far_index];
@@ -114,5 +113,6 @@ pub fn resolve_data(tokenizer: &mut Tokenizer) {
index += 1;
}
- edit_map.consume(&mut tokenizer.events);
+ // This resolver helps, but is not required for other resolvers.
+ false
}
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index 7b7d6bd..272978d 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -62,7 +62,7 @@ pub fn link_to(events: &mut [Event], pevious: usize, next: usize) {
///
/// Supposed to be called repeatedly, returns `1: true` when done.
pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool {
- let mut edit_map = EditMap::new();
+ let mut map = EditMap::new();
let mut done = true;
let mut index = 0;
@@ -166,7 +166,7 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool {
while index > 0 {
index -= 1;
- edit_map.add(
+ map.add(
slices[index].0,
2,
tokenizer.events.split_off(slices[index].1),
@@ -178,7 +178,7 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool {
index += 1;
}
- edit_map.consume(events);
+ map.consume(events);
done
}
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index cba1752..5d03c92 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -13,6 +13,7 @@
use crate::parser::ParseState;
use crate::token::{Token, VOID_TOKENS};
+use crate::util::edit_map::EditMap;
/// Embedded content type.
#[derive(Debug, Clone, Copy, PartialEq)]
@@ -91,7 +92,7 @@ pub type StateFnResult = (State, Option<Vec<Code>>);
/// Resolvers are supposed to change the list of events, because parsing is
/// sometimes messy, and they help expose a cleaner interface of events to
/// the compiler and other users.
-pub type Resolver = dyn FnOnce(&mut Tokenizer);
+pub type Resolver = dyn FnOnce(&mut Tokenizer, &mut EditMap) -> bool;
/// The result of a state.
pub enum State {
@@ -624,10 +625,24 @@ impl<'a> Tokenizer<'a> {
result = flush_impl(self, func);
self.drained = true;
+ let mut map = EditMap::new();
+ let mut consumed = false;
while !self.resolvers.is_empty() {
let resolver = self.resolvers.remove(0);
- resolver(self);
+ let consume = resolver(self, &mut map);
+
+ if consume {
+ map.consume(&mut self.events);
+ consumed = true;
+ map = EditMap::new();
+ } else {
+ consumed = false;
+ }
+ }
+
+ if !consumed {
+ map.consume(&mut self.events);
}
}