about | summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Titus Wormer <tituswormer@gmail.com> 2022-07-20 12:34:06 +0200
committer: Titus Wormer <tituswormer@gmail.com> 2022-07-20 12:34:06 +0200
commit: 7894ec75a7070591c3499fce1f409563c4edc7d7 (patch)
tree: 170d736268a30b728f28b164213a0a0ac47414da /src
parent: 7ec35068c86a546dac8172e74e8a34e3b6813eb2 (diff)
download: markdown-rs-7894ec75a7070591c3499fce1f409563c4edc7d7.tar.gz
markdown-rs-7894ec75a7070591c3499fce1f409563c4edc7d7.tar.bz2
markdown-rs-7894ec75a7070591c3499fce1f409563c4edc7d7.zip
Refactor to use less vecs for events
Diffstat (limited to 'src')
-rw-r--r-- src/construct/attention.rs 4
-rw-r--r-- src/construct/heading_atx.rs 4
-rw-r--r-- src/construct/heading_setext.rs 6
-rw-r--r-- src/construct/label_end.rs 4
-rw-r--r-- src/construct/list.rs 6
-rw-r--r-- src/construct/paragraph.rs 6
-rw-r--r-- src/construct/partial_data.rs 6
-rw-r--r-- src/content/document.rs 15
-rw-r--r-- src/subtokenize.rs 6
-rw-r--r-- src/tokenizer.rs 12
-rw-r--r-- src/util/edit_map.rs 70
11 files changed, 63 insertions, 76 deletions
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
index 7e99600..2cbc563 100644
--- a/src/construct/attention.rs
+++ b/src/construct/attention.rs
@@ -205,7 +205,7 @@ fn inside(tokenizer: &mut Tokenizer, code: Code, marker: MarkerKind) -> StateFnR
/// Resolve attention sequences.
#[allow(clippy::too_many_lines)]
-fn resolve_attention(tokenizer: &mut Tokenizer) -> Vec<Event> {
+fn resolve_attention(tokenizer: &mut Tokenizer) {
let codes = &tokenizer.parse_state.codes;
let mut edit_map = EditMap::new();
let mut start = 0;
@@ -523,7 +523,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer) -> Vec<Event> {
index += 1;
}
- edit_map.consume(tokenizer.events.split_off(0))
+ edit_map.consume(&mut tokenizer.events);
}
/// Classify whether a character code represents whitespace, punctuation, or
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index 4546924..feb1e9d 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -193,7 +193,7 @@ fn data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
/// Resolve heading (atx).
-pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> {
+pub fn resolve(tokenizer: &mut Tokenizer) {
let mut edit_map = EditMap::new();
let mut index = 0;
let mut heading_start: Option<usize> = None;
@@ -258,5 +258,5 @@ pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> {
index += 1;
}
- edit_map.consume(tokenizer.events.split_off(0))
+ edit_map.consume(&mut tokenizer.events);
}
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index 841bf53..d1e7d57 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -60,7 +60,7 @@
use crate::constant::TAB_SIZE;
use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
use crate::token::Token;
-use crate::tokenizer::{Code, Event, EventType, State, StateFnResult, Tokenizer};
+use crate::tokenizer::{Code, EventType, State, StateFnResult, Tokenizer};
use crate::util::{edit_map::EditMap, skip::opt_back as skip_opt_back};
/// Kind of underline.
@@ -196,7 +196,7 @@ fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
/// Resolve heading (setext).
-pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> {
+pub fn resolve(tokenizer: &mut Tokenizer) {
let mut edit_map = EditMap::new();
let mut index = 0;
let mut paragraph_enter: Option<usize> = None;
@@ -236,5 +236,5 @@ pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> {
index += 1;
}
- edit_map.consume(tokenizer.events.split_off(0))
+ edit_map.consume(&mut tokenizer.events);
}
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index 2124681..6bd634f 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -612,7 +612,7 @@ fn collapsed_reference_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnRes
/// This turns correct label start (image, link) and label end into links and
/// images, or turns them back into data.
#[allow(clippy::too_many_lines)]
-pub fn resolve_media(tokenizer: &mut Tokenizer) -> Vec<Event> {
+pub fn resolve_media(tokenizer: &mut Tokenizer) {
let mut left = tokenizer.label_start_list_loose.split_off(0);
let mut left_2 = tokenizer.label_start_stack.split_off(0);
let media = tokenizer.media_list.split_off(0);
@@ -773,5 +773,5 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) -> Vec<Event> {
index += 1;
}
- edit_map.consume(tokenizer.events.split_off(0))
+ edit_map.consume(&mut tokenizer.events);
}
diff --git a/src/construct/list.rs b/src/construct/list.rs
index db8af36..48ed291 100644
--- a/src/construct/list.rs
+++ b/src/construct/list.rs
@@ -50,7 +50,7 @@ use crate::construct::{
thematic_break::start as thematic_break,
};
use crate::token::Token;
-use crate::tokenizer::{Code, Event, EventType, State, StateFnResult, Tokenizer};
+use crate::tokenizer::{Code, EventType, State, StateFnResult, Tokenizer};
use crate::util::{
edit_map::EditMap,
skip,
@@ -390,7 +390,7 @@ fn nok(_tokenizer: &mut Tokenizer, _code: Code) -> StateFnResult {
}
/// Find adjacent list items with the same marker.
-pub fn resolve_list_item(tokenizer: &mut Tokenizer) -> Vec<Event> {
+pub fn resolve_list_item(tokenizer: &mut Tokenizer) {
let mut edit_map = EditMap::new();
let mut index = 0;
let mut balance = 0;
@@ -492,5 +492,5 @@ pub fn resolve_list_item(tokenizer: &mut Tokenizer) -> Vec<Event> {
index += 1;
}
- edit_map.consume(tokenizer.events.split_off(0))
+ edit_map.consume(&mut tokenizer.events);
}
diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs
index 53030f4..1b186e3 100644
--- a/src/construct/paragraph.rs
+++ b/src/construct/paragraph.rs
@@ -33,7 +33,7 @@
//! [html]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-p-element
use crate::token::Token;
-use crate::tokenizer::{Code, ContentType, Event, EventType, State, StateFnResult, Tokenizer};
+use crate::tokenizer::{Code, ContentType, EventType, State, StateFnResult, Tokenizer};
use crate::util::{edit_map::EditMap, skip::opt as skip_opt};
/// Before a paragraph.
@@ -80,7 +80,7 @@ fn inside(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// Merge “`Paragraph`”s, which currently span a single line, into actual
/// `Paragraph`s that span multiple lines.
-pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> {
+pub fn resolve(tokenizer: &mut Tokenizer) {
let mut edit_map = EditMap::new();
let len = tokenizer.events.len();
let mut index = 0;
@@ -142,5 +142,5 @@ pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> {
index += 1;
}
- edit_map.consume(tokenizer.events.split_off(0))
+ edit_map.consume(&mut tokenizer.events);
}
diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs
index b59bb76..b56efd2 100644
--- a/src/construct/partial_data.rs
+++ b/src/construct/partial_data.rs
@@ -7,7 +7,7 @@
//! [text]: crate::content::text
use crate::token::Token;
-use crate::tokenizer::{Code, Event, EventType, State, StateFnResult, Tokenizer};
+use crate::tokenizer::{Code, EventType, State, StateFnResult, Tokenizer};
use crate::util::edit_map::EditMap;
/// At the beginning of data.
@@ -75,7 +75,7 @@ fn data(tokenizer: &mut Tokenizer, code: Code, stop: Vec<Code>) -> StateFnResult
}
/// Merge adjacent data events.
-pub fn resolve_data(tokenizer: &mut Tokenizer) -> Vec<Event> {
+pub fn resolve_data(tokenizer: &mut Tokenizer) {
let mut edit_map = EditMap::new();
let len = tokenizer.events.len();
let mut index = 0;
@@ -114,5 +114,5 @@ pub fn resolve_data(tokenizer: &mut Tokenizer) -> Vec<Event> {
index += 1;
}
- edit_map.consume(tokenizer.events.split_off(0))
+ edit_map.consume(&mut tokenizer.events);
}
diff --git a/src/content/document.rs b/src/content/document.rs
index 53e58c4..d35060c 100644
--- a/src/content/document.rs
+++ b/src/content/document.rs
@@ -106,15 +106,16 @@ pub fn document(parse_state: &mut ParseState, point: Point, index: usize) -> Vec
index += 1;
}
- let mut result = (tokenizer.events, false);
+ let mut done = false;
+ let mut events = tokenizer.events;
parse_state.definitions = next_definitions;
- while !result.1 {
- result = subtokenize(result.0, parse_state);
+ while !done {
+ done = subtokenize(&mut events, parse_state);
}
- result.0
+ events
}
/// Before document.
@@ -415,7 +416,7 @@ fn flow_end(
info = exit_containers(tokenizer, info, &Phase::Eof);
}
- tokenizer.events = resolve(tokenizer, &info);
+ resolve(tokenizer, &info);
(State::Ok, Some(vec![code]))
}
@@ -481,7 +482,7 @@ fn exit_containers(
}
// Inject the container events.
-fn resolve(tokenizer: &mut Tokenizer, info: &DocumentInfo) -> Vec<Event> {
+fn resolve(tokenizer: &mut Tokenizer, info: &DocumentInfo) {
let mut map = EditMap::new();
let mut line_index = 0;
let mut index = 0;
@@ -537,5 +538,5 @@ fn resolve(tokenizer: &mut Tokenizer, info: &DocumentInfo) -> Vec<Event> {
add,
);
- map.consume(tokenizer.events.split_off(0))
+ map.consume(&mut tokenizer.events);
}
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index df7b015..7b7d6bd 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -61,7 +61,7 @@ pub fn link_to(events: &mut [Event], pevious: usize, next: usize) {
/// Parse linked events.
///
/// Supposed to be called repeatedly, returns `1: true` when done.
-pub fn subtokenize(events: Vec<Event>, parse_state: &ParseState) -> (Vec<Event>, bool) {
+pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool {
let mut edit_map = EditMap::new();
let mut done = true;
let mut index = 0;
@@ -178,5 +178,7 @@ pub fn subtokenize(events: Vec<Event>, parse_state: &ParseState) -> (Vec<Event>,
index += 1;
}
- (edit_map.consume(events), done)
+ edit_map.consume(events);
+
+ done
}
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 8813bdc..cba1752 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -91,7 +91,7 @@ pub type StateFnResult = (State, Option<Vec<Code>>);
/// Resolvers are supposed to change the list of events, because parsing is
/// sometimes messy, and they help expose a cleaner interface of events to
/// the compiler and other users.
-pub type Resolver = dyn FnOnce(&mut Tokenizer) -> Vec<Event>;
+pub type Resolver = dyn FnOnce(&mut Tokenizer);
/// The result of a state.
pub enum State {
@@ -167,8 +167,6 @@ struct InternalState {
point: Point,
}
-// #[derive(Debug)]
-
/// A tokenizer itself.
#[allow(clippy::struct_excessive_bools)]
pub struct Tokenizer<'a> {
@@ -288,12 +286,12 @@ impl<'a> Tokenizer<'a> {
/// Define a jump between two places.
pub fn define_skip(&mut self, point: &Point, index: usize) {
- define_skip_current_impl(self, point.line, (point.column, point.offset, index));
+ define_skip_impl(self, point.line, (point.column, point.offset, index));
}
/// Define the current place as a jump between two places.
pub fn define_skip_current(&mut self) {
- define_skip_current_impl(
+ define_skip_impl(
self,
self.point.line,
(self.point.column, self.point.offset, self.index),
@@ -629,7 +627,7 @@ impl<'a> Tokenizer<'a> {
while !self.resolvers.is_empty() {
let resolver = self.resolvers.remove(0);
- self.events = resolver(self);
+ resolver(self);
}
}
@@ -768,7 +766,7 @@ fn flush_impl(
///
/// This defines how much columns, offsets, and the `index` are increased when
/// consuming a line ending.
-fn define_skip_current_impl(tokenizer: &mut Tokenizer, line: usize, info: (usize, usize, usize)) {
+fn define_skip_impl(tokenizer: &mut Tokenizer, line: usize, info: (usize, usize, usize)) {
log::debug!("position: define skip: {:?} -> ({:?})", line, info);
let at = line - tokenizer.line_start;
diff --git a/src/util/edit_map.rs b/src/util/edit_map.rs
index 1f43a3a..b1b5064 100644
--- a/src/util/edit_map.rs
+++ b/src/util/edit_map.rs
@@ -13,26 +13,23 @@ use crate::tokenizer::Event;
/// Shift `previous` and `next` links according to `jumps`.
///
/// This fixes links in case there are events removed or added between them.
-fn shift_links(events: &mut [Event], jumps: &[(usize, isize)]) {
+fn shift_links(events: &mut [Event], jumps: &[(usize, usize, usize)]) {
let map = |before| {
+ // To do: this theoretically gets slow, investigate how to improve it.
let mut jump_index = 0;
- let mut jump = 0;
+ let mut remove = 0;
+ let mut add = 0;
while jump_index < jumps.len() {
if jumps[jump_index].0 > before {
break;
}
- jump = jumps[jump_index].1;
+ (_, remove, add) = jumps[jump_index];
jump_index += 1;
}
- #[allow(clippy::pedantic)]
- let next_i = (before as isize) + jump;
- assert!(next_i >= 0, "cannot shift before `0`");
- #[allow(clippy::pedantic)]
- let next = next_i as usize;
- next
+ before + add - remove
};
let mut index = 0;
@@ -72,59 +69,46 @@ impl EditMap {
add_impl(self, index, remove, add, true);
}
/// Done, change the events.
- pub fn consume(&mut self, mut events: Vec<Event>) -> Vec<Event> {
+ pub fn consume(&mut self, events: &mut Vec<Event>) {
self.map
.sort_unstable_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
assert!(!self.consumed, "cannot consume after consuming");
self.consumed = true;
- let mut jumps: Vec<(usize, isize)> = vec![];
+ // Calculate jumps: where items in the current list move to.
+ let mut jumps = Vec::with_capacity(self.map.len());
let mut index = 0;
- let mut shift = 0;
+ let mut add_acc = 0;
+ let mut remove_acc = 0;
while index < self.map.len() {
let (at, remove, add) = &self.map[index];
-
- #[allow(clippy::pedantic)]
- let next = shift + (add.len() as isize) - (*remove as isize);
- shift = next;
- jumps.push((*at, shift));
+ add_acc += add.len();
+ remove_acc += remove;
+ jumps.push((*at, remove_acc, add_acc));
index += 1;
}
+ let len_before = events.len();
let mut index = self.map.len();
- let mut vecs: Vec<Vec<Event>> = vec![];
- let mut capacity = 0;
-
+ let mut vecs: Vec<Vec<Event>> = Vec::with_capacity(index * 2 + 1);
while index > 0 {
index -= 1;
- let at = self.map[index].0;
-
- let mut keep = events.split_off(at + self.map[index].1);
+ let (at, remove, _) = self.map[index];
+ let mut keep = events.split_off(at + remove);
shift_links(&mut keep, &jumps);
- capacity += keep.len();
vecs.push(keep);
-
- let add = self.map[index].2.split_off(0);
- capacity += add.len();
- vecs.push(add);
-
+ vecs.push(self.map[index].2.split_off(0));
events.truncate(at);
}
+ shift_links(events, &jumps);
+ vecs.push(events.split_off(0));
- shift_links(&mut events, &jumps);
- capacity += events.len();
- vecs.push(events);
+ events.reserve(len_before + add_acc - remove_acc);
- let mut next_events: Vec<Event> = Vec::with_capacity(capacity);
- let mut slice = vecs.pop();
-
- while let Some(mut x) = slice {
- next_events.append(&mut x);
- slice = vecs.pop();
+ while let Some(mut slice) = vecs.pop() {
+ events.append(&mut slice);
}
-
- next_events
}
}
@@ -133,12 +117,14 @@ fn add_impl(edit_map: &mut EditMap, at: usize, remove: usize, mut add: Vec<Event
assert!(!edit_map.consumed, "cannot add after consuming");
let mut index = 0;
+ if remove == 0 && add.is_empty() {
+ return;
+ }
+
while index < edit_map.map.len() {
if edit_map.map[index].0 == at {
edit_map.map[index].1 += remove;
- // To do: these might have to be split into several chunks instead
- // of one, if links in `curr_add` are supported.
if before {
add.append(&mut edit_map.map[index].2);
edit_map.map[index].2 = add;