aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-07-25 15:52:35 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-07-25 15:52:35 +0200
commitca56f2742d8719358d2046fbdee4f1087add0568 (patch)
tree702d3ff3ab6ad432bdff59990b7b4211da57afda /src
parent11304728b6607bc2a8d41a640308f3379a25b933 (diff)
downloadmarkdown-rs-ca56f2742d8719358d2046fbdee4f1087add0568.tar.gz
markdown-rs-ca56f2742d8719358d2046fbdee4f1087add0568.tar.bz2
markdown-rs-ca56f2742d8719358d2046fbdee4f1087add0568.zip
Refactor to remove need for cloning codes
Diffstat (limited to '')
-rw-r--r--src/content/document.rs2
-rw-r--r--src/subtokenize.rs14
-rw-r--r--src/tokenizer.rs54
3 files changed, 26 insertions, 44 deletions
diff --git a/src/content/document.rs b/src/content/document.rs
index f8d7b55..29aaa84 100644
--- a/src/content/document.rs
+++ b/src/content/document.rs
@@ -78,7 +78,7 @@ struct DocumentInfo {
pub fn document(parse_state: &mut ParseState, point: Point) -> Vec<Event> {
let mut tokenizer = Tokenizer::new(point, parse_state);
- tokenizer.push(parse_state.codes.clone(), Box::new(start), true);
+ tokenizer.push(0, parse_state.codes.len(), Box::new(start), true);
let mut index = 0;
let mut definitions = vec![];
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index 0c9df34..a1ec90a 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -24,7 +24,7 @@
use crate::content::{string::start as string, text::start as text};
use crate::parser::ParseState;
use crate::tokenizer::{ContentType, Event, EventType, State, Tokenizer};
-use crate::util::{edit_map::EditMap, span};
+use crate::util::edit_map::EditMap;
/// Create a link between two [`Event`][]s.
///
@@ -84,20 +84,15 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool {
} else {
text
}));
- let mut size = 0;
// Loop through links to pass them in order to the subtokenizer.
while let Some(index) = link_index {
let enter = &events[index];
let link_curr = enter.link.as_ref().expect("expected link");
assert_eq!(enter.event_type, EventType::Enter);
- let span = span::Span {
- start_index: enter.point.index,
- end_index: events[index + 1].point.index,
- };
if link_curr.previous != None {
- tokenizer.define_skip(&enter.point, size);
+ tokenizer.define_skip(&enter.point);
}
let func = match state {
@@ -106,13 +101,12 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool {
};
state = tokenizer.push(
- span::codes(&parse_state.codes, &span).to_vec(),
+ enter.point.index,
+ events[index + 1].point.index,
func,
link_curr.next == None,
);
- size += span.end_index - span.start_index;
-
link_index = link_curr.next;
}
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 7ec0d91..637b34f 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -173,7 +173,7 @@ struct InternalState {
#[allow(clippy::struct_excessive_bools)]
pub struct Tokenizer<'a> {
/// Jump between line endings.
- column_start: Vec<(usize, usize, usize, usize)>,
+ column_start: Vec<(usize, usize, usize)>,
// First line.
line_start: usize,
/// Track whether a character is expected to be consumed, and whether it’s
@@ -204,7 +204,7 @@ pub struct Tokenizer<'a> {
resolver_ids: Vec<String>,
/// Shared parsing state across tokenizers.
pub parse_state: &'a ParseState<'a>,
- codes: Vec<Code>,
+ /// To do.
pub index: usize,
/// Stack of label (start) that could form images and links.
///
@@ -250,7 +250,6 @@ impl<'a> Tokenizer<'a> {
stack: vec![],
events: vec![],
parse_state,
- codes: vec![],
index: 0,
map: EditMap::new(),
label_start_stack: vec![],
@@ -292,12 +291,8 @@ impl<'a> Tokenizer<'a> {
}
/// Define a jump between two places.
- pub fn define_skip(&mut self, point: &Point, index: usize) {
- define_skip_impl(
- self,
- point.line,
- (point.column, point.offset, point.index, index),
- );
+ pub fn define_skip(&mut self, point: &Point) {
+ define_skip_impl(self, point.line, (point.column, point.offset, point.index));
}
/// Define the current place as a jump between two places.
@@ -305,12 +300,7 @@ impl<'a> Tokenizer<'a> {
define_skip_impl(
self,
self.point.line,
- (
- self.point.column,
- self.point.offset,
- self.point.index,
- self.index,
- ),
+ (self.point.column, self.point.offset, self.point.index),
);
}
@@ -320,11 +310,11 @@ impl<'a> Tokenizer<'a> {
let at = self.point.line - self.line_start;
if self.point.column == 1 && at != self.column_start.len() {
- let (column, offset, index_abs, index_rel) = &self.column_start[at];
+ let (column, offset, index) = &self.column_start[at];
self.point.column = *column;
self.point.offset = *offset;
- self.point.index = *index_abs;
- self.index = *index_rel;
+ self.point.index = *index;
+ self.index = *index;
}
}
@@ -357,7 +347,6 @@ impl<'a> Tokenizer<'a> {
self.point.column,
self.point.offset,
self.point.index,
- self.index,
));
}
@@ -636,18 +625,14 @@ impl<'a> Tokenizer<'a> {
/// markdown into the state machine, and normally pauses after feeding.
pub fn push(
&mut self,
- mut codes: Vec<Code>,
+ min: usize,
+ max: usize,
start: impl FnOnce(&mut Tokenizer, Code) -> State + 'static,
drain: bool,
) -> State {
assert!(!self.drained, "cannot feed after drain");
- // Let’s assume an event per character.
- self.events.reserve(codes.len());
-
- self.codes.append(&mut codes);
-
- let mut result = feed_impl(self, start);
+ let mut result = feed_impl(self, min, max, start);
if drain {
let func = match result {
@@ -655,7 +640,7 @@ impl<'a> Tokenizer<'a> {
_ => unreachable!("expected next state"),
};
- result = flush_impl(self, func);
+ result = flush_impl(self, max, func);
self.drained = true;
@@ -672,7 +657,7 @@ impl<'a> Tokenizer<'a> {
/// Flush the tokenizer.
pub fn flush(&mut self, start: impl FnOnce(&mut Tokenizer, Code) -> State + 'static) -> State {
- flush_impl(self, start)
+ flush_impl(self, self.index, start)
}
}
@@ -717,14 +702,17 @@ fn attempt_impl(
/// Feed a list of `codes` into `start`.
fn feed_impl(
tokenizer: &mut Tokenizer,
+ min: usize,
+ max: usize,
start: impl FnOnce(&mut Tokenizer, Code) -> State + 'static,
) -> State {
let mut state = State::Fn(Box::new(start));
+ tokenizer.index = min;
tokenizer.consumed = true;
- while tokenizer.index < tokenizer.codes.len() {
- let code = tokenizer.codes[tokenizer.index];
+ while tokenizer.index < max {
+ let code = tokenizer.parse_state.codes[tokenizer.index];
match state {
State::Ok(_) | State::Nok => {
@@ -744,10 +732,10 @@ fn feed_impl(
/// Flush `start`: pass `eof`s to it until done.
fn flush_impl(
tokenizer: &mut Tokenizer,
+ max: usize,
start: impl FnOnce(&mut Tokenizer, Code) -> State + 'static,
) -> State {
let mut state = State::Fn(Box::new(start));
- let max = tokenizer.index;
tokenizer.consumed = true;
loop {
@@ -755,7 +743,7 @@ fn flush_impl(
State::Ok(_) | State::Nok => break,
State::Fn(func) => {
let code = if tokenizer.index < max {
- tokenizer.codes[tokenizer.index]
+ tokenizer.parse_state.codes[tokenizer.index]
} else {
Code::None
};
@@ -778,7 +766,7 @@ fn flush_impl(
///
/// This defines how much columns, offsets, and the `index` are increased when
/// consuming a line ending.
-fn define_skip_impl(tokenizer: &mut Tokenizer, line: usize, info: (usize, usize, usize, usize)) {
+fn define_skip_impl(tokenizer: &mut Tokenizer, line: usize, info: (usize, usize, usize)) {
log::debug!("position: define skip: {:?} -> ({:?})", line, info);
let at = line - tokenizer.line_start;