Diffstat (limited to 'src')
-rw-r--r--  src/construct/attention.rs           5
-rw-r--r--  src/construct/heading_atx.rs         3
-rw-r--r--  src/construct/heading_setext.rs      3
-rw-r--r--  src/construct/label_end.rs           6
-rw-r--r--  src/construct/label_start_image.rs   4
-rw-r--r--  src/construct/label_start_link.rs    4
-rw-r--r--  src/construct/list.rs                5
-rw-r--r--  src/construct/paragraph.rs           3
-rw-r--r--  src/construct/partial_data.rs        5
-rw-r--r--  src/content/document.rs              3
-rw-r--r--  src/content/string.rs                3
-rw-r--r--  src/content/text.rs                  3
-rw-r--r--  src/lib.rs                           1
-rw-r--r--  src/resolve.rs                      34
-rw-r--r--  src/tokenizer.rs                    98
15 files changed, 103 insertions(+), 77 deletions(-)
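
The shape of the change, before the per-file hunks: resolver registration moves from a `String` id paired with a `Box<dyn FnOnce(&mut Tokenizer)>` to a single `Copy` enum, so the tokenizer keeps one `Vec<ResolveName>` instead of two parallel vectors. A minimal, self-contained model of the old and new storage (illustrative names, simplified types; the real signatures are in the `src/tokenizer.rs` hunks below):

```rust
/// Minimal model of the old storage: two parallel vectors, with a heap
/// allocation per id and per closure.
#[allow(dead_code)]
struct OldTokenizer {
    resolver_ids: Vec<String>,
    resolvers: Vec<Box<dyn FnOnce(&mut OldTokenizer)>>,
}

/// Stand-in for `crate::resolve::Name`.
#[derive(Debug, Clone, Copy, PartialEq)]
enum ResolveName {
    Attention,
}

/// Minimal model of the new storage: one vector of `Copy` values. Dropping
/// the boxed closures is also what lets the real `Tokenizer` derive `Debug`
/// later in this diff.
struct NewTokenizer {
    resolvers: Vec<ResolveName>,
}

fn main() {
    let mut tokenizer = NewTokenizer { resolvers: vec![] };
    // Deduplication by name, as in the new `register_resolver`.
    if !tokenizer.resolvers.contains(&ResolveName::Attention) {
        tokenizer.resolvers.push(ResolveName::Attention);
    }
    assert_eq!(tokenizer.resolvers, [ResolveName::Attention]);
}
```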
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
index ac2ef25..6f91370 100644
--- a/src/construct/attention.rs
+++ b/src/construct/attention.rs
@@ -52,6 +52,7 @@
//! [html-strong]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-strong-element
use crate::event::{Event, Kind, Name, Point};
+use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
use crate::unicode::PUNCTUATION;
@@ -141,7 +142,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {
}
_ => {
tokenizer.exit(Name::AttentionSequence);
- tokenizer.register_resolver("attention".to_string(), Box::new(resolve_attention));
+ tokenizer.register_resolver(ResolveName::Attention);
tokenizer.tokenize_state.marker = b'\0';
State::Ok
}
@@ -150,7 +151,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {
/// Resolve attention sequences.
#[allow(clippy::too_many_lines)]
-fn resolve_attention(tokenizer: &mut Tokenizer) {
+pub fn resolve(tokenizer: &mut Tokenizer) {
let mut start = 0;
let mut balance = 0;
let mut sequences = vec![];
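
The same two-part edit repeats for each construct below: the registration call site switches to an enum name, and the resolver itself is renamed to a uniform `pub fn resolve` so the new central dispatcher in `src/resolve.rs` can address it by module path. A tiny model of that layout, with illustrative module bodies:

```rust
mod construct {
    pub mod attention {
        /// Uniformly named and `pub`, so it is addressable as
        /// `construct::attention::resolve` from one central match.
        pub fn resolve(events: &mut Vec<&'static str>) {
            events.push("attention");
        }
    }

    pub mod heading_atx {
        pub fn resolve(events: &mut Vec<&'static str>) {
            events.push("heading_atx");
        }
    }
}

fn main() {
    let mut events = vec![];
    construct::attention::resolve(&mut events);
    construct::heading_atx::resolve(&mut events);
    assert_eq!(events, ["attention", "heading_atx"]);
}
```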
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index e856ac3..a114051 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -57,6 +57,7 @@
use crate::constant::{HEADING_ATX_OPENING_FENCE_SIZE_MAX, TAB_SIZE};
use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
use crate::event::{Content, Event, Kind, Name};
+use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
@@ -140,7 +141,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.exit(Name::HeadingAtx);
- tokenizer.register_resolver("heading_atx".to_string(), Box::new(resolve));
+ tokenizer.register_resolver(ResolveName::HeadingAtx);
// Feel free to interrupt.
tokenizer.interrupt = false;
State::Ok
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index 3a24f9f..a3c513b 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -60,6 +60,7 @@
use crate::constant::TAB_SIZE;
use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
use crate::event::{Kind, Name};
+use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
use crate::util::skip::opt_back as skip_opt_back;
@@ -160,7 +161,7 @@ pub fn after(tokenizer: &mut Tokenizer) -> State {
None | Some(b'\n') => {
// Feel free to interrupt.
tokenizer.interrupt = false;
- tokenizer.register_resolver("heading_setext".to_string(), Box::new(resolve));
+ tokenizer.register_resolver(ResolveName::HeadingSetext);
State::Ok
}
_ => State::Nok,
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index 61f378d..f27d79f 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -149,9 +149,9 @@
use crate::constant::RESOURCE_DESTINATION_BALANCE_MAX;
use crate::construct::partial_space_or_tab::space_or_tab_eol;
use crate::event::{Event, Kind, Name};
+use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
use crate::tokenizer::{Media, Tokenizer};
-
use crate::util::{
normalize_identifier::normalize_identifier,
skip,
@@ -331,7 +331,7 @@ pub fn ok(tokenizer: &mut Tokenizer) -> State {
});
tokenizer.tokenize_state.start = 0;
tokenizer.tokenize_state.end = 0;
- tokenizer.register_resolver_before("media".to_string(), Box::new(resolve_media));
+ tokenizer.register_resolver_before(ResolveName::Label);
State::Ok
}
@@ -614,7 +614,7 @@ pub fn reference_collapsed_open(tokenizer: &mut Tokenizer) -> State {
/// This turns correct label start (image, link) and label end into links and
/// images, or turns them back into data.
#[allow(clippy::too_many_lines)]
-pub fn resolve_media(tokenizer: &mut Tokenizer) {
+pub fn resolve(tokenizer: &mut Tokenizer) {
let mut left = tokenizer.tokenize_state.label_start_list_loose.split_off(0);
let mut left_2 = tokenizer.tokenize_state.label_start_stack.split_off(0);
let media = tokenizer.tokenize_state.media_list.split_off(0);
diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs
index 2f7c0bf..e8aec8b 100644
--- a/src/construct/label_start_image.rs
+++ b/src/construct/label_start_image.rs
@@ -28,8 +28,8 @@
//! [label_end]: crate::construct::label_end
//! [html-img]: https://html.spec.whatwg.org/multipage/embedded-content.html#the-img-element
-use super::label_end::resolve_media;
use crate::event::Name;
+use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
use crate::tokenizer::{LabelStart, Tokenizer};
@@ -70,7 +70,7 @@ pub fn open(tokenizer: &mut Tokenizer) -> State {
balanced: false,
inactive: false,
});
- tokenizer.register_resolver_before("media".to_string(), Box::new(resolve_media));
+ tokenizer.register_resolver_before(ResolveName::Label);
State::Ok
}
_ => State::Nok,
diff --git a/src/construct/label_start_link.rs b/src/construct/label_start_link.rs
index 456a4e9..530d83e 100644
--- a/src/construct/label_start_link.rs
+++ b/src/construct/label_start_link.rs
@@ -27,8 +27,8 @@
//! [label_end]: crate::construct::label_end
//! [html-a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element
-use super::label_end::resolve_media;
use crate::event::Name;
+use crate::resolve::Name as ResolveName;
use crate::state::State;
use crate::tokenizer::{LabelStart, Tokenizer};
@@ -52,7 +52,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
balanced: false,
inactive: false,
});
- tokenizer.register_resolver_before("media".to_string(), Box::new(resolve_media));
+ tokenizer.register_resolver_before(ResolveName::Label);
State::Ok
}
_ => State::Nok,
diff --git a/src/construct/list.rs b/src/construct/list.rs
index ded77d0..028e283 100644
--- a/src/construct/list.rs
+++ b/src/construct/list.rs
@@ -47,6 +47,7 @@
use crate::constant::{LIST_ITEM_VALUE_SIZE_MAX, TAB_SIZE};
use crate::construct::partial_space_or_tab::space_or_tab_min_max;
use crate::event::{Kind, Name};
+use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
use crate::util::{
@@ -283,7 +284,7 @@ pub fn after(tokenizer: &mut Tokenizer) -> State {
container.size = prefix;
tokenizer.exit(Name::ListItemPrefix);
- tokenizer.register_resolver_before("list_item".to_string(), Box::new(resolve_list_item));
+ tokenizer.register_resolver_before(ResolveName::List);
State::Ok
}
}
@@ -355,7 +356,7 @@ pub fn nok(_tokenizer: &mut Tokenizer) -> State {
}
/// Find adjacent list items with the same marker.
-pub fn resolve_list_item(tokenizer: &mut Tokenizer) {
+pub fn resolve(tokenizer: &mut Tokenizer) {
let mut lists_wip: Vec<(u8, usize, usize, usize)> = vec![];
let mut lists: Vec<(u8, usize, usize, usize)> = vec![];
let mut index = 0;
diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs
index b605c0f..acbee83 100644
--- a/src/construct/paragraph.rs
+++ b/src/construct/paragraph.rs
@@ -33,6 +33,7 @@
//! [html]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-p-element
use crate::event::{Content, Kind, Name};
+use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
use crate::util::skip::opt as skip_opt;
@@ -65,7 +66,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {
None | Some(b'\n') => {
tokenizer.exit(Name::Data);
tokenizer.exit(Name::Paragraph);
- tokenizer.register_resolver_before("paragraph".to_string(), Box::new(resolve));
+ tokenizer.register_resolver_before(ResolveName::Paragraph);
// You’d be interrupting.
tokenizer.interrupt = true;
State::Ok
diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs
index fda021e..f9b7947 100644
--- a/src/construct/partial_data.rs
+++ b/src/construct/partial_data.rs
@@ -7,6 +7,7 @@
//! [text]: crate::content::text
use crate::event::{Kind, Name};
+use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
@@ -44,7 +45,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State {
State::Next(StateName::DataAtBreak)
}
Some(byte) if tokenizer.tokenize_state.markers.contains(&byte) => {
- tokenizer.register_resolver_before("data".to_string(), Box::new(resolve_data));
+ tokenizer.register_resolver_before(ResolveName::Data);
State::Ok
}
_ => {
@@ -77,7 +78,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {
}
/// Merge adjacent data events.
-pub fn resolve_data(tokenizer: &mut Tokenizer) {
+pub fn resolve(tokenizer: &mut Tokenizer) {
let len = tokenizer.events.len();
let mut index = 0;
diff --git a/src/content/document.rs b/src/content/document.rs
index 998bc06..b990ba5 100644
--- a/src/content/document.rs
+++ b/src/content/document.rs
@@ -531,7 +531,4 @@ fn resolve(tokenizer: &mut Tokenizer) {
tokenizer
.resolvers
.append(&mut child.resolvers.split_off(0));
- tokenizer
- .resolver_ids
- .append(&mut child.resolver_ids.split_off(0));
}
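
With resolvers as plain values, handing a child tokenizer's pending resolvers to its parent is a single `append`; the parallel `resolver_ids` bookkeeping above is simply deleted. A quick standalone illustration of the move semantics (plain integers standing in for `ResolveName`):

```rust
fn main() {
    let mut parent: Vec<u32> = vec![1];
    let mut child: Vec<u32> = vec![2, 3];
    // `split_off(0)` drains the child in place; `append` moves the drained
    // names onto the end of the parent's list.
    parent.append(&mut child.split_off(0));
    assert_eq!(parent, [1, 2, 3]);
    assert!(child.is_empty());
}
```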
diff --git a/src/content/string.rs b/src/content/string.rs
index 79dee6c..ce850e7 100644
--- a/src/content/string.rs
+++ b/src/content/string.rs
@@ -13,6 +13,7 @@
//! [text]: crate::content::text
use crate::construct::partial_whitespace::resolve_whitespace;
+use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
@@ -20,7 +21,7 @@ const MARKERS: [u8; 2] = [b'&', b'\\'];
/// Start of string.
pub fn start(tokenizer: &mut Tokenizer) -> State {
- tokenizer.register_resolver("whitespace".to_string(), Box::new(resolve));
+ tokenizer.register_resolver(ResolveName::String);
tokenizer.tokenize_state.markers = &MARKERS;
State::Retry(StateName::StringBefore)
}
diff --git a/src/content/text.rs b/src/content/text.rs
index 77c5963..570759d 100644
--- a/src/content/text.rs
+++ b/src/content/text.rs
@@ -21,6 +21,7 @@
//! > [whitespace][crate::construct::partial_whitespace].
use crate::construct::partial_whitespace::resolve_whitespace;
+use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
@@ -38,7 +39,7 @@ const MARKERS: [u8; 9] = [
/// Start of text.
pub fn start(tokenizer: &mut Tokenizer) -> State {
- tokenizer.register_resolver("whitespace".to_string(), Box::new(resolve));
+ tokenizer.register_resolver(ResolveName::Text);
tokenizer.tokenize_state.markers = &MARKERS;
State::Retry(StateName::TextBefore)
}
diff --git a/src/lib.rs b/src/lib.rs
index 5e59c57..afa34c0 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -10,6 +10,7 @@ mod construct;
mod content;
mod event;
mod parser;
+mod resolve;
mod state;
mod subtokenize;
mod tokenizer;
diff --git a/src/resolve.rs b/src/resolve.rs
new file mode 100644
index 0000000..e72b2a2
--- /dev/null
+++ b/src/resolve.rs
@@ -0,0 +1,34 @@
+use crate::construct;
+use crate::content;
+use crate::tokenizer::Tokenizer;
+
+/// Names of resolvers.
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub enum Name {
+ Label,
+ Attention,
+ HeadingAtx,
+ HeadingSetext,
+ List,
+ Paragraph,
+ Data,
+ String,
+ Text,
+}
+
+/// Call the corresponding resolver for a resolve name.
+pub fn call(tokenizer: &mut Tokenizer, name: Name) {
+ let func = match name {
+ Name::Label => construct::label_end::resolve,
+ Name::Attention => construct::attention::resolve,
+ Name::HeadingAtx => construct::heading_atx::resolve,
+ Name::HeadingSetext => construct::heading_setext::resolve,
+ Name::List => construct::list::resolve,
+ Name::Paragraph => construct::paragraph::resolve,
+ Name::Data => construct::partial_data::resolve,
+ Name::String => content::string::resolve,
+ Name::Text => content::text::resolve,
+ };
+
+ func(tokenizer);
+}
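
`call` is the one place where names turn back into behavior: every match arm is a plain `fn(&mut Tokenizer)`, so the match coerces to a function pointer with no boxing and no trait objects. A self-contained miniature of the same dispatch, with a simplified `Tokenizer`:

```rust
#[derive(Clone, Copy)]
enum Name {
    Data,
    Text,
}

struct Tokenizer {
    log: Vec<&'static str>,
}

fn resolve_data(tokenizer: &mut Tokenizer) {
    tokenizer.log.push("data");
}

fn resolve_text(tokenizer: &mut Tokenizer) {
    tokenizer.log.push("text");
}

/// Like `resolve::call`: pick the function for the name, then run it.
fn call(tokenizer: &mut Tokenizer, name: Name) {
    let func: fn(&mut Tokenizer) = match name {
        Name::Data => resolve_data,
        Name::Text => resolve_text,
    };
    func(tokenizer);
}

fn main() {
    let mut tokenizer = Tokenizer { log: vec![] };
    call(&mut tokenizer, Name::Data);
    call(&mut tokenizer, Name::Text);
    assert_eq!(tokenizer.log, ["data", "text"]);
}
```

The trade-off: the set of resolvers is now closed, so adding a construct means touching both this enum and the match, in exchange for `Copy` storage, cheap equality, and derivable `Debug`.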
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index b48351d..b2d0751 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -14,42 +14,10 @@
use crate::constant::TAB_SIZE;
use crate::event::{Content, Event, Kind, Link, Name, Point, VOID_EVENTS};
use crate::parser::ParseState;
+use crate::resolve::{call as call_resolve, Name as ResolveName};
use crate::state::{call, Name as StateName, State};
use crate::util::edit_map::EditMap;
-/// How to handle a byte.
-#[derive(Debug, PartialEq)]
-pub enum ByteAction {
- /// This is a normal byte.
- ///
- /// Includes replaced bytes.
- Normal(u8),
- /// This is a new byte.
- Insert(u8),
- /// This byte must be ignored.
- Ignore,
-}
-
-/// Callback that can be registered and is called when the tokenizer is done.
-///
-/// Resolvers are supposed to change the list of events, because parsing is
-/// sometimes messy, and they help expose a cleaner interface of events to
-/// the compiler and other users.
-pub type Resolver = dyn FnOnce(&mut Tokenizer);
-
-/// Loose label starts we found.
-#[derive(Debug)]
-pub struct LabelStart {
- /// Indices of where the label starts and ends in `events`.
- pub start: (usize, usize),
- /// A boolean used internally to figure out if a label start link can’t be
- /// used (because links in links are incorrect).
- pub inactive: bool,
- /// A boolean used internally to figure out if a label is balanced: they’re
- /// not media, it’s just balanced braces.
- pub balanced: bool,
-}
-
/// Media we found.
#[derive(Debug)]
pub struct Media {
@@ -80,6 +48,32 @@ pub struct ContainerState {
pub size: usize,
}
+/// How to handle a byte.
+#[derive(Debug, PartialEq)]
+enum ByteAction {
+ /// This is a normal byte.
+ ///
+ /// Includes replaced bytes.
+ Normal(u8),
+ /// This is a new byte.
+ Insert(u8),
+ /// This byte must be ignored.
+ Ignore,
+}
+
+/// Loose label starts we found.
+#[derive(Debug)]
+pub struct LabelStart {
+ /// Indices of where the label starts and ends in `events`.
+ pub start: (usize, usize),
+ /// A boolean used internally to figure out if a label start link can’t be
+ /// used (because links in links are incorrect).
+ pub inactive: bool,
+ /// A boolean used internally to figure out if a label is balanced: they’re
+ /// not media, it’s just balanced braces.
+ pub balanced: bool,
+}
+
/// Different kinds of attempts.
#[derive(Debug, PartialEq)]
enum AttemptKind {
@@ -129,6 +123,7 @@ struct Progress {
/// A lot of shared fields used to tokenize things.
#[allow(clippy::struct_excessive_bools)]
+#[derive(Debug)]
pub struct TokenizeState<'a> {
// Couple complex fields used to tokenize the document.
/// Tokenizer, used to tokenize flow in document.
@@ -205,6 +200,7 @@ pub struct TokenizeState<'a> {
/// A tokenizer itself.
#[allow(clippy::struct_excessive_bools)]
+#[derive(Debug)]
pub struct Tokenizer<'a> {
/// Jump between line endings.
column_start: Vec<(usize, usize)>,
@@ -217,8 +213,6 @@ pub struct Tokenizer<'a> {
///
/// Tracked to make sure everything’s valid.
consumed: bool,
- /// Track whether this tokenizer is done.
- resolved: bool,
/// Stack of how to handle attempts.
attempts: Vec<Attempt>,
/// Current byte.
@@ -235,11 +229,8 @@ pub struct Tokenizer<'a> {
pub stack: Vec<Name>,
/// Edit map, to batch changes.
pub map: EditMap,
- /// List of attached resolvers, which will be called when done feeding,
- /// to clean events.
- pub resolvers: Vec<Box<Resolver>>,
- /// List of names associated with attached resolvers.
- pub resolver_ids: Vec<String>,
+ /// List of resolvers.
+ pub resolvers: Vec<ResolveName>,
/// Shared parsing state across tokenizers.
pub parse_state: &'a ParseState<'a>,
/// A lot of shared fields used to tokenize things.
@@ -270,7 +261,6 @@ impl<'a> Tokenizer<'a> {
first_line: point.line,
line_start: point.clone(),
consumed: true,
- resolved: false,
attempts: vec![],
point,
stack: vec![],
@@ -317,23 +307,20 @@ impl<'a> Tokenizer<'a> {
concrete: false,
lazy: false,
resolvers: vec![],
- resolver_ids: vec![],
}
}
/// Register a resolver.
- pub fn register_resolver(&mut self, id: String, resolver: Box<Resolver>) {
- if !self.resolver_ids.contains(&id) {
- self.resolver_ids.push(id);
- self.resolvers.push(resolver);
+ pub fn register_resolver(&mut self, name: ResolveName) {
+ if !self.resolvers.contains(&name) {
+ self.resolvers.push(name);
}
}
/// Register a resolver, before others.
- pub fn register_resolver_before(&mut self, id: String, resolver: Box<Resolver>) {
- if !self.resolver_ids.contains(&id) {
- self.resolver_ids.push(id);
- self.resolvers.insert(0, resolver);
+ pub fn register_resolver_before(&mut self, name: ResolveName) {
+ if !self.resolvers.contains(&name) {
+ self.resolvers.insert(0, name);
}
}
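
The registration semantics carry over unchanged: `register_resolver` appends, `register_resolver_before` prepends, and both deduplicate by name. A quick check of that ordering under the new API (same method bodies as above, in a reduced `Tokenizer`):

```rust
#[derive(Debug, Clone, Copy, PartialEq)]
enum ResolveName {
    Label,
    Text,
}

struct Tokenizer {
    resolvers: Vec<ResolveName>,
}

impl Tokenizer {
    fn register_resolver(&mut self, name: ResolveName) {
        if !self.resolvers.contains(&name) {
            self.resolvers.push(name);
        }
    }

    fn register_resolver_before(&mut self, name: ResolveName) {
        if !self.resolvers.contains(&name) {
            self.resolvers.insert(0, name);
        }
    }
}

fn main() {
    let mut tokenizer = Tokenizer { resolvers: vec![] };
    tokenizer.register_resolver(ResolveName::Text);
    tokenizer.register_resolver_before(ResolveName::Label); // runs first
    tokenizer.register_resolver_before(ResolveName::Label); // duplicate: no-op
    assert_eq!(tokenizer.resolvers, [ResolveName::Label, ResolveName::Text]);
}
```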
@@ -587,11 +574,11 @@ impl<'a> Tokenizer<'a> {
push_impl(self, to, to, state, true);
if resolve {
- self.resolved = true;
-
- while !self.resolvers.is_empty() {
- let resolver = self.resolvers.remove(0);
- resolver(self);
+ let resolvers = self.resolvers.split_off(0);
+ let mut index = 0;
+ while index < resolvers.len() {
+ call_resolve(self, resolvers[index]);
+ index += 1;
}
self.map.consume(&mut self.events);
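
One subtlety in the flush loop above: the list is moved out with `split_off(0)` before iterating, because each `call_resolve(self, …)` needs `&mut self`, and that borrow cannot coexist with a live iterator over `self.resolvers`. Since the names are `Copy`, indexing the detached vector costs nothing. A reduced sketch of the pattern, assuming a simplified `Tokenizer`:

```rust
#[derive(Clone, Copy)]
enum ResolveName {
    Data,
}

struct Tokenizer {
    resolvers: Vec<ResolveName>,
    events: Vec<&'static str>,
}

fn call_resolve(tokenizer: &mut Tokenizer, name: ResolveName) {
    match name {
        ResolveName::Data => tokenizer.events.push("data resolved"),
    }
}

impl Tokenizer {
    fn flush(&mut self) {
        // Detach the names first; `self` stays free for the mutable borrow
        // taken by each `call_resolve(self, …)` inside the loop.
        let resolvers = self.resolvers.split_off(0);
        let mut index = 0;
        while index < resolvers.len() {
            call_resolve(self, resolvers[index]);
            index += 1;
        }
    }
}

fn main() {
    let mut tokenizer = Tokenizer {
        resolvers: vec![ResolveName::Data],
        events: vec![],
    };
    tokenizer.flush();
    assert_eq!(tokenizer.events, ["data resolved"]);
}
```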
@@ -619,7 +606,6 @@ fn push_impl(
mut state: State,
flush: bool,
) -> State {
- debug_assert!(!tokenizer.resolved, "cannot feed after drain");
debug_assert!(
from.0 > tokenizer.point.index
|| (from.0 == tokenizer.point.index && from.1 >= tokenizer.point.vs),