Diffstat (limited to '')
-rw-r--r--  src/construct/attention.rs           5
-rw-r--r--  src/construct/heading_atx.rs         3
-rw-r--r--  src/construct/heading_setext.rs      3
-rw-r--r--  src/construct/label_end.rs           6
-rw-r--r--  src/construct/label_start_image.rs   4
-rw-r--r--  src/construct/label_start_link.rs    4
-rw-r--r--  src/construct/list.rs                5
-rw-r--r--  src/construct/paragraph.rs           3
-rw-r--r--  src/construct/partial_data.rs        5
-rw-r--r--  src/content/document.rs              3
-rw-r--r--  src/content/string.rs                3
-rw-r--r--  src/content/text.rs                  3
-rw-r--r--  src/lib.rs                           1
-rw-r--r--  src/resolve.rs                      34
-rw-r--r--  src/tokenizer.rs                    98
15 files changed, 103 insertions, 77 deletions
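
In short: this commit replaces the tokenizer’s resolver storage (previously a `Vec<Box<dyn FnOnce(&mut Tokenizer)>>` plus a parallel `Vec<String>` of ids used only for deduplication) with a single `Vec` of a small `Copy` enum (`resolve::Name`), which `resolve::call` maps to the concrete function through one `match`. A minimal, self-contained sketch of the pattern follows; the `Tokenizer` struct and the resolver functions in it are illustrative stand-ins, not the crate’s real types:

    #[derive(Debug, Clone, Copy, PartialEq)]
    enum ResolveName {
        Attention,
        Data,
    }

    #[derive(Debug, Default)]
    struct Tokenizer {
        events: Vec<&'static str>,
        resolvers: Vec<ResolveName>,
    }

    impl Tokenizer {
        // Registration stays idempotent: `contains` compares cheap enum
        // values where the old code compared heap-allocated string ids.
        fn register_resolver(&mut self, name: ResolveName) {
            if !self.resolvers.contains(&name) {
                self.resolvers.push(name);
            }
        }
    }

    // One `match` maps a name to a plain `fn` item, so nothing boxed is
    // stored and dispatch needs no trait object.
    fn call(tokenizer: &mut Tokenizer, name: ResolveName) {
        let func: fn(&mut Tokenizer) = match name {
            ResolveName::Attention => resolve_attention,
            ResolveName::Data => resolve_data,
        };
        func(tokenizer);
    }

    fn resolve_attention(tokenizer: &mut Tokenizer) {
        tokenizer.events.push("attention resolved");
    }

    fn resolve_data(tokenizer: &mut Tokenizer) {
        tokenizer.events.push("data resolved");
    }

    fn main() {
        let mut tokenizer = Tokenizer::default();
        tokenizer.register_resolver(ResolveName::Data);
        tokenizer.register_resolver(ResolveName::Data); // deduplicated
        // Like the new flush loop in `src/tokenizer.rs`, take the list out
        // first, so each resolver may borrow the tokenizer mutably.
        let names = tokenizer.resolvers.split_off(0);
        let mut index = 0;
        while index < names.len() {
            call(&mut tokenizer, names[index]);
            index += 1;
        }
        assert_eq!(tokenizer.events, ["data resolved"]);
    }
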
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
index ac2ef25..6f91370 100644
--- a/src/construct/attention.rs
+++ b/src/construct/attention.rs
@@ -52,6 +52,7 @@
 //! [html-strong]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-strong-element
 
 use crate::event::{Event, Kind, Name, Point};
+use crate::resolve::Name as ResolveName;
 use crate::state::{Name as StateName, State};
 use crate::tokenizer::Tokenizer;
 use crate::unicode::PUNCTUATION;
@@ -141,7 +142,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {
         }
         _ => {
             tokenizer.exit(Name::AttentionSequence);
-            tokenizer.register_resolver("attention".to_string(), Box::new(resolve_attention));
+            tokenizer.register_resolver(ResolveName::Attention);
             tokenizer.tokenize_state.marker = b'\0';
             State::Ok
         }
@@ -150,7 +151,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {
 
 /// Resolve attention sequences.
 #[allow(clippy::too_many_lines)]
-fn resolve_attention(tokenizer: &mut Tokenizer) {
+pub fn resolve(tokenizer: &mut Tokenizer) {
     let mut start = 0;
     let mut balance = 0;
     let mut sequences = vec![];
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index e856ac3..a114051 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -57,6 +57,7 @@
 use crate::constant::{HEADING_ATX_OPENING_FENCE_SIZE_MAX, TAB_SIZE};
 use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
 use crate::event::{Content, Event, Kind, Name};
+use crate::resolve::Name as ResolveName;
 use crate::state::{Name as StateName, State};
 use crate::tokenizer::Tokenizer;
 
@@ -140,7 +141,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None | Some(b'\n') => {
             tokenizer.exit(Name::HeadingAtx);
-            tokenizer.register_resolver("heading_atx".to_string(), Box::new(resolve));
+            tokenizer.register_resolver(ResolveName::HeadingAtx);
             // Feel free to interrupt.
             tokenizer.interrupt = false;
             State::Ok
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index 3a24f9f..a3c513b 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -60,6 +60,7 @@
 use crate::constant::TAB_SIZE;
 use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
 use crate::event::{Kind, Name};
+use crate::resolve::Name as ResolveName;
 use crate::state::{Name as StateName, State};
 use crate::tokenizer::Tokenizer;
 use crate::util::skip::opt_back as skip_opt_back;
@@ -160,7 +161,7 @@ pub fn after(tokenizer: &mut Tokenizer) -> State {
         None | Some(b'\n') => {
             // Feel free to interrupt.
             tokenizer.interrupt = false;
-            tokenizer.register_resolver("heading_setext".to_string(), Box::new(resolve));
+            tokenizer.register_resolver(ResolveName::HeadingSetext);
             State::Ok
         }
         _ => State::Nok,
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index 61f378d..f27d79f 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -149,9 +149,9 @@
 use crate::constant::RESOURCE_DESTINATION_BALANCE_MAX;
 use crate::construct::partial_space_or_tab::space_or_tab_eol;
 use crate::event::{Event, Kind, Name};
+use crate::resolve::Name as ResolveName;
 use crate::state::{Name as StateName, State};
 use crate::tokenizer::{Media, Tokenizer};
-
 use crate::util::{
     normalize_identifier::normalize_identifier,
     skip,
@@ -331,7 +331,7 @@ pub fn ok(tokenizer: &mut Tokenizer) -> State {
     });
     tokenizer.tokenize_state.start = 0;
     tokenizer.tokenize_state.end = 0;
-    tokenizer.register_resolver_before("media".to_string(), Box::new(resolve_media));
+    tokenizer.register_resolver_before(ResolveName::Label);
     State::Ok
 }
 
@@ -614,7 +614,7 @@ pub fn reference_collapsed_open(tokenizer: &mut Tokenizer) -> State {
 /// This turns correct label start (image, link) and label end into links and
 /// images, or turns them back into data.
 #[allow(clippy::too_many_lines)]
-pub fn resolve_media(tokenizer: &mut Tokenizer) {
+pub fn resolve(tokenizer: &mut Tokenizer) {
     let mut left = tokenizer.tokenize_state.label_start_list_loose.split_off(0);
     let mut left_2 = tokenizer.tokenize_state.label_start_stack.split_off(0);
     let media = tokenizer.tokenize_state.media_list.split_off(0);
diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs
index 2f7c0bf..e8aec8b 100644
--- a/src/construct/label_start_image.rs
+++ b/src/construct/label_start_image.rs
@@ -28,8 +28,8 @@
 //! [label_end]: crate::construct::label_end
 //! [html-img]: https://html.spec.whatwg.org/multipage/embedded-content.html#the-img-element
 
-use super::label_end::resolve_media;
 use crate::event::Name;
+use crate::resolve::Name as ResolveName;
 use crate::state::{Name as StateName, State};
 use crate::tokenizer::{LabelStart, Tokenizer};
 
@@ -70,7 +70,7 @@ pub fn open(tokenizer: &mut Tokenizer) -> State {
                 balanced: false,
                 inactive: false,
             });
-            tokenizer.register_resolver_before("media".to_string(), Box::new(resolve_media));
+            tokenizer.register_resolver_before(ResolveName::Label);
             State::Ok
         }
         _ => State::Nok,
diff --git a/src/construct/label_start_link.rs b/src/construct/label_start_link.rs
index 456a4e9..530d83e 100644
--- a/src/construct/label_start_link.rs
+++ b/src/construct/label_start_link.rs
@@ -27,8 +27,8 @@
 //! [label_end]: crate::construct::label_end
 //! [html-a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element
 
-use super::label_end::resolve_media;
 use crate::event::Name;
+use crate::resolve::Name as ResolveName;
 use crate::state::State;
 use crate::tokenizer::{LabelStart, Tokenizer};
 
@@ -52,7 +52,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
                 balanced: false,
                 inactive: false,
             });
-            tokenizer.register_resolver_before("media".to_string(), Box::new(resolve_media));
+            tokenizer.register_resolver_before(ResolveName::Label);
             State::Ok
         }
         _ => State::Nok,
diff --git a/src/construct/list.rs b/src/construct/list.rs
index ded77d0..028e283 100644
--- a/src/construct/list.rs
+++ b/src/construct/list.rs
@@ -47,6 +47,7 @@
 use crate::constant::{LIST_ITEM_VALUE_SIZE_MAX, TAB_SIZE};
 use crate::construct::partial_space_or_tab::space_or_tab_min_max;
 use crate::event::{Kind, Name};
+use crate::resolve::Name as ResolveName;
 use crate::state::{Name as StateName, State};
 use crate::tokenizer::Tokenizer;
 use crate::util::{
@@ -283,7 +284,7 @@ pub fn after(tokenizer: &mut Tokenizer) -> State {
         container.size = prefix;
 
         tokenizer.exit(Name::ListItemPrefix);
-        tokenizer.register_resolver_before("list_item".to_string(), Box::new(resolve_list_item));
+        tokenizer.register_resolver_before(ResolveName::List);
         State::Ok
     }
 }
@@ -355,7 +356,7 @@ pub fn nok(_tokenizer: &mut Tokenizer) -> State {
 }
 
 /// Find adjacent list items with the same marker.
-pub fn resolve_list_item(tokenizer: &mut Tokenizer) {
+pub fn resolve(tokenizer: &mut Tokenizer) {
     let mut lists_wip: Vec<(u8, usize, usize, usize)> = vec![];
     let mut lists: Vec<(u8, usize, usize, usize)> = vec![];
     let mut index = 0;
diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs
index b605c0f..acbee83 100644
--- a/src/construct/paragraph.rs
+++ b/src/construct/paragraph.rs
@@ -33,6 +33,7 @@
 //! [html]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-p-element
 
 use crate::event::{Content, Kind, Name};
+use crate::resolve::Name as ResolveName;
 use crate::state::{Name as StateName, State};
 use crate::tokenizer::Tokenizer;
 use crate::util::skip::opt as skip_opt;
@@ -65,7 +66,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {
         None | Some(b'\n') => {
             tokenizer.exit(Name::Data);
             tokenizer.exit(Name::Paragraph);
-            tokenizer.register_resolver_before("paragraph".to_string(), Box::new(resolve));
+            tokenizer.register_resolver_before(ResolveName::Paragraph);
             // You’d be interrupting.
             tokenizer.interrupt = true;
             State::Ok
diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs
index fda021e..f9b7947 100644
--- a/src/construct/partial_data.rs
+++ b/src/construct/partial_data.rs
@@ -7,6 +7,7 @@
 //! [text]: crate::content::text
 
 use crate::event::{Kind, Name};
+use crate::resolve::Name as ResolveName;
 use crate::state::{Name as StateName, State};
 use crate::tokenizer::Tokenizer;
 
@@ -44,7 +45,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State {
             State::Next(StateName::DataAtBreak)
         }
         Some(byte) if tokenizer.tokenize_state.markers.contains(&byte) => {
-            tokenizer.register_resolver_before("data".to_string(), Box::new(resolve_data));
+            tokenizer.register_resolver_before(ResolveName::Data);
             State::Ok
         }
         _ => {
@@ -77,7 +78,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {
 }
 
 /// Merge adjacent data events.
-pub fn resolve_data(tokenizer: &mut Tokenizer) {
+pub fn resolve(tokenizer: &mut Tokenizer) {
     let len = tokenizer.events.len();
     let mut index = 0;
 
diff --git a/src/content/document.rs b/src/content/document.rs
index 998bc06..b990ba5 100644
--- a/src/content/document.rs
+++ b/src/content/document.rs
@@ -531,7 +531,4 @@ fn resolve(tokenizer: &mut Tokenizer) {
     tokenizer
         .resolvers
         .append(&mut child.resolvers.split_off(0));
-    tokenizer
-        .resolver_ids
-        .append(&mut child.resolver_ids.split_off(0));
 }
diff --git a/src/content/string.rs b/src/content/string.rs
index 79dee6c..ce850e7 100644
--- a/src/content/string.rs
+++ b/src/content/string.rs
@@ -13,6 +13,7 @@
 //! [text]: crate::content::text
 
 use crate::construct::partial_whitespace::resolve_whitespace;
+use crate::resolve::Name as ResolveName;
 use crate::state::{Name as StateName, State};
 use crate::tokenizer::Tokenizer;
 
@@ -20,7 +21,7 @@ const MARKERS: [u8; 2] = [b'&', b'\\'];
 
 /// Start of string.
 pub fn start(tokenizer: &mut Tokenizer) -> State {
-    tokenizer.register_resolver("whitespace".to_string(), Box::new(resolve));
+    tokenizer.register_resolver(ResolveName::String);
     tokenizer.tokenize_state.markers = &MARKERS;
     State::Retry(StateName::StringBefore)
 }
diff --git a/src/content/text.rs b/src/content/text.rs
index 77c5963..570759d 100644
--- a/src/content/text.rs
+++ b/src/content/text.rs
@@ -21,6 +21,7 @@
 //! > [whitespace][crate::construct::partial_whitespace].
 
 use crate::construct::partial_whitespace::resolve_whitespace;
+use crate::resolve::Name as ResolveName;
 use crate::state::{Name as StateName, State};
 use crate::tokenizer::Tokenizer;
 
@@ -38,7 +39,7 @@ const MARKERS: [u8; 9] = [
 
 /// Start of text.
 pub fn start(tokenizer: &mut Tokenizer) -> State {
-    tokenizer.register_resolver("whitespace".to_string(), Box::new(resolve));
+    tokenizer.register_resolver(ResolveName::Text);
     tokenizer.tokenize_state.markers = &MARKERS;
     State::Retry(StateName::TextBefore)
 }
diff --git a/src/lib.rs b/src/lib.rs
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -10,6 +10,7 @@ mod construct;
 mod content;
 mod event;
 mod parser;
+mod resolve;
 mod state;
 mod subtokenize;
 mod tokenizer;
diff --git a/src/resolve.rs b/src/resolve.rs
new file mode 100644
index 0000000..e72b2a2
--- /dev/null
+++ b/src/resolve.rs
@@ -0,0 +1,34 @@
+use crate::construct;
+use crate::content;
+use crate::tokenizer::Tokenizer;
+
+/// Names of functions to move to.
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub enum Name {
+    Label,
+    Attention,
+    HeadingAtx,
+    HeadingSetext,
+    List,
+    Paragraph,
+    Data,
+    String,
+    Text,
+}
+
+/// Call the corresponding function for a state name.
+pub fn call(tokenizer: &mut Tokenizer, name: Name) {
+    let func = match name {
+        Name::Label => construct::label_end::resolve,
+        Name::Attention => construct::attention::resolve,
+        Name::HeadingAtx => construct::heading_atx::resolve,
+        Name::HeadingSetext => construct::heading_setext::resolve,
+        Name::List => construct::list::resolve,
+        Name::Paragraph => construct::paragraph::resolve,
+        Name::Data => construct::partial_data::resolve,
+        Name::String => content::string::resolve,
+        Name::Text => content::text::resolve,
+    };
+
+    func(tokenizer);
+}
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index b48351d..b2d0751 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -14,42 +14,10 @@
 use crate::constant::TAB_SIZE;
 use crate::event::{Content, Event, Kind, Link, Name, Point, VOID_EVENTS};
 use crate::parser::ParseState;
+use crate::resolve::{call as call_resolve, Name as ResolveName};
 use crate::state::{call, Name as StateName, State};
 use crate::util::edit_map::EditMap;
 
-/// How to handle a byte.
-#[derive(Debug, PartialEq)]
-pub enum ByteAction {
-    /// This is a normal byte.
-    ///
-    /// Includes replaced bytes.
-    Normal(u8),
-    /// This is a new byte.
-    Insert(u8),
-    /// This byte must be ignored.
-    Ignore,
-}
-
-/// Callback that can be registered and is called when the tokenizer is done.
-///
-/// Resolvers are supposed to change the list of events, because parsing is
-/// sometimes messy, and they help expose a cleaner interface of events to
-/// the compiler and other users.
-pub type Resolver = dyn FnOnce(&mut Tokenizer);
-
-/// Loose label starts we found.
-#[derive(Debug)]
-pub struct LabelStart {
-    /// Indices of where the label starts and ends in `events`.
-    pub start: (usize, usize),
-    /// A boolean used internally to figure out if a label start link can’t be
-    /// used (because links in links are incorrect).
-    pub inactive: bool,
-    /// A boolean used internally to figure out if a label is balanced: they’re
-    /// not media, it’s just balanced braces.
-    pub balanced: bool,
-}
-
 /// Media we found.
 #[derive(Debug)]
 pub struct Media {
@@ -80,6 +48,32 @@ pub struct ContainerState {
     pub size: usize,
 }
 
+/// How to handle a byte.
+#[derive(Debug, PartialEq)]
+enum ByteAction {
+    /// This is a normal byte.
+    ///
+    /// Includes replaced bytes.
+    Normal(u8),
+    /// This is a new byte.
+    Insert(u8),
+    /// This byte must be ignored.
+    Ignore,
+}
+
+/// Loose label starts we found.
+#[derive(Debug)]
+pub struct LabelStart {
+    /// Indices of where the label starts and ends in `events`.
+    pub start: (usize, usize),
+    /// A boolean used internally to figure out if a label start link can’t be
+    /// used (because links in links are incorrect).
+    pub inactive: bool,
+    /// A boolean used internally to figure out if a label is balanced: they’re
+    /// not media, it’s just balanced braces.
+    pub balanced: bool,
+}
+
 /// Different kinds of attempts.
 #[derive(Debug, PartialEq)]
 enum AttemptKind {
@@ -129,6 +123,7 @@ struct Progress {
 
 /// A lot of shared fields used to tokenize things.
 #[allow(clippy::struct_excessive_bools)]
+#[derive(Debug)]
 pub struct TokenizeState<'a> {
     // Couple complex fields used to tokenize the document.
     /// Tokenizer, used to tokenize flow in document.
@@ -205,6 +200,7 @@ pub struct TokenizeState<'a> {
 
 /// A tokenizer itself.
 #[allow(clippy::struct_excessive_bools)]
+#[derive(Debug)]
 pub struct Tokenizer<'a> {
     /// Jump between line endings.
     column_start: Vec<(usize, usize)>,
@@ -217,8 +213,6 @@ pub struct Tokenizer<'a> {
     ///
     /// Tracked to make sure everything’s valid.
     consumed: bool,
-    /// Track whether this tokenizer is done.
-    resolved: bool,
     /// Stack of how to handle attempts.
     attempts: Vec<Attempt>,
     /// Current byte.
@@ -235,11 +229,8 @@ pub struct Tokenizer<'a> {
     pub stack: Vec<Name>,
     /// Edit map, to batch changes.
     pub map: EditMap,
-    /// List of attached resolvers, which will be called when done feeding,
-    /// to clean events.
-    pub resolvers: Vec<Box<Resolver>>,
-    /// List of names associated with attached resolvers.
-    pub resolver_ids: Vec<String>,
+    /// List of resolvers.
+    pub resolvers: Vec<ResolveName>,
     /// Shared parsing state across tokenizers.
     pub parse_state: &'a ParseState<'a>,
     /// A lot of shared fields used to tokenize things.
@@ -270,7 +261,6 @@ impl<'a> Tokenizer<'a> {
             first_line: point.line,
             line_start: point.clone(),
             consumed: true,
-            resolved: false,
             attempts: vec![],
             point,
             stack: vec![],
@@ -317,23 +307,20 @@ impl<'a> Tokenizer<'a> {
             concrete: false,
             lazy: false,
             resolvers: vec![],
-            resolver_ids: vec![],
         }
     }
 
     /// Register a resolver.
-    pub fn register_resolver(&mut self, id: String, resolver: Box<Resolver>) {
-        if !self.resolver_ids.contains(&id) {
-            self.resolver_ids.push(id);
-            self.resolvers.push(resolver);
+    pub fn register_resolver(&mut self, name: ResolveName) {
+        if !self.resolvers.contains(&name) {
+            self.resolvers.push(name);
         }
     }
 
     /// Register a resolver, before others.
-    pub fn register_resolver_before(&mut self, id: String, resolver: Box<Resolver>) {
-        if !self.resolver_ids.contains(&id) {
-            self.resolver_ids.push(id);
-            self.resolvers.insert(0, resolver);
+    pub fn register_resolver_before(&mut self, name: ResolveName) {
+        if !self.resolvers.contains(&name) {
+            self.resolvers.insert(0, name);
         }
     }
 
@@ -587,11 +574,11 @@ impl<'a> Tokenizer<'a> {
         push_impl(self, to, to, state, true);
 
         if resolve {
-            self.resolved = true;
-
-            while !self.resolvers.is_empty() {
-                let resolver = self.resolvers.remove(0);
-                resolver(self);
+            let resolvers = self.resolvers.split_off(0);
+            let mut index = 0;
+            while index < resolvers.len() {
+                call_resolve(self, resolvers[index]);
+                index += 1;
             }
 
             self.map.consume(&mut self.events);
@@ -619,7 +606,6 @@ fn push_impl(
     mut state: State,
     flush: bool,
 ) -> State {
-    debug_assert!(!tokenizer.resolved, "cannot feed after drain");
     debug_assert!(
         from.0 > tokenizer.point.index
             || (from.0 == tokenizer.point.index && from.1 >= tokenizer.point.vs),
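
A closing note on why `#[derive(Debug)]` lands on `TokenizeState` and `Tokenizer` in the same commit: `Box<dyn FnOnce(&mut Tokenizer)>` has no `Debug` implementation, so the derive was impossible while the resolver list held trait objects, whereas a list of plain enum values derives it for free. The same change lets the commit drop the `resolved` flag and its "cannot feed after drain" assert: the `Copy` names are simply drained with `split_off(0)` on every flush. A hypothetical reduction, not code from the crate:

    // A resolver list of boxed closures blocks the derive:
    struct WithClosures {
        resolvers: Vec<Box<dyn FnOnce()>>,
    }
    // Adding #[derive(Debug)] to `WithClosures` would fail with
    // "`dyn FnOnce()` doesn't implement `Debug`".

    #[derive(Debug, Clone, Copy, PartialEq)]
    enum Name {
        Attention,
        Data,
    }

    // A resolver list of enum names derives it for free.
    #[derive(Debug)]
    struct WithNames {
        resolvers: Vec<Name>,
    }

    fn main() {
        let _closures = WithClosures { resolvers: vec![] };
        let names = WithNames {
            resolvers: vec![Name::Attention, Name::Data],
        };
        println!("{names:?}"); // WithNames { resolvers: [Attention, Data] }
    }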