author     Titus Wormer <tituswormer@gmail.com>  2022-08-12 17:28:19 +0200
committer  Titus Wormer <tituswormer@gmail.com>  2022-08-12 17:28:19 +0200
commit     6dc2011d69c85820feddf6799142d304cc2eeb29 (patch)
tree       29d8774a526631052ba6cd56be2492e5e249d494
parent     1234de9c22343fc4e1fe9e3e1127d2db01e96c2f (diff)
download   markdown-rs-6dc2011d69c85820feddf6799142d304cc2eeb29.tar.gz
           markdown-rs-6dc2011d69c85820feddf6799142d304cc2eeb29.tar.bz2
           markdown-rs-6dc2011d69c85820feddf6799142d304cc2eeb29.zip
Refactor to improve entering
-rw-r--r--  src/construct/code_fenced.rs               20
-rw-r--r--  src/construct/heading_atx.rs               11
-rw-r--r--  src/construct/html_flow.rs                  2
-rw-r--r--  src/construct/paragraph.rs                 11
-rw-r--r--  src/construct/partial_destination.rs       20
-rw-r--r--  src/construct/partial_label.rs             13
-rw-r--r--  src/construct/partial_space_or_tab.rs      28
-rw-r--r--  src/construct/partial_space_or_tab_eol.rs  53
-rw-r--r--  src/construct/partial_title.rs             13
-rw-r--r--  src/content/document.rs                     8
-rw-r--r--  src/event.rs                                2
-rw-r--r--  src/subtokenize.rs                          8
-rw-r--r--  src/tokenizer.rs                           51
13 files changed, 144 insertions, 96 deletions
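
In short, the commit replaces the tokenizer's `enter_with_content(name, Option<Content>)` and `enter_with_link(name, Option<Link>)` pair with a single `enter_link(name, Link)` method, and renames the `content_type` field to `content` on `Link` and on the whitespace option structs. The same call-site change repeats across the constructs below; this is a minimal before/after sketch of that pattern, assembled from the hunks in this diff rather than taken from any one file:

    // Before: enter a Data event with an optional embedded content type.
    tokenizer.enter_with_content(Name::Data, Some(Content::String));

    // After: enter a Data event with an explicit, not-yet-connected link.
    tokenizer.enter_link(
        Name::Data,
        Link {
            previous: None,
            next: None,
            content: Content::String,
        },
    );
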
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index 56a2a04..be0542a 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -103,7 +103,7 @@
use crate::constant::{CODE_FENCED_SEQUENCE_SIZE_MIN, TAB_SIZE};
use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
-use crate::event::{Content, Name};
+use crate::event::{Content, Link, Name};
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
use crate::util::slice::{Position, Slice};
@@ -223,7 +223,14 @@ pub fn info_before(tokenizer: &mut Tokenizer) -> State {
}
_ => {
tokenizer.enter(Name::CodeFencedFenceInfo);
- tokenizer.enter_with_content(Name::Data, Some(Content::String));
+ tokenizer.enter_link(
+ Name::Data,
+ Link {
+ previous: None,
+ next: None,
+ content: Content::String,
+ },
+ );
State::Retry(StateName::CodeFencedInfo)
}
}
@@ -281,7 +288,14 @@ pub fn meta_before(tokenizer: &mut Tokenizer) -> State {
None | Some(b'\n') => State::Retry(StateName::CodeFencedInfoBefore),
_ => {
tokenizer.enter(Name::CodeFencedFenceMeta);
- tokenizer.enter_with_content(Name::Data, Some(Content::String));
+ tokenizer.enter_link(
+ Name::Data,
+ Link {
+ previous: None,
+ next: None,
+ content: Content::String,
+ },
+ );
State::Retry(StateName::CodeFencedMeta)
}
}
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index f75805a..22b93db 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -56,7 +56,7 @@
use crate::constant::{HEADING_ATX_OPENING_FENCE_SIZE_MAX, TAB_SIZE};
use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
-use crate::event::{Content, Event, Kind, Name};
+use crate::event::{Content, Event, Kind, Link, Name};
use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
@@ -157,7 +157,14 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State {
State::Retry(StateName::HeadingAtxSequenceFurther)
}
Some(_) => {
- tokenizer.enter_with_content(Name::Data, Some(Content::Text));
+ tokenizer.enter_link(
+ Name::Data,
+ Link {
+ previous: None,
+ next: None,
+ content: Content::Text,
+ },
+ );
State::Retry(StateName::HeadingAtxData)
}
}
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index 2d685b6..123e1a3 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -145,7 +145,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
usize::MAX
},
connect: false,
- content_type: None,
+ content: None,
},
))
} else {
diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs
index c956a2c..e9fd377 100644
--- a/src/construct/paragraph.rs
+++ b/src/construct/paragraph.rs
@@ -32,7 +32,7 @@
//! [code_text]: crate::construct::code_text
//! [html]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-p-element
-use crate::event::{Content, Kind, Name};
+use crate::event::{Content, Kind, Link, Name};
use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
@@ -49,7 +49,14 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
None | Some(b'\n') => unreachable!("unexpected eol/eof"),
_ => {
tokenizer.enter(Name::Paragraph);
- tokenizer.enter_with_content(Name::Data, Some(Content::Text));
+ tokenizer.enter_link(
+ Name::Data,
+ Link {
+ previous: None,
+ next: None,
+ content: Content::Text,
+ },
+ );
State::Retry(StateName::ParagraphInside)
}
}
diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs
index 29cb5c4..d2477ab 100644
--- a/src/construct/partial_destination.rs
+++ b/src/construct/partial_destination.rs
@@ -71,7 +71,7 @@
//! [label_end]: crate::construct::label_end
//! [sanitize_uri]: crate::util::sanitize_uri
-use crate::event::{Content, Name};
+use crate::event::{Content, Link, Name};
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
@@ -99,7 +99,14 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(tokenizer.tokenize_state.token_1.clone());
tokenizer.enter(tokenizer.tokenize_state.token_4.clone());
tokenizer.enter(tokenizer.tokenize_state.token_5.clone());
- tokenizer.enter_with_content(Name::Data, Some(Content::String));
+ tokenizer.enter_link(
+ Name::Data,
+ Link {
+ previous: None,
+ next: None,
+ content: Content::String,
+ },
+ );
State::Retry(StateName::DestinationRaw)
}
}
@@ -121,7 +128,14 @@ pub fn enclosed_before(tokenizer: &mut Tokenizer) -> State {
State::Ok
} else {
tokenizer.enter(tokenizer.tokenize_state.token_5.clone());
- tokenizer.enter_with_content(Name::Data, Some(Content::String));
+ tokenizer.enter_link(
+ Name::Data,
+ Link {
+ previous: None,
+ next: None,
+ content: Content::String,
+ },
+ );
State::Retry(StateName::DestinationEnclosed)
}
}
diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs
index a1667e1..20a7b15 100644
--- a/src/construct/partial_label.rs
+++ b/src/construct/partial_label.rs
@@ -60,7 +60,7 @@
use crate::constant::LINK_REFERENCE_SIZE_MAX;
use crate::construct::partial_space_or_tab_eol::{space_or_tab_eol_with_options, Options};
-use crate::event::{Content, Name};
+use crate::event::{Content, Link, Name};
use crate::state::{Name as StateName, State};
use crate::subtokenize::link;
use crate::tokenizer::Tokenizer;
@@ -110,7 +110,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State {
State::Retry(space_or_tab_eol_with_options(
tokenizer,
Options {
- content_type: Some(Content::String),
+ content: Some(Content::String),
connect: tokenizer.tokenize_state.connect,
},
))
@@ -127,7 +127,14 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State {
State::Ok
}
_ => {
- tokenizer.enter_with_content(Name::Data, Some(Content::String));
+ tokenizer.enter_link(
+ Name::Data,
+ Link {
+ previous: None,
+ next: None,
+ content: Content::String,
+ },
+ );
if tokenizer.tokenize_state.connect {
let index = tokenizer.events.len() - 1;
diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs
index 9637373..43cfd45 100644
--- a/src/construct/partial_space_or_tab.rs
+++ b/src/construct/partial_space_or_tab.rs
@@ -4,7 +4,7 @@
//!
//! * [`micromark-factory-space/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-space/dev/index.js)
-use crate::event::{Content, Name};
+use crate::event::{Content, Link, Name};
use crate::state::{Name as StateName, State};
use crate::subtokenize::link;
use crate::tokenizer::Tokenizer;
@@ -21,7 +21,7 @@ pub struct Options {
/// Connect this whitespace to the previous.
pub connect: bool,
/// Embedded content type to use.
- pub content_type: Option<Content>,
+ pub content: Option<Content>,
}
/// One or more `space_or_tab`.
@@ -45,7 +45,7 @@ pub fn space_or_tab_min_max(tokenizer: &mut Tokenizer, min: usize, max: usize) -
kind: Name::SpaceOrTab,
min,
max,
- content_type: None,
+ content: None,
connect: false,
},
)
@@ -54,7 +54,7 @@ pub fn space_or_tab_min_max(tokenizer: &mut Tokenizer, min: usize, max: usize) -
/// `space_or_tab`, with the given options.
pub fn space_or_tab_with_options(tokenizer: &mut Tokenizer, options: Options) -> StateName {
tokenizer.tokenize_state.space_or_tab_connect = options.connect;
- tokenizer.tokenize_state.space_or_tab_content_type = options.content_type;
+ tokenizer.tokenize_state.space_or_tab_content = options.content;
tokenizer.tokenize_state.space_or_tab_min = options.min;
tokenizer.tokenize_state.space_or_tab_max = options.max;
tokenizer.tokenize_state.space_or_tab_token = options.kind;
@@ -71,15 +71,23 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
if tokenizer.tokenize_state.space_or_tab_max > 0
&& matches!(tokenizer.current, Some(b'\t' | b' '))
{
- tokenizer.enter_with_content(
- tokenizer.tokenize_state.space_or_tab_token.clone(),
- tokenizer.tokenize_state.space_or_tab_content_type.clone(),
- );
+ if let Some(ref content) = tokenizer.tokenize_state.space_or_tab_content {
+ tokenizer.enter_link(
+ tokenizer.tokenize_state.space_or_tab_token.clone(),
+ Link {
+ previous: None,
+ next: None,
+ content: content.clone(),
+ },
+ );
+ } else {
+ tokenizer.enter(tokenizer.tokenize_state.space_or_tab_token.clone());
+ }
if tokenizer.tokenize_state.space_or_tab_connect {
let index = tokenizer.events.len() - 1;
link(&mut tokenizer.events, index);
- } else if tokenizer.tokenize_state.space_or_tab_content_type.is_some() {
+ } else if tokenizer.tokenize_state.space_or_tab_content.is_some() {
tokenizer.tokenize_state.space_or_tab_connect = true;
}
@@ -127,7 +135,7 @@ pub fn after(tokenizer: &mut Tokenizer) -> State {
State::Nok
};
tokenizer.tokenize_state.space_or_tab_connect = false;
- tokenizer.tokenize_state.space_or_tab_content_type = None;
+ tokenizer.tokenize_state.space_or_tab_content = None;
tokenizer.tokenize_state.space_or_tab_size = 0;
tokenizer.tokenize_state.space_or_tab_max = 0;
tokenizer.tokenize_state.space_or_tab_min = 0;
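
For callers of the whitespace helper above, only the option field name changes. A hedged sketch of requesting whitespace whose bytes are later tokenized as string content, under the renamed `content` field; the particular `min`/`max`/`connect` values here are illustrative, not from a specific call site:

    // Illustrative only: ask for one or more spaces/tabs carrying embedded
    // string content, then retry the returned state.
    let name = space_or_tab_with_options(
        tokenizer,
        Options {
            kind: Name::SpaceOrTab,
            min: 1,
            max: usize::MAX,
            content: Some(Content::String),
            connect: false,
        },
    );
    State::Retry(name)
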
diff --git a/src/construct/partial_space_or_tab_eol.rs b/src/construct/partial_space_or_tab_eol.rs
index 08f4bf2..b38bc64 100644
--- a/src/construct/partial_space_or_tab_eol.rs
+++ b/src/construct/partial_space_or_tab_eol.rs
@@ -7,7 +7,7 @@
use crate::construct::partial_space_or_tab::{
space_or_tab_with_options, Options as SpaceOrTabOptions,
};
-use crate::event::{Content, Name};
+use crate::event::{Content, Link, Name};
use crate::state::{Name as StateName, State};
use crate::subtokenize::link;
use crate::tokenizer::Tokenizer;
@@ -18,7 +18,7 @@ pub struct Options {
/// Connect this whitespace to the previous.
pub connect: bool,
/// Embedded content type to use.
- pub content_type: Option<Content>,
+ pub content: Option<Content>,
}
/// `space_or_tab`, or optionally `space_or_tab`, one `eol`, and
@@ -31,7 +31,7 @@ pub fn space_or_tab_eol(tokenizer: &mut Tokenizer) -> StateName {
space_or_tab_eol_with_options(
tokenizer,
Options {
- content_type: None,
+ content: None,
connect: false,
},
)
@@ -39,7 +39,7 @@ pub fn space_or_tab_eol(tokenizer: &mut Tokenizer) -> StateName {
/// `space_or_tab_eol`, with the given options.
pub fn space_or_tab_eol_with_options(tokenizer: &mut Tokenizer, options: Options) -> StateName {
- tokenizer.tokenize_state.space_or_tab_eol_content_type = options.content_type;
+ tokenizer.tokenize_state.space_or_tab_eol_content = options.content;
tokenizer.tokenize_state.space_or_tab_eol_connect = options.connect;
StateName::SpaceOrTabEolStart
}
@@ -65,10 +65,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
kind: Name::SpaceOrTab,
min: 1,
max: usize::MAX,
- content_type: tokenizer
- .tokenize_state
- .space_or_tab_eol_content_type
- .clone(),
+ content: tokenizer.tokenize_state.space_or_tab_eol_content.clone(),
connect: tokenizer.tokenize_state.space_or_tab_eol_connect,
},
))
@@ -86,11 +83,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
pub fn after_first(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.space_or_tab_eol_ok = true;
- if tokenizer
- .tokenize_state
- .space_or_tab_eol_content_type
- .is_some()
- {
+ if tokenizer.tokenize_state.space_or_tab_eol_content.is_some() {
tokenizer.tokenize_state.space_or_tab_eol_connect = true;
}
@@ -111,22 +104,23 @@ pub fn after_first(tokenizer: &mut Tokenizer) -> State {
/// ```
pub fn at_eol(tokenizer: &mut Tokenizer) -> State {
if let Some(b'\n') = tokenizer.current {
- tokenizer.enter_with_content(
- Name::LineEnding,
- tokenizer
- .tokenize_state
- .space_or_tab_eol_content_type
- .clone(),
- );
+ if let Some(ref content) = tokenizer.tokenize_state.space_or_tab_eol_content {
+ tokenizer.enter_link(
+ Name::LineEnding,
+ Link {
+ previous: None,
+ next: None,
+ content: content.clone(),
+ },
+ );
+ } else {
+ tokenizer.enter(Name::LineEnding);
+ }
if tokenizer.tokenize_state.space_or_tab_eol_connect {
let index = tokenizer.events.len() - 1;
link(&mut tokenizer.events, index);
- } else if tokenizer
- .tokenize_state
- .space_or_tab_eol_content_type
- .is_some()
- {
+ } else if tokenizer.tokenize_state.space_or_tab_eol_content.is_some() {
tokenizer.tokenize_state.space_or_tab_eol_connect = true;
}
@@ -135,7 +129,7 @@ pub fn at_eol(tokenizer: &mut Tokenizer) -> State {
State::Next(StateName::SpaceOrTabEolAfterEol)
} else {
let ok = tokenizer.tokenize_state.space_or_tab_eol_ok;
- tokenizer.tokenize_state.space_or_tab_eol_content_type = None;
+ tokenizer.tokenize_state.space_or_tab_eol_content = None;
tokenizer.tokenize_state.space_or_tab_eol_connect = false;
tokenizer.tokenize_state.space_or_tab_eol_ok = false;
if ok {
@@ -167,10 +161,7 @@ pub fn after_eol(tokenizer: &mut Tokenizer) -> State {
kind: Name::SpaceOrTab,
min: 1,
max: usize::MAX,
- content_type: tokenizer
- .tokenize_state
- .space_or_tab_eol_content_type
- .clone(),
+ content: tokenizer.tokenize_state.space_or_tab_eol_content.clone(),
connect: tokenizer.tokenize_state.space_or_tab_eol_connect,
},
))
@@ -187,7 +178,7 @@ pub fn after_eol(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
pub fn after_more(tokenizer: &mut Tokenizer) -> State {
- tokenizer.tokenize_state.space_or_tab_eol_content_type = None;
+ tokenizer.tokenize_state.space_or_tab_eol_content = None;
tokenizer.tokenize_state.space_or_tab_eol_connect = false;
tokenizer.tokenize_state.space_or_tab_eol_ok = false;
diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs
index b97243e..93dbd28 100644
--- a/src/construct/partial_title.rs
+++ b/src/construct/partial_title.rs
@@ -31,7 +31,7 @@
//! [label_end]: crate::construct::label_end
use crate::construct::partial_space_or_tab_eol::{space_or_tab_eol_with_options, Options};
-use crate::event::{Content, Name};
+use crate::event::{Content, Link, Name};
use crate::state::{Name as StateName, State};
use crate::subtokenize::link;
use crate::tokenizer::Tokenizer;
@@ -106,7 +106,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State {
State::Retry(space_or_tab_eol_with_options(
tokenizer,
Options {
- content_type: Some(Content::String),
+ content: Some(Content::String),
connect: tokenizer.tokenize_state.connect,
},
))
@@ -118,7 +118,14 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State {
State::Retry(StateName::TitleBegin)
}
Some(_) => {
- tokenizer.enter_with_content(Name::Data, Some(Content::String));
+ tokenizer.enter_link(
+ Name::Data,
+ Link {
+ previous: None,
+ next: None,
+ content: Content::String,
+ },
+ );
if tokenizer.tokenize_state.connect {
let index = tokenizer.events.len() - 1;
diff --git a/src/content/document.rs b/src/content/document.rs
index 59e6e7c..41d60e2 100644
--- a/src/content/document.rs
+++ b/src/content/document.rs
@@ -281,13 +281,13 @@ pub fn containers_after(tokenizer: &mut Tokenizer) -> State {
tokenizer.events[previous].link.as_mut().unwrap().next = Some(current);
}
tokenizer.tokenize_state.document_data_index = Some(current);
- tokenizer.enter_with_link(
+ tokenizer.enter_link(
Name::Data,
- Some(Link {
+ Link {
previous,
next: None,
- content_type: Content::Flow,
- }),
+ content: Content::Flow,
+ },
);
State::Retry(StateName::DocumentFlowInside)
}
diff --git a/src/event.rs b/src/event.rs
index 51ecd86..be32b5b 100644
--- a/src/event.rs
+++ b/src/event.rs
@@ -1892,7 +1892,7 @@ pub enum Content {
pub struct Link {
pub previous: Option<usize>,
pub next: Option<usize>,
- pub content_type: Content,
+ pub content: Content,
}
/// Place in the document.
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index 432c198..f55c790 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -9,7 +9,7 @@
//! * …must occur on [`Enter`][Kind::Enter] events only
//! * …must occur on void events (they are followed by their corresponding
//! [`Exit`][Kind::Exit] event)
-//! * …must have `content_type` field to define the kind of subcontent
+//! * …must have `link` field
//!
//! Links will then be passed through a tokenizer for the corresponding content
//! type by `subtokenize`.
@@ -53,8 +53,8 @@ pub fn link_to(events: &mut [Event], pevious: usize, next: usize) {
link_next.previous = Some(pevious);
debug_assert_eq!(
- events[pevious].link.as_ref().unwrap().content_type,
- events[next].link.as_ref().unwrap().content_type
+ events[pevious].link.as_ref().unwrap().content,
+ events[next].link.as_ref().unwrap().content
);
}
@@ -80,7 +80,7 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool {
// Subtokenizer.
let mut tokenizer = Tokenizer::new(event.point.clone(), parse_state);
// Substate.
- let mut state = State::Next(if link.content_type == Content::String {
+ let mut state = State::Next(if link.content == Content::String {
StateName::StringStart
} else {
StateName::TextStart
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 4a9fa01..dcd34ac 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -141,11 +141,11 @@ pub struct TokenizeState<'a> {
pub document_paragraph_before: bool,
// Couple of very frequent settings for parsing whitespace.
- pub space_or_tab_eol_content_type: Option<Content>,
+ pub space_or_tab_eol_content: Option<Content>,
pub space_or_tab_eol_connect: bool,
pub space_or_tab_eol_ok: bool,
pub space_or_tab_connect: bool,
- pub space_or_tab_content_type: Option<Content>,
+ pub space_or_tab_content: Option<Content>,
pub space_or_tab_min: usize,
pub space_or_tab_max: usize,
pub space_or_tab_size: usize,
@@ -289,11 +289,11 @@ impl<'a> Tokenizer<'a> {
size: 0,
size_b: 0,
size_c: 0,
- space_or_tab_eol_content_type: None,
+ space_or_tab_eol_content: None,
space_or_tab_eol_connect: false,
space_or_tab_eol_ok: false,
space_or_tab_connect: false,
- space_or_tab_content_type: None,
+ space_or_tab_content: None,
space_or_tab_min: 0,
space_or_tab_max: 0,
space_or_tab_size: 0,
@@ -423,34 +423,12 @@ impl<'a> Tokenizer<'a> {
/// Mark the start of a semantic label.
pub fn enter(&mut self, name: Name) {
- self.enter_with_link(name, None);
- }
-
- /// Enter with a content type.
- pub fn enter_with_content(&mut self, name: Name, content_type_opt: Option<Content>) {
- self.enter_with_link(
- name,
- content_type_opt.map(|content_type| Link {
- content_type,
- previous: None,
- next: None,
- }),
- );
+ enter_impl(self, name, None);
}
/// Enter with a link.
- pub fn enter_with_link(&mut self, name: Name, link: Option<Link>) {
- let mut point = self.point.clone();
- move_point_back(self, &mut point);
-
- log::debug!("enter: `{:?}`", name);
- self.events.push(Event {
- kind: Kind::Enter,
- name: name.clone(),
- point,
- link,
- });
- self.stack.push(name);
+ pub fn enter_link(&mut self, name: Name, link: Link) {
+ enter_impl(self, name, Some(link));
}
/// Mark the end of a semantic label.
@@ -597,6 +575,21 @@ fn move_point_back(tokenizer: &mut Tokenizer, point: &mut Point) {
}
}
+/// Enter.
+fn enter_impl(tokenizer: &mut Tokenizer, name: Name, link: Option<Link>) {
+ let mut point = tokenizer.point.clone();
+ move_point_back(tokenizer, &mut point);
+
+ log::debug!("enter: `{:?}`", name);
+ tokenizer.stack.push(name.clone());
+ tokenizer.events.push(Event {
+ kind: Kind::Enter,
+ name,
+ point,
+ link,
+ });
+}
+
/// Run the tokenizer.
fn push_impl(
tokenizer: &mut Tokenizer,
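
With `enter` and `enter_link` both delegating to the new `enter_impl`, a caller opens plain and linked events as in this rough usage sketch, modeled on the paragraph construct earlier in the diff; the matching `exit` calls and the surrounding state logic are assumed, not shown in these hunks:

    // Open a paragraph, then a Data event whose bytes are retokenized later
    // as text content; events are closed in reverse order.
    tokenizer.enter(Name::Paragraph);
    tokenizer.enter_link(
        Name::Data,
        Link {
            previous: None,
            next: None,
            content: Content::Text,
        },
    );
    // …consume the paragraph bytes…
    tokenizer.exit(Name::Data);
    tokenizer.exit(Name::Paragraph);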