author    Titus Wormer <tituswormer@gmail.com>  2022-08-09 10:45:15 +0200
committer Titus Wormer <tituswormer@gmail.com>  2022-08-09 10:45:15 +0200
commit    4ce1ac9e41cafa9051377470e8a246063f7d9b1a (patch)
tree      d678d9583764b2706fe7ea4876e91e40609f15b0 /src/construct
parent    8ffed1822bcbc1b6ce6647b840fb03996b0635ea (diff)
Rewrite algorithm to not pass around boxed functions
* Pass state names from an enum around instead of boxed functions
* Refactor to simplify attempts a lot
* Use a subtokenizer for the `document` content type
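The gist of the first two bullets, as a minimal self-contained sketch: `StateName` and `State::Fn(StateName)` are taken from the diff below, while `call`, the driver loop, and this stripped-down `Tokenizer` are hypothetical stand-ins for machinery in `src/tokenizer.rs`, which is not part of this diff.

// Hypothetical sketch, not the exact markdown-rs API: `call`, `main`,
// and this `Tokenizer` stand in for code in `src/tokenizer.rs`.

struct Tokenizer {
    current: Option<u8>,
}

// Before: every transition allocated a boxed closure.
#[allow(dead_code)]
enum BoxedState {
    Ok,
    Nok,
    Fn(Box<dyn FnOnce(&mut Tokenizer) -> BoxedState>),
}

// After: a transition is a copyable name from one enum...
#[derive(Clone, Copy)]
enum StateName {
    AttentionInside,
}

enum State {
    Ok,
    Nok,
    Fn(StateName),
}

// ...and a single dispatch table maps names back to the (now `pub`)
// state functions, so the hot loop allocates nothing.
fn call(tokenizer: &mut Tokenizer, name: StateName) -> State {
    match name {
        StateName::AttentionInside => inside(tokenizer),
    }
}

// Mirrors the rewritten states below: return `State::Fn(StateName::...)`
// instead of `State::Fn(Box::new(...))`.
fn inside(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        Some(b'*' | b'_') => {
            tokenizer.current = None; // stand-in for `tokenizer.consume()`
            State::Fn(StateName::AttentionInside)
        }
        _ => State::Ok,
    }
}

fn main() {
    let mut tokenizer = Tokenizer { current: Some(b'*') };
    let mut state = State::Fn(StateName::AttentionInside);
    // The driver loop replaces calling boxed closures directly.
    while let State::Fn(name) = state {
        state = call(&mut tokenizer, name);
    }
    assert!(matches!(state, State::Ok));
}

The same shape shows up in the `attempt`/`check` hunks below: callbacks now return a plain `State` (for example `State::Fn(if ok { StateName::CodeFencedAfter } else { StateName::CodeFencedContentBefore })`) instead of returning a boxed function that had to be invoked immediately with the tokenizer.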
Diffstat (limited to 'src/construct')
-rw-r--r--  src/construct/attention.rs                      |   6
-rw-r--r--  src/construct/autolink.rs                       |  44
-rw-r--r--  src/construct/blank_line.rs                     |   7
-rw-r--r--  src/construct/block_quote.rs                    |  52
-rw-r--r--  src/construct/character_escape.rs               |   7
-rw-r--r--  src/construct/character_reference.rs            |  18
-rw-r--r--  src/construct/code_fenced.rs                    | 140
-rw-r--r--  src/construct/code_indented.rs                  |  51
-rw-r--r--  src/construct/code_text.rs                      |  18
-rw-r--r--  src/construct/definition.rs                     |  72
-rw-r--r--  src/construct/hard_break_escape.rs              |   6
-rw-r--r--  src/construct/heading_atx.rs                    |  49
-rw-r--r--  src/construct/heading_setext.rs                 |  35
-rw-r--r--  src/construct/html_flow.rs                      | 188
-rw-r--r--  src/construct/html_text.rs                      | 183
-rw-r--r--  src/construct/label_end.rs                      | 110
-rw-r--r--  src/construct/label_start_image.rs              |   4
-rw-r--r--  src/construct/list.rs                           | 119
-rw-r--r--  src/construct/paragraph.rs                      |   6
-rw-r--r--  src/construct/partial_bom.rs                    |   6
-rw-r--r--  src/construct/partial_data.rs                   |  15
-rw-r--r--  src/construct/partial_destination.rs            |  30
-rw-r--r--  src/construct/partial_label.rs                  |  50
-rw-r--r--  src/construct/partial_non_lazy_continuation.rs  |   6
-rw-r--r--  src/construct/partial_space_or_tab.rs           | 171
-rw-r--r--  src/construct/partial_title.rs                  |  55
-rw-r--r--  src/construct/thematic_break.rs                 |  35
27 files changed, 795 insertions, 688 deletions
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
index fc2acfb..5a98a89 100644
--- a/src/construct/attention.rs
+++ b/src/construct/attention.rs
@@ -52,7 +52,7 @@
//! [html-strong]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-strong-element
use crate::token::Token;
-use crate::tokenizer::{Event, EventType, Point, State, Tokenizer};
+use crate::tokenizer::{Event, EventType, Point, State, StateName, Tokenizer};
use crate::unicode::PUNCTUATION;
use crate::util::slice::Slice;
@@ -132,11 +132,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | **
/// ^^
/// ```
-fn inside(tokenizer: &mut Tokenizer) -> State {
+pub fn inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'*' | b'_') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
tokenizer.consume();
- State::Fn(Box::new(inside))
+ State::Fn(StateName::AttentionInside)
}
_ => {
tokenizer.exit(Token::AttentionSequence);
diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs
index 1444c61..15bfac1 100644
--- a/src/construct/autolink.rs
+++ b/src/construct/autolink.rs
@@ -103,7 +103,7 @@
use crate::constant::{AUTOLINK_DOMAIN_SIZE_MAX, AUTOLINK_SCHEME_SIZE_MAX};
use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};
/// Start of an autolink.
///
@@ -121,7 +121,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
tokenizer.consume();
tokenizer.exit(Token::AutolinkMarker);
tokenizer.enter(Token::AutolinkProtocol);
- State::Fn(Box::new(open))
+ State::Fn(StateName::AutolinkOpen)
}
_ => State::Nok,
}
@@ -135,12 +135,12 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | a<user@example.com>b
/// ^
/// ```
-fn open(tokenizer: &mut Tokenizer) -> State {
+pub fn open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// ASCII alphabetic.
Some(b'A'..=b'Z' | b'a'..=b'z') => {
tokenizer.consume();
- State::Fn(Box::new(scheme_or_email_atext))
+ State::Fn(StateName::AutolinkSchemeOrEmailAtext)
}
_ => email_atext(tokenizer),
}
@@ -154,7 +154,7 @@ fn open(tokenizer: &mut Tokenizer) -> State {
/// > | a<user@example.com>b
/// ^
/// ```
-fn scheme_or_email_atext(tokenizer: &mut Tokenizer) -> State {
+pub fn scheme_or_email_atext(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// ASCII alphanumeric and `+`, `-`, and `.`.
Some(b'+' | b'-' | b'.' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => {
@@ -174,12 +174,12 @@ fn scheme_or_email_atext(tokenizer: &mut Tokenizer) -> State {
/// > | a<user@example.com>b
/// ^
/// ```
-fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer) -> State {
+pub fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b':') => {
tokenizer.consume();
tokenizer.tokenize_state.size = 0;
- State::Fn(Box::new(url_inside))
+ State::Fn(StateName::AutolinkUrlInside)
}
// ASCII alphanumeric and `+`, `-`, and `.`.
Some(b'+' | b'-' | b'.' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z')
@@ -187,7 +187,7 @@ fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer) -> State {
{
tokenizer.tokenize_state.size += 1;
tokenizer.consume();
- State::Fn(Box::new(scheme_inside_or_email_atext))
+ State::Fn(StateName::AutolinkSchemeInsideOrEmailAtext)
}
_ => {
tokenizer.tokenize_state.size = 0;
@@ -202,7 +202,7 @@ fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer) -> State {
/// > | a<https://example.com>b
/// ^
/// ```
-fn url_inside(tokenizer: &mut Tokenizer) -> State {
+pub fn url_inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'>') => {
tokenizer.exit(Token::AutolinkProtocol);
@@ -212,7 +212,7 @@ fn url_inside(tokenizer: &mut Tokenizer) -> State {
None | Some(b'\0'..=0x1F | b' ' | b'<' | 0x7F) => State::Nok,
Some(_) => {
tokenizer.consume();
- State::Fn(Box::new(url_inside))
+ State::Fn(StateName::AutolinkUrlInside)
}
}
}
@@ -223,11 +223,11 @@ fn url_inside(tokenizer: &mut Tokenizer) -> State {
/// > | a<user.name@example.com>b
/// ^
/// ```
-fn email_atext(tokenizer: &mut Tokenizer) -> State {
+pub fn email_atext(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'@') => {
tokenizer.consume();
- State::Fn(Box::new(email_at_sign_or_dot))
+ State::Fn(StateName::AutolinkEmailAtSignOrDot)
}
// ASCII atext.
//
@@ -250,7 +250,7 @@ fn email_atext(tokenizer: &mut Tokenizer) -> State {
b'#'..=b'\'' | b'*' | b'+' | b'-'..=b'9' | b'=' | b'?' | b'A'..=b'Z' | b'^'..=b'~',
) => {
tokenizer.consume();
- State::Fn(Box::new(email_atext))
+ State::Fn(StateName::AutolinkEmailAtext)
}
_ => State::Nok,
}
@@ -262,7 +262,7 @@ fn email_atext(tokenizer: &mut Tokenizer) -> State {
/// > | a<user.name@example.com>b
/// ^ ^
/// ```
-fn email_at_sign_or_dot(tokenizer: &mut Tokenizer) -> State {
+pub fn email_at_sign_or_dot(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// ASCII alphanumeric.
Some(b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => email_value(tokenizer),
@@ -276,12 +276,12 @@ fn email_at_sign_or_dot(tokenizer: &mut Tokenizer) -> State {
/// > | a<user.name@example.com>b
/// ^
/// ```
-fn email_label(tokenizer: &mut Tokenizer) -> State {
+pub fn email_label(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'.') => {
tokenizer.tokenize_state.size = 0;
tokenizer.consume();
- State::Fn(Box::new(email_at_sign_or_dot))
+ State::Fn(StateName::AutolinkEmailAtSignOrDot)
}
Some(b'>') => {
tokenizer.tokenize_state.size = 0;
@@ -304,20 +304,20 @@ fn email_label(tokenizer: &mut Tokenizer) -> State {
/// > | a<user.name@ex-ample.com>b
/// ^
/// ```
-fn email_value(tokenizer: &mut Tokenizer) -> State {
+pub fn email_value(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// ASCII alphanumeric or `-`.
Some(b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z')
if tokenizer.tokenize_state.size < AUTOLINK_DOMAIN_SIZE_MAX =>
{
- let func = if matches!(tokenizer.current, Some(b'-')) {
- email_value
+ let state_name = if matches!(tokenizer.current, Some(b'-')) {
+ StateName::AutolinkEmailValue
} else {
- email_label
+ StateName::AutolinkEmailLabel
};
tokenizer.tokenize_state.size += 1;
tokenizer.consume();
- State::Fn(Box::new(func))
+ State::Fn(state_name)
}
_ => {
tokenizer.tokenize_state.size = 0;
@@ -334,7 +334,7 @@ fn email_value(tokenizer: &mut Tokenizer) -> State {
/// > | a<user@example.com>b
/// ^
/// ```
-fn end(tokenizer: &mut Tokenizer) -> State {
+pub fn end(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'>') => {
tokenizer.enter(Token::AutolinkMarker);
diff --git a/src/construct/blank_line.rs b/src/construct/blank_line.rs
index c4eacf5..b12c2c4 100644
--- a/src/construct/blank_line.rs
+++ b/src/construct/blank_line.rs
@@ -33,7 +33,7 @@
//! [flow]: crate::content::flow
use crate::construct::partial_space_or_tab::space_or_tab;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};
/// Start of a blank line.
///
@@ -46,7 +46,8 @@ use crate::tokenizer::{State, Tokenizer};
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt_opt(space_or_tab(), after)(tokenizer)
+ let state_name = space_or_tab(tokenizer);
+ tokenizer.attempt_opt(state_name, StateName::BlankLineAfter)
}
/// After zero or more spaces or tabs, before a line ending or EOF.
@@ -57,7 +58,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | ␊
/// ^
/// ```
-fn after(tokenizer: &mut Tokenizer) -> State {
+pub fn after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => State::Ok,
_ => State::Nok,
diff --git a/src/construct/block_quote.rs b/src/construct/block_quote.rs
index 7e4753d..df58d62 100644
--- a/src/construct/block_quote.rs
+++ b/src/construct/block_quote.rs
@@ -36,7 +36,7 @@
use crate::constant::TAB_SIZE;
use crate::construct::partial_space_or_tab::space_or_tab_min_max;
use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};
/// Start of block quote.
///
@@ -45,13 +45,17 @@ use crate::tokenizer::{State, Tokenizer};
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- let max = if tokenizer.parse_state.constructs.code_indented {
- TAB_SIZE - 1
- } else {
- usize::MAX
- };
if tokenizer.parse_state.constructs.block_quote {
- tokenizer.go(space_or_tab_min_max(0, max), before)(tokenizer)
+ let state_name = space_or_tab_min_max(
+ tokenizer,
+ 0,
+ if tokenizer.parse_state.constructs.code_indented {
+ TAB_SIZE - 1
+ } else {
+ usize::MAX
+ },
+ );
+ tokenizer.go(state_name, StateName::BlockQuoteBefore)
} else {
State::Nok
}
@@ -63,7 +67,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | > a
/// ^
/// ```
-fn before(tokenizer: &mut Tokenizer) -> State {
+pub fn before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'>') => {
tokenizer.enter(Token::BlockQuote);
@@ -80,13 +84,17 @@ fn before(tokenizer: &mut Tokenizer) -> State {
/// > | > b
/// ^
/// ```
-pub fn cont(tokenizer: &mut Tokenizer) -> State {
- let max = if tokenizer.parse_state.constructs.code_indented {
- TAB_SIZE - 1
- } else {
- usize::MAX
- };
- tokenizer.go(space_or_tab_min_max(0, max), cont_before)(tokenizer)
+pub fn cont_start(tokenizer: &mut Tokenizer) -> State {
+ let state_name = space_or_tab_min_max(
+ tokenizer,
+ 0,
+ if tokenizer.parse_state.constructs.code_indented {
+ TAB_SIZE - 1
+ } else {
+ usize::MAX
+ },
+ );
+ tokenizer.go(state_name, StateName::BlockQuoteContBefore)
}
/// After whitespace, before `>`.
@@ -96,14 +104,14 @@ pub fn cont(tokenizer: &mut Tokenizer) -> State {
/// > | > b
/// ^
/// ```
-fn cont_before(tokenizer: &mut Tokenizer) -> State {
+pub fn cont_before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'>') => {
tokenizer.enter(Token::BlockQuotePrefix);
tokenizer.enter(Token::BlockQuoteMarker);
tokenizer.consume();
tokenizer.exit(Token::BlockQuoteMarker);
- State::Fn(Box::new(cont_after))
+ State::Fn(StateName::BlockQuoteContAfter)
}
_ => State::Nok,
}
@@ -117,15 +125,13 @@ fn cont_before(tokenizer: &mut Tokenizer) -> State {
/// > | >b
/// ^
/// ```
-fn cont_after(tokenizer: &mut Tokenizer) -> State {
+pub fn cont_after(tokenizer: &mut Tokenizer) -> State {
if let Some(b'\t' | b' ') = tokenizer.current {
tokenizer.enter(Token::SpaceOrTab);
tokenizer.consume();
tokenizer.exit(Token::SpaceOrTab);
- tokenizer.exit(Token::BlockQuotePrefix);
- State::Ok
- } else {
- tokenizer.exit(Token::BlockQuotePrefix);
- State::Ok
}
+
+ tokenizer.exit(Token::BlockQuotePrefix);
+ State::Ok
}
diff --git a/src/construct/character_escape.rs b/src/construct/character_escape.rs
index 4419d7a..de09f17 100644
--- a/src/construct/character_escape.rs
+++ b/src/construct/character_escape.rs
@@ -34,7 +34,7 @@
//! [hard_break_escape]: crate::construct::hard_break_escape
use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};
/// Start of a character escape.
///
@@ -49,7 +49,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Token::CharacterEscapeMarker);
tokenizer.consume();
tokenizer.exit(Token::CharacterEscapeMarker);
- State::Fn(Box::new(inside))
+ State::Fn(StateName::CharacterEscapeInside)
}
_ => State::Nok,
}
@@ -61,7 +61,8 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | a\*b
/// ^
/// ```
-fn inside(tokenizer: &mut Tokenizer) -> State {
+// StateName::CharacterEscapeInside
+pub fn inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// ASCII punctuation.
Some(b'!'..=b'/' | b':'..=b'@' | b'['..=b'`' | b'{'..=b'~') => {
diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs
index 7cc74ba..ba05fab 100644
--- a/src/construct/character_reference.rs
+++ b/src/construct/character_reference.rs
@@ -66,7 +66,7 @@ use crate::constant::{
CHARACTER_REFERENCE_HEXADECIMAL_SIZE_MAX, CHARACTER_REFERENCE_NAMED_SIZE_MAX,
};
use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};
use crate::util::slice::Slice;
/// Start of a character reference.
@@ -86,7 +86,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Token::CharacterReferenceMarker);
tokenizer.consume();
tokenizer.exit(Token::CharacterReferenceMarker);
- State::Fn(Box::new(open))
+ State::Fn(StateName::CharacterReferenceOpen)
}
_ => State::Nok,
}
@@ -103,12 +103,13 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | a&#x9;b
/// ^
/// ```
-fn open(tokenizer: &mut Tokenizer) -> State {
+// StateName::CharacterReferenceOpen
+pub fn open(tokenizer: &mut Tokenizer) -> State {
if let Some(b'#') = tokenizer.current {
tokenizer.enter(Token::CharacterReferenceMarkerNumeric);
tokenizer.consume();
tokenizer.exit(Token::CharacterReferenceMarkerNumeric);
- State::Fn(Box::new(numeric))
+ State::Fn(StateName::CharacterReferenceNumeric)
} else {
tokenizer.tokenize_state.marker = b'&';
tokenizer.enter(Token::CharacterReferenceValue);
@@ -125,14 +126,15 @@ fn open(tokenizer: &mut Tokenizer) -> State {
/// > | a&#x9;b
/// ^
/// ```
-fn numeric(tokenizer: &mut Tokenizer) -> State {
+// StateName::CharacterReferenceNumeric
+pub fn numeric(tokenizer: &mut Tokenizer) -> State {
if let Some(b'x' | b'X') = tokenizer.current {
tokenizer.enter(Token::CharacterReferenceMarkerHexadecimal);
tokenizer.consume();
tokenizer.exit(Token::CharacterReferenceMarkerHexadecimal);
tokenizer.enter(Token::CharacterReferenceValue);
tokenizer.tokenize_state.marker = b'x';
- State::Fn(Box::new(value))
+ State::Fn(StateName::CharacterReferenceValue)
} else {
tokenizer.enter(Token::CharacterReferenceValue);
tokenizer.tokenize_state.marker = b'#';
@@ -154,7 +156,7 @@ fn numeric(tokenizer: &mut Tokenizer) -> State {
/// > | a&#x9;b
/// ^
/// ```
-fn value(tokenizer: &mut Tokenizer) -> State {
+pub fn value(tokenizer: &mut Tokenizer) -> State {
if matches!(tokenizer.current, Some(b';')) && tokenizer.tokenize_state.size > 0 {
// Named.
if tokenizer.tokenize_state.marker == b'&' {
@@ -200,7 +202,7 @@ fn value(tokenizer: &mut Tokenizer) -> State {
if tokenizer.tokenize_state.size < max && test(&byte) {
tokenizer.tokenize_state.size += 1;
tokenizer.consume();
- return State::Fn(Box::new(value));
+ return State::Fn(StateName::CharacterReferenceValue);
}
}
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index a22a0f9..46c5f9f 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -102,12 +102,9 @@
//! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
use crate::constant::{CODE_FENCED_SEQUENCE_SIZE_MIN, TAB_SIZE};
-use crate::construct::{
- partial_non_lazy_continuation::start as partial_non_lazy_continuation,
- partial_space_or_tab::{space_or_tab, space_or_tab_min_max},
-};
+use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
use crate::token::Token;
-use crate::tokenizer::{ContentType, State, Tokenizer};
+use crate::tokenizer::{ContentType, State, StateName, Tokenizer};
use crate::util::slice::{Position, Slice};
/// Start of fenced code.
@@ -122,17 +119,16 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
if tokenizer.parse_state.constructs.code_fenced {
tokenizer.enter(Token::CodeFenced);
tokenizer.enter(Token::CodeFencedFence);
- tokenizer.go(
- space_or_tab_min_max(
- 0,
- if tokenizer.parse_state.constructs.code_indented {
- TAB_SIZE - 1
- } else {
- usize::MAX
- },
- ),
- before_sequence_open,
- )(tokenizer)
+ let state_name = space_or_tab_min_max(
+ tokenizer,
+ 0,
+ if tokenizer.parse_state.constructs.code_indented {
+ TAB_SIZE - 1
+ } else {
+ usize::MAX
+ },
+ );
+ tokenizer.go(state_name, StateName::CodeFencedBeforeSequenceOpen)
} else {
State::Nok
}
@@ -146,7 +142,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// | console.log(1)
/// | ~~~
/// ```
-fn before_sequence_open(tokenizer: &mut Tokenizer) -> State {
+pub fn before_sequence_open(tokenizer: &mut Tokenizer) -> State {
let tail = tokenizer.events.last();
let mut prefix = 0;
@@ -178,16 +174,17 @@ fn before_sequence_open(tokenizer: &mut Tokenizer) -> State {
/// | console.log(1)
/// | ~~~
/// ```
-fn sequence_open(tokenizer: &mut Tokenizer) -> State {
+pub fn sequence_open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'`' | b'~') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
tokenizer.tokenize_state.size += 1;
tokenizer.consume();
- State::Fn(Box::new(sequence_open))
+ State::Fn(StateName::CodeFencedSequenceOpen)
}
_ if tokenizer.tokenize_state.size >= CODE_FENCED_SEQUENCE_SIZE_MIN => {
tokenizer.exit(Token::CodeFencedFenceSequence);
- tokenizer.attempt_opt(space_or_tab(), info_before)(tokenizer)
+ let state_name = space_or_tab(tokenizer);
+ tokenizer.attempt_opt(state_name, StateName::CodeFencedInfoBefore)
}
_ => {
tokenizer.tokenize_state.marker = 0;
@@ -206,7 +203,7 @@ fn sequence_open(tokenizer: &mut Tokenizer) -> State {
/// | console.log(1)
/// | ~~~
/// ```
-fn info_before(tokenizer: &mut Tokenizer) -> State {
+pub fn info_before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.exit(Token::CodeFencedFence);
@@ -217,7 +214,7 @@ fn info_before(tokenizer: &mut Tokenizer) -> State {
_ => {
tokenizer.enter(Token::CodeFencedFenceInfo);
tokenizer.enter_with_content(Token::Data, Some(ContentType::String));
- info_inside(tokenizer)
+ info(tokenizer)
}
}
}
@@ -230,7 +227,7 @@ fn info_before(tokenizer: &mut Tokenizer) -> State {
/// | console.log(1)
/// | ~~~
/// ```
-fn info_inside(tokenizer: &mut Tokenizer) -> State {
+pub fn info(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.exit(Token::Data);
@@ -243,7 +240,8 @@ fn info_inside(tokenizer: &mut Tokenizer) -> State {
Some(b'\t' | b' ') => {
tokenizer.exit(Token::Data);
tokenizer.exit(Token::CodeFencedFenceInfo);
- tokenizer.attempt_opt(space_or_tab(), meta_before)(tokenizer)
+ let state_name = space_or_tab(tokenizer);
+ tokenizer.attempt_opt(state_name, StateName::CodeFencedMetaBefore)
}
Some(b'`') if tokenizer.tokenize_state.marker == b'`' => {
tokenizer.concrete = false;
@@ -254,7 +252,7 @@ fn info_inside(tokenizer: &mut Tokenizer) -> State {
}
Some(_) => {
tokenizer.consume();
- State::Fn(Box::new(info_inside))
+ State::Fn(StateName::CodeFencedInfo)
}
}
}
@@ -267,7 +265,7 @@ fn info_inside(tokenizer: &mut Tokenizer) -> State {
/// | console.log(1)
/// | ~~~
/// ```
-fn meta_before(tokenizer: &mut Tokenizer) -> State {
+pub fn meta_before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.exit(Token::CodeFencedFence);
@@ -291,7 +289,7 @@ fn meta_before(tokenizer: &mut Tokenizer) -> State {
/// | console.log(1)
/// | ~~~
/// ```
-fn meta(tokenizer: &mut Tokenizer) -> State {
+pub fn meta(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.exit(Token::Data);
@@ -310,7 +308,7 @@ fn meta(tokenizer: &mut Tokenizer) -> State {
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(meta))
+ State::Fn(StateName::CodeFencedMeta)
}
}
}
@@ -324,10 +322,14 @@ fn meta(tokenizer: &mut Tokenizer) -> State {
/// ^
/// | ~~~
/// ```
-fn at_break(tokenizer: &mut Tokenizer) -> State {
- tokenizer.check(partial_non_lazy_continuation, |ok| {
- Box::new(if ok { at_non_lazy_break } else { after })
- })(tokenizer)
+pub fn at_break(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.check(StateName::NonLazyContinuationStart, |ok| {
+ State::Fn(if ok {
+ StateName::CodeFencedAtNonLazyBreak
+ } else {
+ StateName::CodeFencedAfter
+ })
+ })
}
/// At an eol/eof in code, before a non-lazy closing fence or content.
@@ -339,10 +341,14 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {
/// ^
/// | ~~~
/// ```
-fn at_non_lazy_break(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt(close_begin, |ok| {
- Box::new(if ok { after } else { content_before })
- })(tokenizer)
+pub fn at_non_lazy_break(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.attempt(StateName::CodeFencedCloseBefore, |ok| {
+ State::Fn(if ok {
+ StateName::CodeFencedAfter
+ } else {
+ StateName::CodeFencedContentBefore
+ })
+ })
}
/// Before a closing fence, at the line ending.
@@ -353,13 +359,13 @@ fn at_non_lazy_break(tokenizer: &mut Tokenizer) -> State {
/// ^
/// | ~~~
/// ```
-fn close_begin(tokenizer: &mut Tokenizer) -> State {
+pub fn close_before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\n') => {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
- State::Fn(Box::new(close_start))
+ State::Fn(StateName::CodeFencedCloseStart)
}
_ => unreachable!("expected eol"),
}
@@ -373,19 +379,18 @@ fn close_begin(tokenizer: &mut Tokenizer) -> State {
/// > | ~~~
/// ^
/// ```
-fn close_start(tokenizer: &mut Tokenizer) -> State {
+pub fn close_start(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Token::CodeFencedFence);
- tokenizer.go(
- space_or_tab_min_max(
- 0,
- if tokenizer.parse_state.constructs.code_indented {
- TAB_SIZE - 1
- } else {
- usize::MAX
- },
- ),
- close_before,
- )(tokenizer)
+ let state_name = space_or_tab_min_max(
+ tokenizer,
+ 0,
+ if tokenizer.parse_state.constructs.code_indented {
+ TAB_SIZE - 1
+ } else {
+ usize::MAX
+ },
+ );
+ tokenizer.go(state_name, StateName::CodeFencedBeforeSequenceClose)
}
/// In a closing fence, after optional whitespace, before sequence.
@@ -396,11 +401,11 @@ fn close_start(tokenizer: &mut Tokenizer) -> State {
/// > | ~~~
/// ^
/// ```
-fn close_before(tokenizer: &mut Tokenizer) -> State {
+pub fn before_sequence_close(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'`' | b'~') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
tokenizer.enter(Token::CodeFencedFenceSequence);
- close_sequence(tokenizer)
+ sequence_close(tokenizer)
}
_ => State::Nok,
}
@@ -414,19 +419,20 @@ fn close_before(tokenizer: &mut Tokenizer) -> State {
/// > | ~~~
/// ^
/// ```
-fn close_sequence(tokenizer: &mut Tokenizer) -> State {
+pub fn sequence_close(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'`' | b'~') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
tokenizer.tokenize_state.size_other += 1;
tokenizer.consume();
- State::Fn(Box::new(close_sequence))
+ State::Fn(StateName::CodeFencedSequenceClose)
}
_ if tokenizer.tokenize_state.size_other >= CODE_FENCED_SEQUENCE_SIZE_MIN
&& tokenizer.tokenize_state.size_other >= tokenizer.tokenize_state.size =>
{
tokenizer.tokenize_state.size_other = 0;
tokenizer.exit(Token::CodeFencedFenceSequence);
- tokenizer.attempt_opt(space_or_tab(), close_sequence_after)(tokenizer)
+ let state_name = space_or_tab(tokenizer);
+ tokenizer.attempt_opt(state_name, StateName::CodeFencedAfterSequenceClose)
}
_ => {
tokenizer.tokenize_state.size_other = 0;
@@ -443,7 +449,7 @@ fn close_sequence(tokenizer: &mut Tokenizer) -> State {
/// > | ~~~
/// ^
/// ```
-fn close_sequence_after(tokenizer: &mut Tokenizer) -> State {
+pub fn sequence_close_after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.exit(Token::CodeFencedFence);
@@ -461,11 +467,11 @@ fn close_sequence_after(tokenizer: &mut Tokenizer) -> State {
/// ^
/// | ~~~
/// ```
-fn content_before(tokenizer: &mut Tokenizer) -> State {
+pub fn content_before(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
- State::Fn(Box::new(content_start))
+ State::Fn(StateName::CodeFencedContentStart)
}
/// Before code content, definitely not before a closing fence.
///
@@ -475,11 +481,9 @@ fn content_before(tokenizer: &mut Tokenizer) -> State {
/// ^
/// | ~~~
/// ```
-fn content_start(tokenizer: &mut Tokenizer) -> State {
- tokenizer.go(
- space_or_tab_min_max(0, tokenizer.tokenize_state.prefix),
- content_begin,
- )(tokenizer)
+pub fn content_start(tokenizer: &mut Tokenizer) -> State {
+ let state_name = space_or_tab_min_max(tokenizer, 0, tokenizer.tokenize_state.prefix);
+ tokenizer.go(state_name, StateName::CodeFencedBeforeContentChunk)
}
/// Before code content, after a prefix.
@@ -490,12 +494,12 @@ fn content_start(tokenizer: &mut Tokenizer) -> State {
/// ^
/// | ~~~
/// ```
-fn content_begin(tokenizer: &mut Tokenizer) -> State {
+pub fn before_content_chunk(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => at_break(tokenizer),
_ => {
tokenizer.enter(Token::CodeFlowChunk);
- content_continue(tokenizer)
+ content_chunk(tokenizer)
}
}
}
@@ -508,7 +512,7 @@ fn content_begin(tokenizer: &mut Tokenizer) -> State {
/// ^^^^^^^^^^^^^^
/// | ~~~
/// ```
-fn content_continue(tokenizer: &mut Tokenizer) -> State {
+pub fn content_chunk(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.exit(Token::CodeFlowChunk);
@@ -516,7 +520,7 @@ fn content_continue(tokenizer: &mut Tokenizer) -> State {
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(content_continue))
+ State::Fn(StateName::CodeFencedContentChunk)
}
}
}
@@ -529,7 +533,7 @@ fn content_continue(tokenizer: &mut Tokenizer) -> State {
/// > | ~~~
/// ^
/// ```
-fn after(tokenizer: &mut Tokenizer) -> State {
+pub fn after(tokenizer: &mut Tokenizer) -> State {
tokenizer.exit(Token::CodeFenced);
tokenizer.tokenize_state.marker = 0;
tokenizer.tokenize_state.prefix = 0;
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs
index 81a3080..516b493 100644
--- a/src/construct/code_indented.rs
+++ b/src/construct/code_indented.rs
@@ -48,7 +48,7 @@
use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
use crate::constant::TAB_SIZE;
use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};
/// Start of code (indented).
///
@@ -64,7 +64,8 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
// Do not interrupt paragraphs.
if !tokenizer.interrupt && tokenizer.parse_state.constructs.code_indented {
tokenizer.enter(Token::CodeIndented);
- tokenizer.go(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), at_break)(tokenizer)
+ let state_name = space_or_tab_min_max(tokenizer, TAB_SIZE, TAB_SIZE);
+ tokenizer.go(state_name, StateName::CodeIndentedAtBreak)
} else {
State::Nok
}
@@ -76,15 +77,19 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | aaa
/// ^ ^
/// ```
-fn at_break(tokenizer: &mut Tokenizer) -> State {
+pub fn at_break(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => after(tokenizer),
- Some(b'\n') => tokenizer.attempt(further_start, |ok| {
- Box::new(if ok { at_break } else { after })
- })(tokenizer),
+ Some(b'\n') => tokenizer.attempt(StateName::CodeIndentedFurtherStart, |ok| {
+ State::Fn(if ok {
+ StateName::CodeIndentedAtBreak
+ } else {
+ StateName::CodeIndentedAfter
+ })
+ }),
_ => {
tokenizer.enter(Token::CodeFlowChunk);
- content(tokenizer)
+ inside(tokenizer)
}
}
}
@@ -95,7 +100,7 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {
/// > | aaa
/// ^^^^
/// ```
-fn content(tokenizer: &mut Tokenizer) -> State {
+pub fn inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.exit(Token::CodeFlowChunk);
@@ -103,7 +108,7 @@ fn content(tokenizer: &mut Tokenizer) -> State {
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(content))
+ State::Fn(StateName::CodeIndentedInside)
}
}
}
@@ -114,7 +119,7 @@ fn content(tokenizer: &mut Tokenizer) -> State {
/// > | aaa
/// ^
/// ```
-fn after(tokenizer: &mut Tokenizer) -> State {
+pub fn after(tokenizer: &mut Tokenizer) -> State {
tokenizer.exit(Token::CodeIndented);
// Feel free to interrupt.
tokenizer.interrupt = false;
@@ -128,17 +133,24 @@ fn after(tokenizer: &mut Tokenizer) -> State {
/// ^
/// | bbb
/// ```
-fn further_start(tokenizer: &mut Tokenizer) -> State {
+pub fn further_start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\n') if !tokenizer.lazy => {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
- State::Fn(Box::new(further_start))
+ State::Fn(StateName::CodeIndentedFurtherStart)
+ }
+ _ if !tokenizer.lazy => {
+ let state_name = space_or_tab_min_max(tokenizer, TAB_SIZE, TAB_SIZE);
+ tokenizer.attempt(state_name, |ok| {
+ State::Fn(if ok {
+ StateName::CodeIndentedFurtherEnd
+ } else {
+ StateName::CodeIndentedFurtherBegin
+ })
+ })
}
- _ if !tokenizer.lazy => tokenizer.attempt(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), |ok| {
- Box::new(if ok { further_end } else { further_begin })
- })(tokenizer),
_ => State::Nok,
}
}
@@ -150,7 +162,7 @@ fn further_start(tokenizer: &mut Tokenizer) -> State {
/// ^
/// | bbb
/// ```
-fn further_end(_tokenizer: &mut Tokenizer) -> State {
+pub fn further_end(_tokenizer: &mut Tokenizer) -> State {
State::Ok
}
@@ -161,8 +173,9 @@ fn further_end(_tokenizer: &mut Tokenizer) -> State {
/// > | bbb
/// ^
/// ```
-fn further_begin(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt_opt(space_or_tab(), further_after)(tokenizer)
+pub fn further_begin(tokenizer: &mut Tokenizer) -> State {
+ let state_name = space_or_tab(tokenizer);
+ tokenizer.attempt_opt(state_name, StateName::CodeIndentedFurtherAfter)
}
/// After whitespace, not indented enough.
@@ -172,7 +185,7 @@ fn further_begin(tokenizer: &mut Tokenizer) -> State {
/// > | bbb
/// ^
/// ```
-fn further_after(tokenizer: &mut Tokenizer) -> State {
+pub fn further_after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\n') => further_start(tokenizer),
_ => State::Nok,
diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs
index 31777f4..5bdefbb 100644
--- a/src/construct/code_text.rs
+++ b/src/construct/code_text.rs
@@ -84,7 +84,7 @@
//! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};
/// Start of code (text).
///
@@ -117,11 +117,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | `a`
/// ^
/// ```
-fn sequence_open(tokenizer: &mut Tokenizer) -> State {
+pub fn sequence_open(tokenizer: &mut Tokenizer) -> State {
if let Some(b'`') = tokenizer.current {
tokenizer.tokenize_state.size += 1;
tokenizer.consume();
- State::Fn(Box::new(sequence_open))
+ State::Fn(StateName::CodeTextSequenceOpen)
} else {
tokenizer.exit(Token::CodeTextSequence);
between(tokenizer)
@@ -134,7 +134,7 @@ fn sequence_open(tokenizer: &mut Tokenizer) -> State {
/// > | `a`
/// ^^
/// ```
-fn between(tokenizer: &mut Tokenizer) -> State {
+pub fn between(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => {
tokenizer.tokenize_state.size = 0;
@@ -144,7 +144,7 @@ fn between(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
- State::Fn(Box::new(between))
+ State::Fn(StateName::CodeTextBetween)
}
Some(b'`') => {
tokenizer.enter(Token::CodeTextSequence);
@@ -163,7 +163,7 @@ fn between(tokenizer: &mut Tokenizer) -> State {
/// > | `a`
/// ^
/// ```
-fn data(tokenizer: &mut Tokenizer) -> State {
+pub fn data(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n' | b'`') => {
tokenizer.exit(Token::CodeTextData);
@@ -171,7 +171,7 @@ fn data(tokenizer: &mut Tokenizer) -> State {
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(data))
+ State::Fn(StateName::CodeTextData)
}
}
}
@@ -182,12 +182,12 @@ fn data(tokenizer: &mut Tokenizer) -> State {
/// > | `a`
/// ^
/// ```
-fn sequence_close(tokenizer: &mut Tokenizer) -> State {
+pub fn sequence_close(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'`') => {
tokenizer.tokenize_state.size_other += 1;
tokenizer.consume();
- State::Fn(Box::new(sequence_close))
+ State::Fn(StateName::CodeTextSequenceClose)
}
_ => {
if tokenizer.tokenize_state.size == tokenizer.tokenize_state.size_other {
diff --git a/src/construct/definition.rs b/src/construct/definition.rs
index a56dab4..fbad99d 100644
--- a/src/construct/definition.rs
+++ b/src/construct/definition.rs
@@ -93,14 +93,9 @@
//! [html-a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element
//! [html-img]: https://html.spec.whatwg.org/multipage/embedded-content.html#the-img-element
-use crate::construct::{
- partial_destination::start as destination,
- partial_label::start as label,
- partial_space_or_tab::{space_or_tab, space_or_tab_eol},
- partial_title::start as title,
-};
+use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_eol};
use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};
use crate::util::skip::opt_back as skip_opt_back;
/// At the start of a definition.
@@ -124,7 +119,8 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
if possible && tokenizer.parse_state.constructs.definition {
tokenizer.enter(Token::Definition);
// Note: arbitrary whitespace allowed even if code (indented) is on.
- tokenizer.attempt_opt(space_or_tab(), before)(tokenizer)
+ let state_name = space_or_tab(tokenizer);
+ tokenizer.attempt_opt(state_name, StateName::DefinitionBefore)
} else {
State::Nok
}
@@ -136,13 +132,13 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | [a]: b "c"
/// ^
/// ```
-fn before(tokenizer: &mut Tokenizer) -> State {
+pub fn before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'[') => {
tokenizer.tokenize_state.token_1 = Token::DefinitionLabel;
tokenizer.tokenize_state.token_2 = Token::DefinitionLabelMarker;
tokenizer.tokenize_state.token_3 = Token::DefinitionLabelString;
- tokenizer.go(label, label_after)(tokenizer)
+ tokenizer.go(StateName::LabelStart, StateName::DefinitionLabelAfter)
}
_ => State::Nok,
}
@@ -154,7 +150,7 @@ fn before(tokenizer: &mut Tokenizer) -> State {
/// > | [a]: b "c"
/// ^
/// ```
-fn label_after(tokenizer: &mut Tokenizer) -> State {
+pub fn label_after(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.token_1 = Token::Data;
tokenizer.tokenize_state.token_2 = Token::Data;
tokenizer.tokenize_state.token_3 = Token::Data;
@@ -164,34 +160,38 @@ fn label_after(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Token::DefinitionMarker);
tokenizer.consume();
tokenizer.exit(Token::DefinitionMarker);
- State::Fn(Box::new(
- tokenizer.attempt_opt(space_or_tab_eol(), destination_before),
- ))
+ State::Fn(StateName::DefinitionMarkerAfter)
}
_ => State::Nok,
}
}
+/// To do.
+pub fn marker_after(tokenizer: &mut Tokenizer) -> State {
+ let state_name = space_or_tab_eol(tokenizer);
+ tokenizer.attempt_opt(state_name, StateName::DefinitionDestinationBefore)
+}
+
/// Before a destination.
///
/// ```markdown
/// > | [a]: b "c"
/// ^
/// ```
-fn destination_before(tokenizer: &mut Tokenizer) -> State {
+pub fn destination_before(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.token_1 = Token::DefinitionDestination;
tokenizer.tokenize_state.token_2 = Token::DefinitionDestinationLiteral;
tokenizer.tokenize_state.token_3 = Token::DefinitionDestinationLiteralMarker;
tokenizer.tokenize_state.token_4 = Token::DefinitionDestinationRaw;
tokenizer.tokenize_state.token_5 = Token::DefinitionDestinationString;
tokenizer.tokenize_state.size_other = usize::MAX;
- tokenizer.attempt(destination, |ok| {
- Box::new(if ok {
- destination_after
+ tokenizer.attempt(StateName::DestinationStart, |ok| {
+ State::Fn(if ok {
+ StateName::DefinitionDestinationAfter
} else {
- destination_missing
+ StateName::DefinitionDestinationMissing
})
- })(tokenizer)
+ })
}
/// After a destination.
@@ -200,18 +200,18 @@ fn destination_before(tokenizer: &mut Tokenizer) -> State {
/// > | [a]: b "c"
/// ^
/// ```
-fn destination_after(tokenizer: &mut Tokenizer) -> State {
+pub fn destination_after(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.token_1 = Token::Data;
tokenizer.tokenize_state.token_2 = Token::Data;
tokenizer.tokenize_state.token_3 = Token::Data;
tokenizer.tokenize_state.token_4 = Token::Data;
tokenizer.tokenize_state.token_5 = Token::Data;
tokenizer.tokenize_state.size_other = 0;
- tokenizer.attempt_opt(title_before, after)(tokenizer)
+ tokenizer.attempt_opt(StateName::DefinitionTitleBefore, StateName::DefinitionAfter)
}
/// Without destination.
-fn destination_missing(tokenizer: &mut Tokenizer) -> State {
+pub fn destination_missing(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.token_1 = Token::Data;
tokenizer.tokenize_state.token_2 = Token::Data;
tokenizer.tokenize_state.token_3 = Token::Data;
@@ -229,8 +229,9 @@ fn destination_missing(tokenizer: &mut Tokenizer) -> State {
/// > | [a]: b "c"
/// ^
/// ```
-fn after(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt_opt(space_or_tab(), after_whitespace)(tokenizer)
+pub fn after(tokenizer: &mut Tokenizer) -> State {
+ let state_name = space_or_tab(tokenizer);
+ tokenizer.attempt_opt(state_name, StateName::DefinitionAfterWhitespace)
}
/// After a definition, after optional whitespace.
@@ -241,7 +242,7 @@ fn after(tokenizer: &mut Tokenizer) -> State {
/// > | [a]: b "c"
/// ^
/// ```
-fn after_whitespace(tokenizer: &mut Tokenizer) -> State {
+pub fn after_whitespace(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.exit(Token::Definition);
@@ -261,8 +262,9 @@ fn after_whitespace(tokenizer: &mut Tokenizer) -> State {
/// > | [a]: b "c"
/// ^
/// ```
-fn title_before(tokenizer: &mut Tokenizer) -> State {
- tokenizer.go(space_or_tab_eol(), title_before_marker)(tokenizer)
+pub fn title_before(tokenizer: &mut Tokenizer) -> State {
+ let state_name = space_or_tab_eol(tokenizer);
+ tokenizer.go(state_name, StateName::DefinitionTitleBeforeMarker)
}
/// Before a title, after a line ending.
@@ -272,11 +274,11 @@ fn title_before(tokenizer: &mut Tokenizer) -> State {
/// > | "c"
/// ^
/// ```
-fn title_before_marker(tokenizer: &mut Tokenizer) -> State {
+pub fn title_before_marker(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.token_1 = Token::DefinitionTitle;
tokenizer.tokenize_state.token_2 = Token::DefinitionTitleMarker;
tokenizer.tokenize_state.token_3 = Token::DefinitionTitleString;
- tokenizer.go(title, title_after)(tokenizer)
+ tokenizer.go(StateName::TitleStart, StateName::DefinitionTitleAfter)
}
/// After a title.
@@ -285,11 +287,15 @@ fn title_before_marker(tokenizer: &mut Tokenizer) -> State {
/// > | [a]: b "c"
/// ^
/// ```
-fn title_after(tokenizer: &mut Tokenizer) -> State {
+pub fn title_after(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.token_1 = Token::Data;
tokenizer.tokenize_state.token_2 = Token::Data;
tokenizer.tokenize_state.token_3 = Token::Data;
- tokenizer.attempt_opt(space_or_tab(), title_after_after_optional_whitespace)(tokenizer)
+ let state_name = space_or_tab(tokenizer);
+ tokenizer.attempt_opt(
+ state_name,
+ StateName::DefinitionTitleAfterOptionalWhitespace,
+ )
}
/// After a title, after optional whitespace.
@@ -298,7 +304,7 @@ fn title_after(tokenizer: &mut Tokenizer) -> State {
/// > | [a]: b "c"
/// ^
/// ```
-fn title_after_after_optional_whitespace(tokenizer: &mut Tokenizer) -> State {
+pub fn title_after_optional_whitespace(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => State::Ok,
_ => State::Nok,
diff --git a/src/construct/hard_break_escape.rs b/src/construct/hard_break_escape.rs
index d09bf54..47b7e94 100644
--- a/src/construct/hard_break_escape.rs
+++ b/src/construct/hard_break_escape.rs
@@ -40,7 +40,7 @@
//! [html]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-br-element
use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};
/// Start of a hard break (escape).
///
@@ -54,7 +54,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
Some(b'\\') if tokenizer.parse_state.constructs.hard_break_escape => {
tokenizer.enter(Token::HardBreakEscape);
tokenizer.consume();
- State::Fn(Box::new(after))
+ State::Fn(StateName::HardBreakEscapeAfter)
}
_ => State::Nok,
}
@@ -67,7 +67,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ^
/// | b
/// ```
-fn after(tokenizer: &mut Tokenizer) -> State {
+pub fn after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\n') => {
tokenizer.exit(Token::HardBreakEscape);
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index 6751567..45c4758 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -57,7 +57,7 @@
use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
use crate::constant::{HEADING_ATX_OPENING_FENCE_SIZE_MAX, TAB_SIZE};
use crate::token::Token;
-use crate::tokenizer::{ContentType, Event, EventType, State, Tokenizer};
+use crate::tokenizer::{ContentType, Event, EventType, State, StateName, Tokenizer};
/// Start of a heading (atx).
///
@@ -68,17 +68,16 @@ use crate::tokenizer::{ContentType, Event, EventType, State, Tokenizer};
pub fn start(tokenizer: &mut Tokenizer) -> State {
if tokenizer.parse_state.constructs.heading_atx {
tokenizer.enter(Token::HeadingAtx);
- tokenizer.go(
- space_or_tab_min_max(
- 0,
- if tokenizer.parse_state.constructs.code_indented {
- TAB_SIZE - 1
- } else {
- usize::MAX
- },
- ),
- before,
- )(tokenizer)
+ let state_name = space_or_tab_min_max(
+ tokenizer,
+ 0,
+ if tokenizer.parse_state.constructs.code_indented {
+ TAB_SIZE - 1
+ } else {
+ usize::MAX
+ },
+ );
+ tokenizer.go(state_name, StateName::HeadingAtxBefore)
} else {
State::Nok
}
@@ -90,7 +89,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | ## aa
/// ^
/// ```
-fn before(tokenizer: &mut Tokenizer) -> State {
+pub fn before(tokenizer: &mut Tokenizer) -> State {
if Some(b'#') == tokenizer.current {
tokenizer.enter(Token::HeadingAtxSequence);
sequence_open(tokenizer)
@@ -105,7 +104,7 @@ fn before(tokenizer: &mut Tokenizer) -> State {
/// > | ## aa
/// ^
/// ```
-fn sequence_open(tokenizer: &mut Tokenizer) -> State {
+pub fn sequence_open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') if tokenizer.tokenize_state.size > 0 => {
tokenizer.tokenize_state.size = 0;
@@ -115,12 +114,13 @@ fn sequence_open(tokenizer: &mut Tokenizer) -> State {
Some(b'#') if tokenizer.tokenize_state.size < HEADING_ATX_OPENING_FENCE_SIZE_MAX => {
tokenizer.tokenize_state.size += 1;
tokenizer.consume();
- State::Fn(Box::new(sequence_open))
+ State::Fn(StateName::HeadingAtxSequenceOpen)
}
_ if tokenizer.tokenize_state.size > 0 => {
tokenizer.tokenize_state.size = 0;
tokenizer.exit(Token::HeadingAtxSequence);
- tokenizer.go(space_or_tab(), at_break)(tokenizer)
+ let state_name = space_or_tab(tokenizer);
+ tokenizer.go(state_name, StateName::HeadingAtxAtBreak)
}
_ => {
tokenizer.tokenize_state.size = 0;
@@ -135,7 +135,7 @@ fn sequence_open(tokenizer: &mut Tokenizer) -> State {
/// > | ## aa
/// ^
/// ```
-fn at_break(tokenizer: &mut Tokenizer) -> State {
+pub fn at_break(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.exit(Token::HeadingAtx);
@@ -144,10 +144,13 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {
tokenizer.interrupt = false;
State::Ok
}
- Some(b'\t' | b' ') => tokenizer.go(space_or_tab(), at_break)(tokenizer),
+ Some(b'\t' | b' ') => {
+ let state_name = space_or_tab(tokenizer);
+ tokenizer.go(state_name, StateName::HeadingAtxAtBreak)
+ }
Some(b'#') => {
tokenizer.enter(Token::HeadingAtxSequence);
- further_sequence(tokenizer)
+ sequence_further(tokenizer)
}
Some(_) => {
tokenizer.enter_with_content(Token::Data, Some(ContentType::Text));
@@ -164,10 +167,10 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {
/// > | ## aa ##
/// ^
/// ```
-fn further_sequence(tokenizer: &mut Tokenizer) -> State {
+pub fn sequence_further(tokenizer: &mut Tokenizer) -> State {
if let Some(b'#') = tokenizer.current {
tokenizer.consume();
- State::Fn(Box::new(further_sequence))
+ State::Fn(StateName::HeadingAtxSequenceFurther)
} else {
tokenizer.exit(Token::HeadingAtxSequence);
at_break(tokenizer)
@@ -180,7 +183,7 @@ fn further_sequence(tokenizer: &mut Tokenizer) -> State {
/// > | ## aa
/// ^
/// ```
-fn data(tokenizer: &mut Tokenizer) -> State {
+pub fn data(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// Note: `#` for closing sequence must be preceded by whitespace, otherwise it’s just text.
None | Some(b'\t' | b'\n' | b' ') => {
@@ -189,7 +192,7 @@ fn data(tokenizer: &mut Tokenizer) -> State {
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(data))
+ State::Fn(StateName::HeadingAtxData)
}
}
}
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index 675b2ac..50feba4 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -60,7 +60,7 @@
use crate::constant::TAB_SIZE;
use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
use crate::token::Token;
-use crate::tokenizer::{EventType, State, Tokenizer};
+use crate::tokenizer::{EventType, State, StateName, Tokenizer};
use crate::util::skip::opt_back as skip_opt_back;
/// At a line ending, presumably an underline.
@@ -83,17 +83,17 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
.token_type
== Token::Paragraph)
{
- tokenizer.go(
- space_or_tab_min_max(
- 0,
- if tokenizer.parse_state.constructs.code_indented {
- TAB_SIZE - 1
- } else {
- usize::MAX
- },
- ),
- before,
- )(tokenizer)
+ let state_name = space_or_tab_min_max(
+ tokenizer,
+ 0,
+ if tokenizer.parse_state.constructs.code_indented {
+ TAB_SIZE - 1
+ } else {
+ usize::MAX
+ },
+ );
+
+ tokenizer.go(state_name, StateName::HeadingSetextBefore)
} else {
State::Nok
}
@@ -106,7 +106,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | ==
/// ^
/// ```
-fn before(tokenizer: &mut Tokenizer) -> State {
+pub fn before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'-' | b'=') => {
tokenizer.tokenize_state.marker = tokenizer.current.unwrap();
@@ -124,16 +124,17 @@ fn before(tokenizer: &mut Tokenizer) -> State {
/// > | ==
/// ^
/// ```
-fn inside(tokenizer: &mut Tokenizer) -> State {
+pub fn inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'-' | b'=') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
tokenizer.consume();
- State::Fn(Box::new(inside))
+ State::Fn(StateName::HeadingSetextInside)
}
_ => {
tokenizer.tokenize_state.marker = 0;
tokenizer.exit(Token::HeadingSetextUnderline);
- tokenizer.attempt_opt(space_or_tab(), after)(tokenizer)
+ let state_name = space_or_tab(tokenizer);
+ tokenizer.attempt_opt(state_name, StateName::HeadingSetextAfter)
}
}
}
@@ -145,7 +146,7 @@ fn inside(tokenizer: &mut Tokenizer) -> State {
/// > | ==
/// ^
/// ```
-fn after(tokenizer: &mut Tokenizer) -> State {
+pub fn after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
// Feel free to interrupt.
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index aaa803d..779146c 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -101,13 +101,11 @@
use crate::constant::{
HTML_BLOCK_NAMES, HTML_CDATA_PREFIX, HTML_RAW_NAMES, HTML_RAW_SIZE_MAX, TAB_SIZE,
};
-use crate::construct::{
- blank_line::start as blank_line,
- partial_non_lazy_continuation::start as partial_non_lazy_continuation,
- partial_space_or_tab::{space_or_tab_with_options, Options as SpaceOrTabOptions},
+use crate::construct::partial_space_or_tab::{
+ space_or_tab_with_options, Options as SpaceOrTabOptions,
};
use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};
use crate::util::slice::Slice;
/// Symbol for `<script>` (condition 1).
@@ -134,8 +132,9 @@ const COMPLETE: u8 = 7;
pub fn start(tokenizer: &mut Tokenizer) -> State {
if tokenizer.parse_state.constructs.html_flow {
tokenizer.enter(Token::HtmlFlow);
- tokenizer.go(
- space_or_tab_with_options(SpaceOrTabOptions {
+ let state_name = space_or_tab_with_options(
+ tokenizer,
+ SpaceOrTabOptions {
kind: Token::HtmlFlowData,
min: 0,
max: if tokenizer.parse_state.constructs.code_indented {
@@ -145,9 +144,10 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
},
connect: false,
content_type: None,
- }),
- before,
- )(tokenizer)
+ },
+ );
+
+ tokenizer.go(state_name, StateName::HtmlFlowBefore)
} else {
State::Nok
}
@@ -159,11 +159,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | <x />
/// ^
/// ```
-fn before(tokenizer: &mut Tokenizer) -> State {
+pub fn before(tokenizer: &mut Tokenizer) -> State {
if Some(b'<') == tokenizer.current {
tokenizer.enter(Token::HtmlFlowData);
tokenizer.consume();
- State::Fn(Box::new(open))
+ State::Fn(StateName::HtmlFlowOpen)
} else {
State::Nok
}
@@ -179,17 +179,17 @@ fn before(tokenizer: &mut Tokenizer) -> State {
/// > | <!--xxx-->
/// ^
/// ```
-fn open(tokenizer: &mut Tokenizer) -> State {
+pub fn open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'!') => {
tokenizer.consume();
- State::Fn(Box::new(declaration_open))
+ State::Fn(StateName::HtmlFlowDeclarationOpen)
}
Some(b'/') => {
tokenizer.consume();
tokenizer.tokenize_state.seen = true;
tokenizer.tokenize_state.start = tokenizer.point.index;
- State::Fn(Box::new(tag_close_start))
+ State::Fn(StateName::HtmlFlowTagCloseStart)
}
Some(b'?') => {
tokenizer.tokenize_state.marker = INSTRUCTION;
@@ -198,7 +198,7 @@ fn open(tokenizer: &mut Tokenizer) -> State {
tokenizer.concrete = true;
// While we’re in an instruction instead of a declaration, we’re on a `?`
// right now, so we do need to search for `>`, similar to declarations.
- State::Fn(Box::new(continuation_declaration_inside))
+ State::Fn(StateName::HtmlFlowContinuationDeclarationInside)
}
// ASCII alphabetical.
Some(b'A'..=b'Z' | b'a'..=b'z') => {
@@ -219,24 +219,24 @@ fn open(tokenizer: &mut Tokenizer) -> State {
/// > | <![CDATA[>&<]]>
/// ^
/// ```
-fn declaration_open(tokenizer: &mut Tokenizer) -> State {
+pub fn declaration_open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'-') => {
tokenizer.consume();
tokenizer.tokenize_state.marker = COMMENT;
- State::Fn(Box::new(comment_open_inside))
+ State::Fn(StateName::HtmlFlowCommentOpenInside)
}
Some(b'A'..=b'Z' | b'a'..=b'z') => {
tokenizer.consume();
tokenizer.tokenize_state.marker = DECLARATION;
// Do not form containers.
tokenizer.concrete = true;
- State::Fn(Box::new(continuation_declaration_inside))
+ State::Fn(StateName::HtmlFlowContinuationDeclarationInside)
}
Some(b'[') => {
tokenizer.consume();
tokenizer.tokenize_state.marker = CDATA;
- State::Fn(Box::new(cdata_open_inside))
+ State::Fn(StateName::HtmlFlowCdataOpenInside)
}
_ => State::Nok,
}
@@ -248,12 +248,12 @@ fn declaration_open(tokenizer: &mut Tokenizer) -> State {
/// > | <!--xxx-->
/// ^
/// ```
-fn comment_open_inside(tokenizer: &mut Tokenizer) -> State {
+pub fn comment_open_inside(tokenizer: &mut Tokenizer) -> State {
if let Some(b'-') = tokenizer.current {
tokenizer.consume();
// Do not form containers.
tokenizer.concrete = true;
- State::Fn(Box::new(continuation_declaration_inside))
+ State::Fn(StateName::HtmlFlowContinuationDeclarationInside)
} else {
tokenizer.tokenize_state.marker = 0;
State::Nok
@@ -266,7 +266,7 @@ fn comment_open_inside(tokenizer: &mut Tokenizer) -> State {
/// > | <![CDATA[>&<]]>
/// ^^^^^^
/// ```
-fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State {
+pub fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State {
if tokenizer.current == Some(HTML_CDATA_PREFIX[tokenizer.tokenize_state.size]) {
tokenizer.tokenize_state.size += 1;
tokenizer.consume();
@@ -275,9 +275,9 @@ fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.size = 0;
// Do not form containers.
tokenizer.concrete = true;
- State::Fn(Box::new(continuation))
+ State::Fn(StateName::HtmlFlowContinuation)
} else {
- State::Fn(Box::new(cdata_open_inside))
+ State::Fn(StateName::HtmlFlowCdataOpenInside)
}
} else {
tokenizer.tokenize_state.marker = 0;
@@ -292,10 +292,10 @@ fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State {
/// > | </x>
/// ^
/// ```
-fn tag_close_start(tokenizer: &mut Tokenizer) -> State {
+pub fn tag_close_start(tokenizer: &mut Tokenizer) -> State {
if let Some(b'A'..=b'Z' | b'a'..=b'z') = tokenizer.current {
tokenizer.consume();
- State::Fn(Box::new(tag_name))
+ State::Fn(StateName::HtmlFlowTagName)
} else {
tokenizer.tokenize_state.seen = false;
tokenizer.tokenize_state.start = 0;
@@ -311,7 +311,7 @@ fn tag_close_start(tokenizer: &mut Tokenizer) -> State {
/// > | </ab>
/// ^^
/// ```
-fn tag_name(tokenizer: &mut Tokenizer) -> State {
+pub fn tag_name(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\t' | b'\n' | b' ' | b'/' | b'>') => {
let closing_tag = tokenizer.tokenize_state.seen;
@@ -340,7 +340,7 @@ fn tag_name(tokenizer: &mut Tokenizer) -> State {
if slash {
tokenizer.consume();
- State::Fn(Box::new(basic_self_closing))
+ State::Fn(StateName::HtmlFlowBasicSelfClosing)
} else {
// Do not form containers.
tokenizer.concrete = true;
@@ -363,7 +363,7 @@ fn tag_name(tokenizer: &mut Tokenizer) -> State {
// ASCII alphanumerical and `-`.
Some(b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => {
tokenizer.consume();
- State::Fn(Box::new(tag_name))
+ State::Fn(StateName::HtmlFlowTagName)
}
Some(_) => {
tokenizer.tokenize_state.seen = false;
@@ -378,12 +378,12 @@ fn tag_name(tokenizer: &mut Tokenizer) -> State {
/// > | <div/>
/// ^
/// ```
-fn basic_self_closing(tokenizer: &mut Tokenizer) -> State {
+pub fn basic_self_closing(tokenizer: &mut Tokenizer) -> State {
if let Some(b'>') = tokenizer.current {
tokenizer.consume();
// Do not form containers.
tokenizer.concrete = true;
- State::Fn(Box::new(continuation))
+ State::Fn(StateName::HtmlFlowContinuation)
} else {
tokenizer.tokenize_state.marker = 0;
State::Nok
@@ -396,11 +396,11 @@ fn basic_self_closing(tokenizer: &mut Tokenizer) -> State {
/// > | <x/>
/// ^
/// ```
-fn complete_closing_tag_after(tokenizer: &mut Tokenizer) -> State {
+pub fn complete_closing_tag_after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\t' | b' ') => {
tokenizer.consume();
- State::Fn(Box::new(complete_closing_tag_after))
+ State::Fn(StateName::HtmlFlowCompleteClosingTagAfter)
}
_ => complete_end(tokenizer),
}
@@ -425,20 +425,20 @@ fn complete_closing_tag_after(tokenizer: &mut Tokenizer) -> State {
/// > | <a >
/// ^
/// ```
-fn complete_attribute_name_before(tokenizer: &mut Tokenizer) -> State {
+pub fn complete_attribute_name_before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\t' | b' ') => {
tokenizer.consume();
- State::Fn(Box::new(complete_attribute_name_before))
+ State::Fn(StateName::HtmlFlowCompleteAttributeNameBefore)
}
Some(b'/') => {
tokenizer.consume();
- State::Fn(Box::new(complete_end))
+ State::Fn(StateName::HtmlFlowCompleteEnd)
}
// ASCII alphanumerical and `:` and `_`.
Some(b'0'..=b'9' | b':' | b'A'..=b'Z' | b'_' | b'a'..=b'z') => {
tokenizer.consume();
- State::Fn(Box::new(complete_attribute_name))
+ State::Fn(StateName::HtmlFlowCompleteAttributeName)
}
_ => complete_end(tokenizer),
}
@@ -454,12 +454,12 @@ fn complete_attribute_name_before(tokenizer: &mut Tokenizer) -> State {
/// > | <a b>
/// ^
/// ```
-fn complete_attribute_name(tokenizer: &mut Tokenizer) -> State {
+pub fn complete_attribute_name(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// ASCII alphanumerical and `-`, `.`, `:`, and `_`.
Some(b'-' | b'.' | b'0'..=b'9' | b':' | b'A'..=b'Z' | b'_' | b'a'..=b'z') => {
tokenizer.consume();
- State::Fn(Box::new(complete_attribute_name))
+ State::Fn(StateName::HtmlFlowCompleteAttributeName)
}
_ => complete_attribute_name_after(tokenizer),
}
@@ -474,15 +474,15 @@ fn complete_attribute_name(tokenizer: &mut Tokenizer) -> State {
/// > | <a b=c>
/// ^
/// ```
-fn complete_attribute_name_after(tokenizer: &mut Tokenizer) -> State {
+pub fn complete_attribute_name_after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\t' | b' ') => {
tokenizer.consume();
- State::Fn(Box::new(complete_attribute_name_after))
+ State::Fn(StateName::HtmlFlowCompleteAttributeNameAfter)
}
Some(b'=') => {
tokenizer.consume();
- State::Fn(Box::new(complete_attribute_value_before))
+ State::Fn(StateName::HtmlFlowCompleteAttributeValueBefore)
}
_ => complete_attribute_name_before(tokenizer),
}
@@ -497,7 +497,7 @@ fn complete_attribute_name_after(tokenizer: &mut Tokenizer) -> State {
/// > | <a b="c">
/// ^
/// ```
-fn complete_attribute_value_before(tokenizer: &mut Tokenizer) -> State {
+pub fn complete_attribute_value_before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'<' | b'=' | b'>' | b'`') => {
tokenizer.tokenize_state.marker = 0;
@@ -505,12 +505,12 @@ fn complete_attribute_value_before(tokenizer: &mut Tokenizer) -> State {
}
Some(b'\t' | b' ') => {
tokenizer.consume();
- State::Fn(Box::new(complete_attribute_value_before))
+ State::Fn(StateName::HtmlFlowCompleteAttributeValueBefore)
}
Some(b'"' | b'\'') => {
tokenizer.tokenize_state.marker_other = tokenizer.current.unwrap();
tokenizer.consume();
- State::Fn(Box::new(complete_attribute_value_quoted))
+ State::Fn(StateName::HtmlFlowCompleteAttributeValueQuoted)
}
_ => complete_attribute_value_unquoted(tokenizer),
}
@@ -524,7 +524,7 @@ fn complete_attribute_value_before(tokenizer: &mut Tokenizer) -> State {
/// > | <a b='c'>
/// ^
/// ```
-fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {
+pub fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.tokenize_state.marker = 0;
@@ -536,11 +536,11 @@ fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {
{
tokenizer.tokenize_state.marker_other = 0;
tokenizer.consume();
- State::Fn(Box::new(complete_attribute_value_quoted_after))
+ State::Fn(StateName::HtmlFlowCompleteAttributeValueQuotedAfter)
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(complete_attribute_value_quoted))
+ State::Fn(StateName::HtmlFlowCompleteAttributeValueQuoted)
}
}
}
@@ -551,14 +551,14 @@ fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {
/// > | <a b=c>
/// ^
/// ```
-fn complete_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State {
+pub fn complete_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\t' | b'\n' | b' ' | b'"' | b'\'' | b'/' | b'<' | b'=' | b'>' | b'`') => {
complete_attribute_name_after(tokenizer)
}
Some(_) => {
tokenizer.consume();
- State::Fn(Box::new(complete_attribute_value_unquoted))
+ State::Fn(StateName::HtmlFlowCompleteAttributeValueUnquoted)
}
}
}
@@ -570,7 +570,7 @@ fn complete_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State {
/// > | <a b="c">
/// ^
/// ```
-fn complete_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State {
+pub fn complete_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State {
if let Some(b'\t' | b' ' | b'/' | b'>') = tokenizer.current {
complete_attribute_name_before(tokenizer)
} else {
@@ -585,10 +585,10 @@ fn complete_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State {
/// > | <a b="c">
/// ^
/// ```
-fn complete_end(tokenizer: &mut Tokenizer) -> State {
+pub fn complete_end(tokenizer: &mut Tokenizer) -> State {
if let Some(b'>') = tokenizer.current {
tokenizer.consume();
- State::Fn(Box::new(complete_after))
+ State::Fn(StateName::HtmlFlowCompleteAfter)
} else {
tokenizer.tokenize_state.marker = 0;
State::Nok
@@ -601,7 +601,7 @@ fn complete_end(tokenizer: &mut Tokenizer) -> State {
/// > | <x>
/// ^
/// ```
-fn complete_after(tokenizer: &mut Tokenizer) -> State {
+pub fn complete_after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
// Do not form containers.
@@ -610,7 +610,7 @@ fn complete_after(tokenizer: &mut Tokenizer) -> State {
}
Some(b'\t' | b' ') => {
tokenizer.consume();
- State::Fn(Box::new(complete_after))
+ State::Fn(StateName::HtmlFlowCompleteAfter)
}
Some(_) => {
tokenizer.tokenize_state.marker = 0;
@@ -625,20 +625,20 @@ fn complete_after(tokenizer: &mut Tokenizer) -> State {
/// > | <!--xxx-->
/// ^
/// ```
-fn continuation(tokenizer: &mut Tokenizer) -> State {
+pub fn continuation(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\n')
if tokenizer.tokenize_state.marker == BASIC
|| tokenizer.tokenize_state.marker == COMPLETE =>
{
tokenizer.exit(Token::HtmlFlowData);
- tokenizer.check(blank_line_before, |ok| {
- Box::new(if ok {
- continuation_after
+ tokenizer.check(StateName::HtmlFlowBlankLineBefore, |ok| {
+ State::Fn(if ok {
+ StateName::HtmlFlowContinuationAfter
} else {
- continuation_start
+ StateName::HtmlFlowContinuationStart
})
- })(tokenizer)
+ })
}
// Note: important that this is after the basic/complete case.
None | Some(b'\n') => {
@@ -647,27 +647,27 @@ fn continuation(tokenizer: &mut Tokenizer) -> State {
}
Some(b'-') if tokenizer.tokenize_state.marker == COMMENT => {
tokenizer.consume();
- State::Fn(Box::new(continuation_comment_inside))
+ State::Fn(StateName::HtmlFlowContinuationCommentInside)
}
Some(b'<') if tokenizer.tokenize_state.marker == RAW => {
tokenizer.consume();
- State::Fn(Box::new(continuation_raw_tag_open))
+ State::Fn(StateName::HtmlFlowContinuationRawTagOpen)
}
Some(b'>') if tokenizer.tokenize_state.marker == DECLARATION => {
tokenizer.consume();
- State::Fn(Box::new(continuation_close))
+ State::Fn(StateName::HtmlFlowContinuationClose)
}
Some(b'?') if tokenizer.tokenize_state.marker == INSTRUCTION => {
tokenizer.consume();
- State::Fn(Box::new(continuation_declaration_inside))
+ State::Fn(StateName::HtmlFlowContinuationDeclarationInside)
}
Some(b']') if tokenizer.tokenize_state.marker == CDATA => {
tokenizer.consume();
- State::Fn(Box::new(continuation_character_data_inside))
+ State::Fn(StateName::HtmlFlowContinuationCdataInside)
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(continuation))
+ State::Fn(StateName::HtmlFlowContinuation)
}
}
}
@@ -679,14 +679,14 @@ fn continuation(tokenizer: &mut Tokenizer) -> State {
/// ^
/// | asd
/// ```
-fn continuation_start(tokenizer: &mut Tokenizer) -> State {
- tokenizer.check(partial_non_lazy_continuation, |ok| {
- Box::new(if ok {
- continuation_start_non_lazy
+pub fn continuation_start(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.check(StateName::NonLazyContinuationStart, |ok| {
+ State::Fn(if ok {
+ StateName::HtmlFlowContinuationStartNonLazy
} else {
- continuation_after
+ StateName::HtmlFlowContinuationAfter
})
- })(tokenizer)
+ })
}
/// In continuation, at an eol, before non-lazy content.
@@ -696,13 +696,13 @@ fn continuation_start(tokenizer: &mut Tokenizer) -> State {
/// ^
/// | asd
/// ```
-fn continuation_start_non_lazy(tokenizer: &mut Tokenizer) -> State {
+pub fn continuation_start_non_lazy(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\n') => {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
- State::Fn(Box::new(continuation_before))
+ State::Fn(StateName::HtmlFlowContinuationBefore)
}
_ => unreachable!("expected eol"),
}
@@ -715,7 +715,7 @@ fn continuation_start_non_lazy(tokenizer: &mut Tokenizer) -> State {
/// > | asd
/// ^
/// ```
-fn continuation_before(tokenizer: &mut Tokenizer) -> State {
+pub fn continuation_before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => continuation_start(tokenizer),
_ => {
@@ -731,11 +731,11 @@ fn continuation_before(tokenizer: &mut Tokenizer) -> State {
/// > | <!--xxx-->
/// ^
/// ```
-fn continuation_comment_inside(tokenizer: &mut Tokenizer) -> State {
+pub fn continuation_comment_inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'-') => {
tokenizer.consume();
- State::Fn(Box::new(continuation_declaration_inside))
+ State::Fn(StateName::HtmlFlowContinuationDeclarationInside)
}
_ => continuation(tokenizer),
}
@@ -747,12 +747,12 @@ fn continuation_comment_inside(tokenizer: &mut Tokenizer) -> State {
/// > | <script>console.log(1)</script>
/// ^
/// ```
-fn continuation_raw_tag_open(tokenizer: &mut Tokenizer) -> State {
+pub fn continuation_raw_tag_open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'/') => {
tokenizer.consume();
tokenizer.tokenize_state.start = tokenizer.point.index;
- State::Fn(Box::new(continuation_raw_end_tag))
+ State::Fn(StateName::HtmlFlowContinuationRawEndTag)
}
_ => continuation(tokenizer),
}
@@ -764,7 +764,7 @@ fn continuation_raw_tag_open(tokenizer: &mut Tokenizer) -> State {
/// > | <script>console.log(1)</script>
/// ^^^^^^
/// ```
-fn continuation_raw_end_tag(tokenizer: &mut Tokenizer) -> State {
+pub fn continuation_raw_end_tag(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'>') => {
// Guaranteed to be valid ASCII bytes.
@@ -779,7 +779,7 @@ fn continuation_raw_end_tag(tokenizer: &mut Tokenizer) -> State {
if HTML_RAW_NAMES.contains(&name.as_str()) {
tokenizer.consume();
- State::Fn(Box::new(continuation_close))
+ State::Fn(StateName::HtmlFlowContinuationClose)
} else {
continuation(tokenizer)
}
@@ -788,7 +788,7 @@ fn continuation_raw_end_tag(tokenizer: &mut Tokenizer) -> State {
if tokenizer.point.index - tokenizer.tokenize_state.start < HTML_RAW_SIZE_MAX =>
{
tokenizer.consume();
- State::Fn(Box::new(continuation_raw_end_tag))
+ State::Fn(StateName::HtmlFlowContinuationRawEndTag)
}
_ => {
tokenizer.tokenize_state.start = 0;
@@ -803,11 +803,11 @@ fn continuation_raw_end_tag(tokenizer: &mut Tokenizer) -> State {
/// > | <![CDATA[>&<]]>
/// ^
/// ```
-fn continuation_character_data_inside(tokenizer: &mut Tokenizer) -> State {
+pub fn continuation_cdata_inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b']') => {
tokenizer.consume();
- State::Fn(Box::new(continuation_declaration_inside))
+ State::Fn(StateName::HtmlFlowContinuationDeclarationInside)
}
_ => continuation(tokenizer),
}
@@ -827,15 +827,15 @@ fn continuation_character_data_inside(tokenizer: &mut Tokenizer) -> State {
/// > | <![CDATA[>&<]]>
/// ^
/// ```
-fn continuation_declaration_inside(tokenizer: &mut Tokenizer) -> State {
+pub fn continuation_declaration_inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'>') => {
tokenizer.consume();
- State::Fn(Box::new(continuation_close))
+ State::Fn(StateName::HtmlFlowContinuationClose)
}
Some(b'-') if tokenizer.tokenize_state.marker == COMMENT => {
tokenizer.consume();
- State::Fn(Box::new(continuation_declaration_inside))
+ State::Fn(StateName::HtmlFlowContinuationDeclarationInside)
}
_ => continuation(tokenizer),
}
@@ -847,7 +847,7 @@ fn continuation_declaration_inside(tokenizer: &mut Tokenizer) -> State {
/// > | <!doctype>
/// ^
/// ```
-fn continuation_close(tokenizer: &mut Tokenizer) -> State {
+pub fn continuation_close(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.exit(Token::HtmlFlowData);
@@ -855,7 +855,7 @@ fn continuation_close(tokenizer: &mut Tokenizer) -> State {
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(continuation_close))
+ State::Fn(StateName::HtmlFlowContinuationClose)
}
}
}
@@ -866,7 +866,7 @@ fn continuation_close(tokenizer: &mut Tokenizer) -> State {
/// > | <!doctype>
/// ^
/// ```
-fn continuation_after(tokenizer: &mut Tokenizer) -> State {
+pub fn continuation_after(tokenizer: &mut Tokenizer) -> State {
tokenizer.exit(Token::HtmlFlow);
tokenizer.tokenize_state.marker = 0;
// Feel free to interrupt.
@@ -883,9 +883,9 @@ fn continuation_after(tokenizer: &mut Tokenizer) -> State {
/// ^
/// |
/// ```
-fn blank_line_before(tokenizer: &mut Tokenizer) -> State {
+pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
- State::Fn(Box::new(blank_line))
+ State::Fn(StateName::BlankLineStart)
}
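
The html_flow changes above are the commit's stated idea applied mechanically: every `State::Fn(Box::new(state_fn))` becomes `State::Fn(StateName::Variant)`, and the driver resolves the variant back to a plain `fn` pointer through `StateName::to_func` (visible in the html_text hunks below) when it steps. A minimal, self-contained sketch of that dispatch, with invented names standing in for the real `StateName`/`State`/`Tokenizer` in src/tokenizer.rs:

    // Sketch only: two states and a driver, assuming none of the crate's types.
    #[derive(Clone, Copy)]
    enum StateName {
        Continuation,
        ContinuationClose,
    }

    enum State {
        Fn(StateName), // resume at the named state on the next step
        Ok,
        Nok,
    }

    struct Tokenizer {
        bytes: Vec<u8>,
        index: usize,
    }

    impl Tokenizer {
        fn current(&self) -> Option<u8> {
            self.bytes.get(self.index).copied()
        }
        fn consume(&mut self) {
            self.index += 1;
        }
    }

    impl StateName {
        /// Resolve a name to its state function: a plain `fn` pointer,
        /// so stepping allocates nothing, where `Box::new` allocated
        /// once per transition.
        fn to_func(self) -> fn(&mut Tokenizer) -> State {
            match self {
                StateName::Continuation => continuation,
                StateName::ContinuationClose => continuation_close,
            }
        }
    }

    fn continuation(tokenizer: &mut Tokenizer) -> State {
        match tokenizer.current() {
            None => State::Nok,
            Some(b'>') => {
                tokenizer.consume();
                State::Fn(StateName::ContinuationClose)
            }
            Some(_) => {
                tokenizer.consume();
                State::Fn(StateName::Continuation)
            }
        }
    }

    fn continuation_close(tokenizer: &mut Tokenizer) -> State {
        match tokenizer.current() {
            None => State::Ok,
            Some(_) => {
                tokenizer.consume();
                State::Fn(StateName::ContinuationClose)
            }
        }
    }

    fn main() {
        let mut tokenizer = Tokenizer { bytes: b"<!doctype>".to_vec(), index: 0 };
        let mut state = State::Fn(StateName::Continuation);
        // The driver owns dispatch: look the name up, call the fn.
        while let State::Fn(name) = state {
            state = name.to_func()(&mut tokenizer);
        }
        assert!(matches!(state, State::Ok));
    }

Because a name is `Copy` and carries no environment, it can also be stored in fields and passed to `attempt`/`check`/`go` as data, which the boxed closures could not; the remaining files below are consequences of that.
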
diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs
index a4c0349..1c1f9e6 100644
--- a/src/construct/html_text.rs
+++ b/src/construct/html_text.rs
@@ -57,7 +57,7 @@
use crate::constant::HTML_CDATA_PREFIX;
use crate::construct::partial_space_or_tab::space_or_tab;
use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};
/// Start of HTML (text).
///
@@ -70,7 +70,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Token::HtmlText);
tokenizer.enter(Token::HtmlTextData);
tokenizer.consume();
- State::Fn(Box::new(open))
+ State::Fn(StateName::HtmlTextOpen)
} else {
State::Nok
}
@@ -86,24 +86,24 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | a <!--b--> c
/// ^
/// ```
-fn open(tokenizer: &mut Tokenizer) -> State {
+pub fn open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'!') => {
tokenizer.consume();
- State::Fn(Box::new(declaration_open))
+ State::Fn(StateName::HtmlTextDeclarationOpen)
}
Some(b'/') => {
tokenizer.consume();
- State::Fn(Box::new(tag_close_start))
+ State::Fn(StateName::HtmlTextTagCloseStart)
}
Some(b'?') => {
tokenizer.consume();
- State::Fn(Box::new(instruction))
+ State::Fn(StateName::HtmlTextInstruction)
}
// ASCII alphabetical.
Some(b'A'..=b'Z' | b'a'..=b'z') => {
tokenizer.consume();
- State::Fn(Box::new(tag_open))
+ State::Fn(StateName::HtmlTextTagOpen)
}
_ => State::Nok,
}
@@ -119,20 +119,20 @@ fn open(tokenizer: &mut Tokenizer) -> State {
/// > | a <![CDATA[>&<]]> c
/// ^
/// ```
-fn declaration_open(tokenizer: &mut Tokenizer) -> State {
+pub fn declaration_open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'-') => {
tokenizer.consume();
- State::Fn(Box::new(comment_open_inside))
+ State::Fn(StateName::HtmlTextCommentOpenInside)
}
// ASCII alphabetical.
Some(b'A'..=b'Z' | b'a'..=b'z') => {
tokenizer.consume();
- State::Fn(Box::new(declaration))
+ State::Fn(StateName::HtmlTextDeclaration)
}
Some(b'[') => {
tokenizer.consume();
- State::Fn(Box::new(cdata_open_inside))
+ State::Fn(StateName::HtmlTextCdataOpenInside)
}
_ => State::Nok,
}
@@ -144,11 +144,11 @@ fn declaration_open(tokenizer: &mut Tokenizer) -> State {
/// > | a <!--b--> c
/// ^
/// ```
-fn comment_open_inside(tokenizer: &mut Tokenizer) -> State {
+pub fn comment_open_inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'-') => {
tokenizer.consume();
- State::Fn(Box::new(comment_start))
+ State::Fn(StateName::HtmlTextCommentStart)
}
_ => State::Nok,
}
@@ -167,12 +167,12 @@ fn comment_open_inside(tokenizer: &mut Tokenizer) -> State {
/// ```
///
/// [html_flow]: crate::construct::html_flow
-fn comment_start(tokenizer: &mut Tokenizer) -> State {
+pub fn comment_start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'>') => State::Nok,
Some(b'-') => {
tokenizer.consume();
- State::Fn(Box::new(comment_start_dash))
+ State::Fn(StateName::HtmlTextCommentStartDash)
}
_ => comment(tokenizer),
}
@@ -191,7 +191,7 @@ fn comment_start(tokenizer: &mut Tokenizer) -> State {
/// ```
///
/// [html_flow]: crate::construct::html_flow
-fn comment_start_dash(tokenizer: &mut Tokenizer) -> State {
+pub fn comment_start_dash(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'>') => State::Nok,
_ => comment(tokenizer),
@@ -204,20 +204,20 @@ fn comment_start_dash(tokenizer: &mut Tokenizer) -> State {
/// > | a <!--b--> c
/// ^
/// ```
-fn comment(tokenizer: &mut Tokenizer) -> State {
+pub fn comment(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => State::Nok,
Some(b'\n') => {
- tokenizer.tokenize_state.return_state = Some(Box::new(comment));
- at_line_ending(tokenizer)
+ tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextComment);
+ line_ending_before(tokenizer)
}
Some(b'-') => {
tokenizer.consume();
- State::Fn(Box::new(comment_close))
+ State::Fn(StateName::HtmlTextCommentClose)
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(comment))
+ State::Fn(StateName::HtmlTextComment)
}
}
}
@@ -228,11 +228,11 @@ fn comment(tokenizer: &mut Tokenizer) -> State {
/// > | a <!--b--> c
/// ^
/// ```
-fn comment_close(tokenizer: &mut Tokenizer) -> State {
+pub fn comment_close(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'-') => {
tokenizer.consume();
- State::Fn(Box::new(end))
+ State::Fn(StateName::HtmlTextEnd)
}
_ => comment(tokenizer),
}
@@ -244,16 +244,16 @@ fn comment_close(tokenizer: &mut Tokenizer) -> State {
/// > | a <![CDATA[>&<]]> b
/// ^^^^^^
/// ```
-fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State {
+pub fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State {
if tokenizer.current == Some(HTML_CDATA_PREFIX[tokenizer.tokenize_state.size]) {
tokenizer.tokenize_state.size += 1;
tokenizer.consume();
if tokenizer.tokenize_state.size == HTML_CDATA_PREFIX.len() {
tokenizer.tokenize_state.size = 0;
- State::Fn(Box::new(cdata))
+ State::Fn(StateName::HtmlTextCdata)
} else {
- State::Fn(Box::new(cdata_open_inside))
+ State::Fn(StateName::HtmlTextCdataOpenInside)
}
} else {
State::Nok
@@ -266,20 +266,20 @@ fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State {
/// > | a <![CDATA[>&<]]> b
/// ^^^
/// ```
-fn cdata(tokenizer: &mut Tokenizer) -> State {
+pub fn cdata(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => State::Nok,
Some(b'\n') => {
- tokenizer.tokenize_state.return_state = Some(Box::new(cdata));
- at_line_ending(tokenizer)
+ tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextCdata);
+ line_ending_before(tokenizer)
}
Some(b']') => {
tokenizer.consume();
- State::Fn(Box::new(cdata_close))
+ State::Fn(StateName::HtmlTextCdataClose)
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(cdata))
+ State::Fn(StateName::HtmlTextCdata)
}
}
}
@@ -290,11 +290,11 @@ fn cdata(tokenizer: &mut Tokenizer) -> State {
/// > | a <![CDATA[>&<]]> b
/// ^
/// ```
-fn cdata_close(tokenizer: &mut Tokenizer) -> State {
+pub fn cdata_close(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b']') => {
tokenizer.consume();
- State::Fn(Box::new(cdata_end))
+ State::Fn(StateName::HtmlTextCdataEnd)
}
_ => cdata(tokenizer),
}
@@ -306,7 +306,7 @@ fn cdata_close(tokenizer: &mut Tokenizer) -> State {
/// > | a <![CDATA[>&<]]> b
/// ^
/// ```
-fn cdata_end(tokenizer: &mut Tokenizer) -> State {
+pub fn cdata_end(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'>') => end(tokenizer),
Some(b']') => cdata_close(tokenizer),
@@ -320,16 +320,16 @@ fn cdata_end(tokenizer: &mut Tokenizer) -> State {
/// > | a <!b> c
/// ^
/// ```
-fn declaration(tokenizer: &mut Tokenizer) -> State {
+pub fn declaration(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'>') => end(tokenizer),
Some(b'\n') => {
- tokenizer.tokenize_state.return_state = Some(Box::new(declaration));
- at_line_ending(tokenizer)
+ tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextDeclaration);
+ line_ending_before(tokenizer)
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(declaration))
+ State::Fn(StateName::HtmlTextDeclaration)
}
}
}
@@ -340,20 +340,20 @@ fn declaration(tokenizer: &mut Tokenizer) -> State {
/// > | a <?b?> c
/// ^
/// ```
-fn instruction(tokenizer: &mut Tokenizer) -> State {
+pub fn instruction(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => State::Nok,
Some(b'\n') => {
- tokenizer.tokenize_state.return_state = Some(Box::new(instruction));
- at_line_ending(tokenizer)
+ tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextInstruction);
+ line_ending_before(tokenizer)
}
Some(b'?') => {
tokenizer.consume();
- State::Fn(Box::new(instruction_close))
+ State::Fn(StateName::HtmlTextInstructionClose)
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(instruction))
+ State::Fn(StateName::HtmlTextInstruction)
}
}
}
@@ -364,7 +364,7 @@ fn instruction(tokenizer: &mut Tokenizer) -> State {
/// > | a <?b?> c
/// ^
/// ```
-fn instruction_close(tokenizer: &mut Tokenizer) -> State {
+pub fn instruction_close(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'>') => end(tokenizer),
_ => instruction(tokenizer),
@@ -377,12 +377,12 @@ fn instruction_close(tokenizer: &mut Tokenizer) -> State {
/// > | a </b> c
/// ^
/// ```
-fn tag_close_start(tokenizer: &mut Tokenizer) -> State {
+pub fn tag_close_start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// ASCII alphabetical.
Some(b'A'..=b'Z' | b'a'..=b'z') => {
tokenizer.consume();
- State::Fn(Box::new(tag_close))
+ State::Fn(StateName::HtmlTextTagClose)
}
_ => State::Nok,
}
@@ -394,12 +394,12 @@ fn tag_close_start(tokenizer: &mut Tokenizer) -> State {
/// > | a </b> c
/// ^
/// ```
-fn tag_close(tokenizer: &mut Tokenizer) -> State {
+pub fn tag_close(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// ASCII alphanumerical and `-`.
Some(b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => {
tokenizer.consume();
- State::Fn(Box::new(tag_close))
+ State::Fn(StateName::HtmlTextTagClose)
}
_ => tag_close_between(tokenizer),
}
@@ -411,15 +411,15 @@ fn tag_close(tokenizer: &mut Tokenizer) -> State {
/// > | a </b> c
/// ^
/// ```
-fn tag_close_between(tokenizer: &mut Tokenizer) -> State {
+pub fn tag_close_between(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\n') => {
- tokenizer.tokenize_state.return_state = Some(Box::new(tag_close_between));
- at_line_ending(tokenizer)
+ tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextTagCloseBetween);
+ line_ending_before(tokenizer)
}
Some(b'\t' | b' ') => {
tokenizer.consume();
- State::Fn(Box::new(tag_close_between))
+ State::Fn(StateName::HtmlTextTagCloseBetween)
}
_ => end(tokenizer),
}
@@ -431,12 +431,12 @@ fn tag_close_between(tokenizer: &mut Tokenizer) -> State {
/// > | a <b> c
/// ^
/// ```
-fn tag_open(tokenizer: &mut Tokenizer) -> State {
+pub fn tag_open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// ASCII alphanumerical and `-`.
Some(b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => {
tokenizer.consume();
- State::Fn(Box::new(tag_open))
+ State::Fn(StateName::HtmlTextTagOpen)
}
Some(b'\t' | b'\n' | b' ' | b'/' | b'>') => tag_open_between(tokenizer),
_ => State::Nok,
@@ -449,24 +449,24 @@ fn tag_open(tokenizer: &mut Tokenizer) -> State {
/// > | a <b> c
/// ^
/// ```
-fn tag_open_between(tokenizer: &mut Tokenizer) -> State {
+pub fn tag_open_between(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\n') => {
- tokenizer.tokenize_state.return_state = Some(Box::new(tag_open_between));
- at_line_ending(tokenizer)
+ tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextTagOpenBetween);
+ line_ending_before(tokenizer)
}
Some(b'\t' | b' ') => {
tokenizer.consume();
- State::Fn(Box::new(tag_open_between))
+ State::Fn(StateName::HtmlTextTagOpenBetween)
}
Some(b'/') => {
tokenizer.consume();
- State::Fn(Box::new(end))
+ State::Fn(StateName::HtmlTextEnd)
}
// ASCII alphabetical and `:` and `_`.
Some(b':' | b'A'..=b'Z' | b'_' | b'a'..=b'z') => {
tokenizer.consume();
- State::Fn(Box::new(tag_open_attribute_name))
+ State::Fn(StateName::HtmlTextTagOpenAttributeName)
}
_ => end(tokenizer),
}
@@ -478,12 +478,12 @@ fn tag_open_between(tokenizer: &mut Tokenizer) -> State {
/// > | a <b c> d
/// ^
/// ```
-fn tag_open_attribute_name(tokenizer: &mut Tokenizer) -> State {
+pub fn tag_open_attribute_name(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
        // ASCII alphanumerical and `-`, `.`, `:`, and `_`.
Some(b'-' | b'.' | b'0'..=b'9' | b':' | b'A'..=b'Z' | b'_' | b'a'..=b'z') => {
tokenizer.consume();
- State::Fn(Box::new(tag_open_attribute_name))
+ State::Fn(StateName::HtmlTextTagOpenAttributeName)
}
_ => tag_open_attribute_name_after(tokenizer),
}
@@ -496,19 +496,20 @@ fn tag_open_attribute_name(tokenizer: &mut Tokenizer) -> State {
/// > | a <b c> d
/// ^
/// ```
-fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer) -> State {
+pub fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\n') => {
- tokenizer.tokenize_state.return_state = Some(Box::new(tag_open_attribute_name_after));
- at_line_ending(tokenizer)
+ tokenizer.tokenize_state.return_state =
+ Some(StateName::HtmlTextTagOpenAttributeNameAfter);
+ line_ending_before(tokenizer)
}
Some(b'\t' | b' ') => {
tokenizer.consume();
- State::Fn(Box::new(tag_open_attribute_name_after))
+ State::Fn(StateName::HtmlTextTagOpenAttributeNameAfter)
}
Some(b'=') => {
tokenizer.consume();
- State::Fn(Box::new(tag_open_attribute_value_before))
+ State::Fn(StateName::HtmlTextTagOpenAttributeValueBefore)
}
_ => tag_open_between(tokenizer),
}
@@ -521,25 +522,26 @@ fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer) -> State {
/// > | a <b c=d> e
/// ^
/// ```
-fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer) -> State {
+pub fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'<' | b'=' | b'>' | b'`') => State::Nok,
Some(b'\n') => {
- tokenizer.tokenize_state.return_state = Some(Box::new(tag_open_attribute_value_before));
- at_line_ending(tokenizer)
+ tokenizer.tokenize_state.return_state =
+ Some(StateName::HtmlTextTagOpenAttributeValueBefore);
+ line_ending_before(tokenizer)
}
Some(b'\t' | b' ') => {
tokenizer.consume();
- State::Fn(Box::new(tag_open_attribute_value_before))
+ State::Fn(StateName::HtmlTextTagOpenAttributeValueBefore)
}
Some(b'"' | b'\'') => {
tokenizer.tokenize_state.marker = tokenizer.current.unwrap();
tokenizer.consume();
- State::Fn(Box::new(tag_open_attribute_value_quoted))
+ State::Fn(StateName::HtmlTextTagOpenAttributeValueQuoted)
}
Some(_) => {
tokenizer.consume();
- State::Fn(Box::new(tag_open_attribute_value_unquoted))
+ State::Fn(StateName::HtmlTextTagOpenAttributeValueUnquoted)
}
}
}
@@ -550,24 +552,25 @@ fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer) -> State {
/// > | a <b c="d"> e
/// ^
/// ```
-fn tag_open_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {
+pub fn tag_open_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => {
tokenizer.tokenize_state.marker = 0;
State::Nok
}
Some(b'\n') => {
- tokenizer.tokenize_state.return_state = Some(Box::new(tag_open_attribute_value_quoted));
- at_line_ending(tokenizer)
+ tokenizer.tokenize_state.return_state =
+ Some(StateName::HtmlTextTagOpenAttributeValueQuoted);
+ line_ending_before(tokenizer)
}
Some(b'"' | b'\'') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
tokenizer.tokenize_state.marker = 0;
tokenizer.consume();
- State::Fn(Box::new(tag_open_attribute_value_quoted_after))
+ State::Fn(StateName::HtmlTextTagOpenAttributeValueQuotedAfter)
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(tag_open_attribute_value_quoted))
+ State::Fn(StateName::HtmlTextTagOpenAttributeValueQuoted)
}
}
}
@@ -578,13 +581,13 @@ fn tag_open_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {
/// > | a <b c=d> e
/// ^
/// ```
-fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State {
+pub fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'"' | b'\'' | b'<' | b'=' | b'`') => State::Nok,
Some(b'\t' | b'\n' | b' ' | b'/' | b'>') => tag_open_between(tokenizer),
Some(_) => {
tokenizer.consume();
- State::Fn(Box::new(tag_open_attribute_value_unquoted))
+ State::Fn(StateName::HtmlTextTagOpenAttributeValueUnquoted)
}
}
}
@@ -596,7 +599,7 @@ fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State {
/// > | a <b c="d"> e
/// ^
/// ```
-fn tag_open_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State {
+pub fn tag_open_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\t' | b'\n' | b' ' | b'>' | b'/') => tag_open_between(tokenizer),
_ => State::Nok,
@@ -609,7 +612,7 @@ fn tag_open_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State {
/// > | a <b c="d"> e
/// ^
/// ```
-fn end(tokenizer: &mut Tokenizer) -> State {
+pub fn end(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'>') => {
tokenizer.consume();
@@ -631,14 +634,14 @@ fn end(tokenizer: &mut Tokenizer) -> State {
/// ^
/// | b-->
/// ```
-fn at_line_ending(tokenizer: &mut Tokenizer) -> State {
+pub fn line_ending_before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\n') => {
tokenizer.exit(Token::HtmlTextData);
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
- State::Fn(Box::new(after_line_ending))
+ State::Fn(StateName::HtmlTextLineEndingAfter)
}
_ => unreachable!("expected eol"),
}
@@ -654,8 +657,9 @@ fn at_line_ending(tokenizer: &mut Tokenizer) -> State {
/// > | b-->
/// ^
/// ```
-fn after_line_ending(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt_opt(space_or_tab(), after_line_ending_prefix)(tokenizer)
+pub fn line_ending_after(tokenizer: &mut Tokenizer) -> State {
+ let state_name = space_or_tab(tokenizer);
+ tokenizer.attempt_opt(state_name, StateName::HtmlTextLineEndingAfterPrefix)
}
/// After a line ending, after indent.
@@ -668,8 +672,9 @@ fn after_line_ending(tokenizer: &mut Tokenizer) -> State {
/// > | b-->
/// ^
/// ```
-fn after_line_ending_prefix(tokenizer: &mut Tokenizer) -> State {
- let return_state = tokenizer.tokenize_state.return_state.take().unwrap();
+pub fn line_ending_after_prefix(tokenizer: &mut Tokenizer) -> State {
+ let state_name = tokenizer.tokenize_state.return_state.take().unwrap();
+ let func = state_name.to_func();
tokenizer.enter(Token::HtmlTextData);
- return_state(tokenizer)
+ func(tokenizer)
}
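
One recurring consequence in html_text: the shared line-ending handling used to stash a boxed closure in `tokenize_state.return_state`; it now stashes a `StateName`, and `line_ending_after_prefix` resolves it with `to_func` and calls it. A rough sketch, assuming the sketch `Tokenizer` above grows a hypothetical `return_state: Option<StateName>` field and the enum grows matching variants:

    fn comment(tokenizer: &mut Tokenizer) -> State {
        match tokenizer.current() {
            None => State::Nok,
            Some(b'\n') => {
                // Park the current state by name (nothing to box) and
                // hand off to the shared eol handler.
                tokenizer.return_state = Some(StateName::Comment);
                tokenizer.consume();
                State::Fn(StateName::LineEndingAfterPrefix)
            }
            Some(_) => {
                tokenizer.consume();
                State::Fn(StateName::Comment)
            }
        }
    }

    fn line_ending_after_prefix(tokenizer: &mut Tokenizer) -> State {
        // Skip indentation, then resolve the parked name and resume.
        while let Some(b'\t' | b' ') = tokenizer.current() {
            tokenizer.consume();
        }
        let name = tokenizer.return_state.take().unwrap();
        name.to_func()(tokenizer)
    }
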
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index b38e15a..ae9fe77 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -147,12 +147,9 @@
//! [html-img]: https://html.spec.whatwg.org/multipage/embedded-content.html#the-img-element
use crate::constant::RESOURCE_DESTINATION_BALANCE_MAX;
-use crate::construct::{
- partial_destination::start as destination, partial_label::start as label,
- partial_space_or_tab::space_or_tab_eol, partial_title::start as title,
-};
+use crate::construct::partial_space_or_tab::space_or_tab_eol;
use crate::token::Token;
-use crate::tokenizer::{Event, EventType, Media, State, Tokenizer};
+use crate::tokenizer::{Event, EventType, Media, State, StateName, Tokenizer};
use crate::util::{
normalize_identifier::normalize_identifier,
skip,
@@ -204,7 +201,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
tokenizer.consume();
tokenizer.exit(Token::LabelMarker);
tokenizer.exit(Token::LabelEnd);
- return State::Fn(Box::new(after));
+ return State::Fn(StateName::LabelEndAfter);
}
}
@@ -223,7 +220,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | [a] b
/// ^
/// ```
-fn after(tokenizer: &mut Tokenizer) -> State {
+pub fn after(tokenizer: &mut Tokenizer) -> State {
let start = &tokenizer.label_start_stack[tokenizer.tokenize_state.start];
let defined = tokenizer
.parse_state
@@ -240,19 +237,23 @@ fn after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// Resource (`[asd](fgh)`)?
- Some(b'(') => tokenizer.attempt(resource, move |is_ok| {
- Box::new(if is_ok || defined { ok } else { nok })
- })(tokenizer),
+ Some(b'(') => tokenizer.attempt(StateName::LabelEndResourceStart, move |is_ok| {
+ State::Fn(if is_ok || defined {
+ StateName::LabelEndOk
+ } else {
+ StateName::LabelEndNok
+ })
+ }),
// Full (`[asd][fgh]`) or collapsed (`[asd][]`) reference?
- Some(b'[') => tokenizer.attempt(full_reference, move |is_ok| {
- Box::new(if is_ok {
- ok
+ Some(b'[') => tokenizer.attempt(StateName::LabelEndReferenceFull, move |is_ok| {
+ State::Fn(if is_ok {
+ StateName::LabelEndOk
} else if defined {
- reference_not_full
+ StateName::LabelEndReferenceNotFull
} else {
- nok
+ StateName::LabelEndNok
})
- })(tokenizer),
+ }),
// Shortcut (`[asd]`) reference?
_ => {
let func = if defined { ok } else { nok };
@@ -271,10 +272,14 @@ fn after(tokenizer: &mut Tokenizer) -> State {
/// > | [a] b
/// ^
/// ```
-fn reference_not_full(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt(collapsed_reference, |is_ok| {
- Box::new(if is_ok { ok } else { nok })
- })(tokenizer)
+pub fn reference_not_full(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.attempt(StateName::LabelEndReferenceCollapsed, |is_ok| {
+ State::Fn(if is_ok {
+ StateName::LabelEndOk
+ } else {
+ StateName::LabelEndNok
+ })
+ })
}
/// Done, we found something.
@@ -289,7 +294,7 @@ fn reference_not_full(tokenizer: &mut Tokenizer) -> State {
/// > | [a] b
/// ^
/// ```
-fn ok(tokenizer: &mut Tokenizer) -> State {
+pub fn ok(tokenizer: &mut Tokenizer) -> State {
let label_start_index = tokenizer.tokenize_state.start;
// Remove this one and everything after it.
let mut left = tokenizer.label_start_stack.split_off(label_start_index);
@@ -332,7 +337,7 @@ fn ok(tokenizer: &mut Tokenizer) -> State {
/// > | [a] b
/// ^
/// ```
-fn nok(tokenizer: &mut Tokenizer) -> State {
+pub fn nok(tokenizer: &mut Tokenizer) -> State {
tokenizer
.label_start_stack
.get_mut(tokenizer.tokenize_state.start)
@@ -349,14 +354,14 @@ fn nok(tokenizer: &mut Tokenizer) -> State {
/// > | [a](b) c
/// ^
/// ```
-fn resource(tokenizer: &mut Tokenizer) -> State {
+pub fn resource_start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'(') => {
tokenizer.enter(Token::Resource);
tokenizer.enter(Token::ResourceMarker);
tokenizer.consume();
tokenizer.exit(Token::ResourceMarker);
- State::Fn(Box::new(resource_start))
+ State::Fn(StateName::LabelEndResourceBefore)
}
_ => unreachable!("expected `(`"),
}
@@ -368,8 +373,9 @@ fn resource(tokenizer: &mut Tokenizer) -> State {
/// > | [a](b) c
/// ^
/// ```
-fn resource_start(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt_opt(space_or_tab_eol(), resource_open)(tokenizer)
+pub fn resource_before(tokenizer: &mut Tokenizer) -> State {
+ let state_name = space_or_tab_eol(tokenizer);
+ tokenizer.attempt_opt(state_name, StateName::LabelEndResourceOpen)
}
/// At the start of a resource, after optional whitespace.
@@ -378,7 +384,7 @@ fn resource_start(tokenizer: &mut Tokenizer) -> State {
/// > | [a](b) c
/// ^
/// ```
-fn resource_open(tokenizer: &mut Tokenizer) -> State {
+pub fn resource_open(tokenizer: &mut Tokenizer) -> State {
if let Some(b')') = tokenizer.current {
resource_end(tokenizer)
} else {
@@ -389,13 +395,13 @@ fn resource_open(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.token_5 = Token::ResourceDestinationString;
tokenizer.tokenize_state.size_other = RESOURCE_DESTINATION_BALANCE_MAX;
- tokenizer.attempt(destination, |ok| {
- Box::new(if ok {
- destination_after
+ tokenizer.attempt(StateName::DestinationStart, |ok| {
+ State::Fn(if ok {
+ StateName::LabelEndResourceDestinationAfter
} else {
- destination_missing
+ StateName::LabelEndResourceDestinationMissing
})
- })(tokenizer)
+ })
}
}
@@ -405,21 +411,26 @@ fn resource_open(tokenizer: &mut Tokenizer) -> State {
/// > | [a](b) c
/// ^
/// ```
-fn destination_after(tokenizer: &mut Tokenizer) -> State {
+pub fn resource_destination_after(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.token_1 = Token::Data;
tokenizer.tokenize_state.token_2 = Token::Data;
tokenizer.tokenize_state.token_3 = Token::Data;
tokenizer.tokenize_state.token_4 = Token::Data;
tokenizer.tokenize_state.token_5 = Token::Data;
tokenizer.tokenize_state.size_other = 0;
-
- tokenizer.attempt(space_or_tab_eol(), |ok| {
- Box::new(if ok { resource_between } else { resource_end })
- })(tokenizer)
+ let state_name = space_or_tab_eol(tokenizer);
+
+ tokenizer.attempt(state_name, |ok| {
+ State::Fn(if ok {
+ StateName::LabelEndResourceBetween
+ } else {
+ StateName::LabelEndResourceEnd
+ })
+ })
}
/// Without destination.
-fn destination_missing(tokenizer: &mut Tokenizer) -> State {
+pub fn resource_destination_missing(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.token_1 = Token::Data;
tokenizer.tokenize_state.token_2 = Token::Data;
tokenizer.tokenize_state.token_3 = Token::Data;
@@ -435,13 +446,13 @@ fn destination_missing(tokenizer: &mut Tokenizer) -> State {
/// > | [a](b ) c
/// ^
/// ```
-fn resource_between(tokenizer: &mut Tokenizer) -> State {
+pub fn resource_between(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'"' | b'\'' | b'(') => {
tokenizer.tokenize_state.token_1 = Token::ResourceTitle;
tokenizer.tokenize_state.token_2 = Token::ResourceTitleMarker;
tokenizer.tokenize_state.token_3 = Token::ResourceTitleString;
- tokenizer.go(title, title_after)(tokenizer)
+ tokenizer.go(StateName::TitleStart, StateName::LabelEndResourceTitleAfter)
}
_ => resource_end(tokenizer),
}
@@ -453,11 +464,12 @@ fn resource_between(tokenizer: &mut Tokenizer) -> State {
/// > | [a](b "c") d
/// ^
/// ```
-fn title_after(tokenizer: &mut Tokenizer) -> State {
+pub fn resource_title_after(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.token_1 = Token::Data;
tokenizer.tokenize_state.token_2 = Token::Data;
tokenizer.tokenize_state.token_3 = Token::Data;
- tokenizer.attempt_opt(space_or_tab_eol(), resource_end)(tokenizer)
+ let state_name = space_or_tab_eol(tokenizer);
+ tokenizer.attempt_opt(state_name, StateName::LabelEndResourceEnd)
}
/// In a resource, at the `)`.
@@ -466,7 +478,7 @@ fn title_after(tokenizer: &mut Tokenizer) -> State {
/// > | [a](b) d
/// ^
/// ```
-fn resource_end(tokenizer: &mut Tokenizer) -> State {
+pub fn resource_end(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b')') => {
tokenizer.enter(Token::ResourceMarker);
@@ -485,13 +497,13 @@ fn resource_end(tokenizer: &mut Tokenizer) -> State {
/// > | [a][b] d
/// ^
/// ```
-fn full_reference(tokenizer: &mut Tokenizer) -> State {
+pub fn reference_full(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'[') => {
tokenizer.tokenize_state.token_1 = Token::Reference;
tokenizer.tokenize_state.token_2 = Token::ReferenceMarker;
tokenizer.tokenize_state.token_3 = Token::ReferenceString;
- tokenizer.go(label, full_reference_after)(tokenizer)
+ tokenizer.go(StateName::LabelStart, StateName::LabelEndReferenceFullAfter)
}
_ => unreachable!("expected `[`"),
}
@@ -503,7 +515,7 @@ fn full_reference(tokenizer: &mut Tokenizer) -> State {
/// > | [a][b] d
/// ^
/// ```
-fn full_reference_after(tokenizer: &mut Tokenizer) -> State {
+pub fn reference_full_after(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.token_1 = Token::Data;
tokenizer.tokenize_state.token_2 = Token::Data;
tokenizer.tokenize_state.token_3 = Token::Data;
@@ -541,14 +553,14 @@ fn full_reference_after(tokenizer: &mut Tokenizer) -> State {
/// > | [a][] d
/// ^
/// ```
-fn collapsed_reference(tokenizer: &mut Tokenizer) -> State {
+pub fn reference_collapsed(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'[') => {
tokenizer.enter(Token::Reference);
tokenizer.enter(Token::ReferenceMarker);
tokenizer.consume();
tokenizer.exit(Token::ReferenceMarker);
- State::Fn(Box::new(collapsed_reference_open))
+ State::Fn(StateName::LabelEndReferenceCollapsedOpen)
}
_ => State::Nok,
}
@@ -562,7 +574,7 @@ fn collapsed_reference(tokenizer: &mut Tokenizer) -> State {
/// > | [a][] d
/// ^
/// ```
-fn collapsed_reference_open(tokenizer: &mut Tokenizer) -> State {
+pub fn reference_collapsed_open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b']') => {
tokenizer.enter(Token::ReferenceMarker);
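
label_end shows the other half of the API change: `attempt`, `check`, and `go` no longer return a boxed state function that the caller immediately invokes (hence the disappearing trailing `(tokenizer)` everywhere); they take state names, plus a small closure mapping the attempt's outcome to the next name, and return a `State` themselves. A rough sketch of `attempt` over the sketch types above, assuming a simple save-and-rewind scheme (the real tokenizer defers this through its internal attempt machinery rather than recursing):

    impl Tokenizer {
        /// Run the named construct speculatively; on failure, rewind.
        /// Either way, ask `done` which named state comes next.
        fn attempt(&mut self, name: StateName, done: impl FnOnce(bool) -> State) -> State {
            let saved = self.index;
            let mut state = State::Fn(name);
            while let State::Fn(next) = state {
                state = next.to_func()(self);
            }
            let ok = matches!(state, State::Ok);
            if !ok {
                self.index = saved; // undo speculative consumption
            }
            done(ok)
        }
    }

Call sites such as `after` then read as data: which construct to try, and which named state to continue in on success or failure.
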
diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs
index 4a3508e..4fcf8c2 100644
--- a/src/construct/label_start_image.rs
+++ b/src/construct/label_start_image.rs
@@ -30,7 +30,7 @@
use super::label_end::resolve_media;
use crate::token::Token;
-use crate::tokenizer::{LabelStart, State, Tokenizer};
+use crate::tokenizer::{LabelStart, State, StateName, Tokenizer};
/// Start of label (image) start.
///
@@ -45,7 +45,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Token::LabelImageMarker);
tokenizer.consume();
tokenizer.exit(Token::LabelImageMarker);
- State::Fn(Box::new(open))
+ State::Fn(StateName::LabelStartImageOpen)
}
_ => State::Nok,
}
diff --git a/src/construct/list.rs b/src/construct/list.rs
index 0e12b7c..6ecfb04 100644
--- a/src/construct/list.rs
+++ b/src/construct/list.rs
@@ -45,12 +45,9 @@
//! [commonmark-block]: https://spec.commonmark.org/0.30/#phase-1-block-structure
use crate::constant::{LIST_ITEM_VALUE_SIZE_MAX, TAB_SIZE};
-use crate::construct::{
- blank_line::start as blank_line, partial_space_or_tab::space_or_tab_min_max,
- thematic_break::start as thematic_break,
-};
+use crate::construct::partial_space_or_tab::space_or_tab_min_max;
use crate::token::Token;
-use crate::tokenizer::{EventType, State, Tokenizer};
+use crate::tokenizer::{EventType, State, StateName, Tokenizer};
use crate::util::{
skip,
slice::{Position, Slice},
@@ -65,17 +62,16 @@ use crate::util::{
pub fn start(tokenizer: &mut Tokenizer) -> State {
if tokenizer.parse_state.constructs.list {
tokenizer.enter(Token::ListItem);
- tokenizer.go(
- space_or_tab_min_max(
- 0,
- if tokenizer.parse_state.constructs.code_indented {
- TAB_SIZE - 1
- } else {
- usize::MAX
- },
- ),
- before,
- )(tokenizer)
+ let state_name = space_or_tab_min_max(
+ tokenizer,
+ 0,
+ if tokenizer.parse_state.constructs.code_indented {
+ TAB_SIZE - 1
+ } else {
+ usize::MAX
+ },
+ );
+ tokenizer.go(state_name, StateName::ListBefore)
} else {
State::Nok
}
@@ -87,12 +83,16 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | * a
/// ^
/// ```
-fn before(tokenizer: &mut Tokenizer) -> State {
+pub fn before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// Unordered.
- Some(b'*' | b'-') => tokenizer.check(thematic_break, |ok| {
- Box::new(if ok { nok } else { before_unordered })
- })(tokenizer),
+ Some(b'*' | b'-') => tokenizer.check(StateName::ThematicBreakStart, |ok| {
+ State::Fn(if ok {
+ StateName::ListNok
+ } else {
+ StateName::ListBeforeUnordered
+ })
+ }),
Some(b'+') => before_unordered(tokenizer),
// Ordered.
Some(b'0'..=b'9') if !tokenizer.interrupt => before_ordered(tokenizer),
@@ -109,7 +109,7 @@ fn before(tokenizer: &mut Tokenizer) -> State {
/// > | * a
/// ^
/// ```
-fn before_unordered(tokenizer: &mut Tokenizer) -> State {
+pub fn before_unordered(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Token::ListItemPrefix);
marker(tokenizer)
}
@@ -120,10 +120,10 @@ fn before_unordered(tokenizer: &mut Tokenizer) -> State {
/// > | * a
/// ^
/// ```
-fn before_ordered(tokenizer: &mut Tokenizer) -> State {
+pub fn before_ordered(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Token::ListItemPrefix);
tokenizer.enter(Token::ListItemValue);
- inside(tokenizer)
+ value(tokenizer)
}
/// In an ordered list item value.
@@ -132,7 +132,7 @@ fn before_ordered(tokenizer: &mut Tokenizer) -> State {
/// > | 1. a
/// ^
/// ```
-fn inside(tokenizer: &mut Tokenizer) -> State {
+pub fn value(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'.' | b')') if !tokenizer.interrupt || tokenizer.tokenize_state.size < 2 => {
tokenizer.exit(Token::ListItemValue);
@@ -141,7 +141,7 @@ fn inside(tokenizer: &mut Tokenizer) -> State {
Some(b'0'..=b'9') if tokenizer.tokenize_state.size + 1 < LIST_ITEM_VALUE_SIZE_MAX => {
tokenizer.tokenize_state.size += 1;
tokenizer.consume();
- State::Fn(Box::new(inside))
+ State::Fn(StateName::ListValue)
}
_ => {
tokenizer.tokenize_state.size = 0;
@@ -158,11 +158,11 @@ fn inside(tokenizer: &mut Tokenizer) -> State {
/// > | 1. b
/// ^
/// ```
-fn marker(tokenizer: &mut Tokenizer) -> State {
+pub fn marker(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Token::ListItemMarker);
tokenizer.consume();
tokenizer.exit(Token::ListItemMarker);
- State::Fn(Box::new(marker_after))
+ State::Fn(StateName::ListMarkerAfter)
}
/// After a list item marker.
@@ -173,11 +173,15 @@ fn marker(tokenizer: &mut Tokenizer) -> State {
/// > | 1. b
/// ^
/// ```
-fn marker_after(tokenizer: &mut Tokenizer) -> State {
+pub fn marker_after(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.size = 1;
- tokenizer.check(blank_line, |ok| {
- Box::new(if ok { after } else { marker_after_not_blank })
- })(tokenizer)
+ tokenizer.check(StateName::BlankLineStart, |ok| {
+ State::Fn(if ok {
+ StateName::ListAfter
+ } else {
+ StateName::ListMarkerAfterFilled
+ })
+ })
}
/// After a list item marker, not followed by a blank line.
@@ -186,13 +190,17 @@ fn marker_after(tokenizer: &mut Tokenizer) -> State {
/// > | * a
/// ^
/// ```
-fn marker_after_not_blank(tokenizer: &mut Tokenizer) -> State {
+pub fn marker_after_filled(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.size = 0;
// Attempt to parse up to the largest allowed indent, `nok` if there is more whitespace.
- tokenizer.attempt(whitespace, |ok| {
- Box::new(if ok { after } else { prefix_other })
- })(tokenizer)
+ tokenizer.attempt(StateName::ListWhitespace, |ok| {
+ State::Fn(if ok {
+ StateName::ListAfter
+ } else {
+ StateName::ListPrefixOther
+ })
+ })
}
/// In whitespace after a marker.
@@ -201,8 +209,9 @@ fn marker_after_not_blank(tokenizer: &mut Tokenizer) -> State {
/// > | * a
/// ^
/// ```
-fn whitespace(tokenizer: &mut Tokenizer) -> State {
- tokenizer.go(space_or_tab_min_max(1, TAB_SIZE), whitespace_after)(tokenizer)
+pub fn whitespace(tokenizer: &mut Tokenizer) -> State {
+ let state_name = space_or_tab_min_max(tokenizer, 1, TAB_SIZE);
+ tokenizer.go(state_name, StateName::ListWhitespaceAfter)
}
/// After acceptable whitespace.
@@ -211,7 +220,7 @@ fn whitespace(tokenizer: &mut Tokenizer) -> State {
/// > | * a
/// ^
/// ```
-fn whitespace_after(tokenizer: &mut Tokenizer) -> State {
+pub fn whitespace_after(tokenizer: &mut Tokenizer) -> State {
if let Some(b'\t' | b' ') = tokenizer.current {
State::Nok
} else {
@@ -225,13 +234,13 @@ fn whitespace_after(tokenizer: &mut Tokenizer) -> State {
/// > | * a
/// ^
/// ```
-fn prefix_other(tokenizer: &mut Tokenizer) -> State {
+pub fn prefix_other(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\t' | b' ') => {
tokenizer.enter(Token::SpaceOrTab);
tokenizer.consume();
tokenizer.exit(Token::SpaceOrTab);
- State::Fn(Box::new(after))
+ State::Fn(StateName::ListAfter)
}
_ => State::Nok,
}
@@ -243,7 +252,7 @@ fn prefix_other(tokenizer: &mut Tokenizer) -> State {
/// > | * a
/// ^
/// ```
-fn after(tokenizer: &mut Tokenizer) -> State {
+pub fn after(tokenizer: &mut Tokenizer) -> State {
let blank = tokenizer.tokenize_state.size == 1;
tokenizer.tokenize_state.size = 0;
@@ -285,10 +294,14 @@ fn after(tokenizer: &mut Tokenizer) -> State {
/// > | b
/// ^
/// ```
-pub fn cont(tokenizer: &mut Tokenizer) -> State {
- tokenizer.check(blank_line, |ok| {
- Box::new(if ok { blank_cont } else { not_blank_cont })
- })(tokenizer)
+pub fn cont_start(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.check(StateName::BlankLineStart, |ok| {
+ State::Fn(if ok {
+ StateName::ListContBlank
+ } else {
+ StateName::ListContFilled
+ })
+ })
}
/// Start of blank list item continuation.
@@ -299,15 +312,16 @@ pub fn cont(tokenizer: &mut Tokenizer) -> State {
/// ^
/// | b
/// ```
-pub fn blank_cont(tokenizer: &mut Tokenizer) -> State {
+pub fn cont_blank(tokenizer: &mut Tokenizer) -> State {
let container = tokenizer.container.as_ref().unwrap();
let size = container.size;
if container.blank_initial {
State::Nok
} else {
+ let state_name = space_or_tab_min_max(tokenizer, 0, size);
// Consume, optionally, at most `size`.
- tokenizer.go(space_or_tab_min_max(0, size), ok)(tokenizer)
+ tokenizer.go(state_name, StateName::ListOk)
}
}
@@ -318,14 +332,15 @@ pub fn blank_cont(tokenizer: &mut Tokenizer) -> State {
/// > | b
/// ^
/// ```
-pub fn not_blank_cont(tokenizer: &mut Tokenizer) -> State {
+pub fn cont_filled(tokenizer: &mut Tokenizer) -> State {
let container = tokenizer.container.as_mut().unwrap();
let size = container.size;
container.blank_initial = false;
// Consume exactly `size`.
- tokenizer.go(space_or_tab_min_max(size, size), ok)(tokenizer)
+ let state_name = space_or_tab_min_max(tokenizer, size, size);
+ tokenizer.go(state_name, StateName::ListOk)
}
/// A state fn to yield [`State::Ok`].
@@ -334,16 +349,16 @@ pub fn ok(_tokenizer: &mut Tokenizer) -> State {
}
/// A state fn to yield [`State::Nok`].
-fn nok(_tokenizer: &mut Tokenizer) -> State {
+pub fn nok(_tokenizer: &mut Tokenizer) -> State {
State::Nok
}
/// Find adjacent list items with the same marker.
pub fn resolve_list_item(tokenizer: &mut Tokenizer) {
- let mut index = 0;
- let mut balance = 0;
let mut lists_wip: Vec<(u8, usize, usize, usize)> = vec![];
let mut lists: Vec<(u8, usize, usize, usize)> = vec![];
+ let mut index = 0;
+ let mut balance = 0;
// Merge list items.
while index < tokenizer.events.len() {
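
The list changes surface one wrinkle of the new scheme: an enum variant cannot capture configuration the way `space_or_tab_min_max(0, size)` captured `min`/`max` in a closure. Hence the new shape `space_or_tab_min_max(tokenizer, min, max)`: the helper takes the tokenizer, writes its bounds into shared state, and returns the name of the generic state to `go` to. Schematically, with hypothetical field names (the real slots live in tokenize_state; see src/construct/partial_space_or_tab.rs in this commit):

    /// Configure the shared space-or-tab state, hand back its name.
    fn space_or_tab_min_max(tokenizer: &mut Tokenizer, min: usize, max: usize) -> StateName {
        // Hypothetical fields standing in for the real configuration slots.
        tokenizer.space_or_tab_min = min;
        tokenizer.space_or_tab_max = max;
        StateName::SpaceOrTabStart
    }

    // Call-site shape, as in `whitespace` above:
    //     let state_name = space_or_tab_min_max(tokenizer, 1, TAB_SIZE);
    //     tokenizer.go(state_name, StateName::ListWhitespaceAfter)
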
diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs
index 7fdaa66..de750f4 100644
--- a/src/construct/paragraph.rs
+++ b/src/construct/paragraph.rs
@@ -33,7 +33,7 @@
//! [html]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-p-element
use crate::token::Token;
-use crate::tokenizer::{ContentType, EventType, State, Tokenizer};
+use crate::tokenizer::{ContentType, EventType, State, StateName, Tokenizer};
use crate::util::skip::opt as skip_opt;
/// Before a paragraph.
@@ -59,7 +59,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | abc
/// ^^^
/// ```
-fn inside(tokenizer: &mut Tokenizer) -> State {
+pub fn inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.exit(Token::Data);
@@ -71,7 +71,7 @@ fn inside(tokenizer: &mut Tokenizer) -> State {
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(inside))
+ State::Fn(StateName::ParagraphInside)
}
}
}
diff --git a/src/construct/partial_bom.rs b/src/construct/partial_bom.rs
index 2257bfd..b32b7f9 100644
--- a/src/construct/partial_bom.rs
+++ b/src/construct/partial_bom.rs
@@ -11,7 +11,7 @@
//! * [`micromark/lib/preprocess.js` in `micromark`](https://github.com/micromark/micromark/blob/ed23453/packages/micromark/dev/lib/preprocess.js#L54-L60)
use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};
const BOM: [u8; 3] = [0xEF, 0xBB, 0xBF];
@@ -36,7 +36,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | 0xEF 0xBB 0xBF
/// ^^^^ ^^^^ ^^^^
/// ```
-fn inside(tokenizer: &mut Tokenizer) -> State {
+pub fn inside(tokenizer: &mut Tokenizer) -> State {
if tokenizer.current == Some(BOM[tokenizer.tokenize_state.size]) {
tokenizer.tokenize_state.size += 1;
tokenizer.consume();
@@ -45,7 +45,7 @@ fn inside(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.size = 0;
State::Ok
} else {
- State::Fn(Box::new(inside))
+ State::Fn(StateName::BomInside)
}
} else {
tokenizer.tokenize_state.size = 0;
diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs
index 0365489..1cb5e61 100644
--- a/src/construct/partial_data.rs
+++ b/src/construct/partial_data.rs
@@ -7,7 +7,7 @@
//! [text]: crate::content::text
use crate::token::Token;
-use crate::tokenizer::{EventType, State, Tokenizer};
+use crate::tokenizer::{EventType, State, StateName, Tokenizer};
/// At the beginning of data.
///
@@ -17,10 +17,11 @@ use crate::tokenizer::{EventType, State, Tokenizer};
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
+ // Make sure to eat the first `stop`.
Some(byte) if tokenizer.tokenize_state.stop.contains(&byte) => {
tokenizer.enter(Token::Data);
tokenizer.consume();
- State::Fn(Box::new(data))
+ State::Fn(StateName::DataInside)
}
_ => at_break(tokenizer),
}
@@ -32,14 +33,14 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | abc
/// ^
/// ```
-fn at_break(tokenizer: &mut Tokenizer) -> State {
+pub fn at_break(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => State::Ok,
Some(b'\n') => {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
- State::Fn(Box::new(at_break))
+ State::Fn(StateName::DataAtBreak)
}
Some(byte) if tokenizer.tokenize_state.stop.contains(&byte) => {
tokenizer.register_resolver_before("data".to_string(), Box::new(resolve_data));
@@ -47,7 +48,7 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {
}
_ => {
tokenizer.enter(Token::Data);
- data(tokenizer)
+ inside(tokenizer)
}
}
}
@@ -58,7 +59,7 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {
/// > | abc
/// ^^^
/// ```
-fn data(tokenizer: &mut Tokenizer) -> State {
+pub fn inside(tokenizer: &mut Tokenizer) -> State {
let done = match tokenizer.current {
None | Some(b'\n') => true,
Some(byte) if tokenizer.tokenize_state.stop.contains(&byte) => true,
@@ -70,7 +71,7 @@ fn data(tokenizer: &mut Tokenizer) -> State {
at_break(tokenizer)
} else {
tokenizer.consume();
- State::Fn(Box::new(data))
+ State::Fn(StateName::DataInside)
}
}
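
The comment added in `start` ("Make sure to eat the first `stop`") encodes a subtle rule: a stop byte in the very first position is consumed as data rather than ending it, so a data run can begin with its own delimiter, and only later stop bytes break at `at_break`. A toy restatement of that rule outside the state machine (ignoring the separate line-ending handling):

    /// Toy model: length of the data run in `bytes`, where a stop byte
    /// ends the run unless it is the first byte.
    fn data_len(bytes: &[u8], stop: &[u8]) -> usize {
        let mut index = 0;
        while index < bytes.len() {
            if index > 0 && stop.contains(&bytes[index]) {
                break;
            }
            index += 1;
        }
        index
    }

    // data_len(b"*a*b", &[b'*']) == 2: the leading `*` is data; the
    // second `*` ends the run.
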
diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs
index f1cfc7d..e8818a0 100644
--- a/src/construct/partial_destination.rs
+++ b/src/construct/partial_destination.rs
@@ -72,7 +72,7 @@
//! [sanitize_uri]: crate::util::sanitize_uri
use crate::token::Token;
-use crate::tokenizer::{ContentType, State, Tokenizer};
+use crate::tokenizer::{ContentType, State, StateName, Tokenizer};
/// Before a destination.
///
@@ -90,7 +90,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(tokenizer.tokenize_state.token_3.clone());
tokenizer.consume();
tokenizer.exit(tokenizer.tokenize_state.token_3.clone());
- State::Fn(Box::new(enclosed_before))
+ State::Fn(StateName::DestinationEnclosedBefore)
}
// ASCII control, space, closing paren, but *not* `\0`.
None | Some(0x01..=0x1F | b' ' | b')' | 0x7F) => State::Nok,
@@ -110,7 +110,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | <aa>
/// ^
/// ```
-fn enclosed_before(tokenizer: &mut Tokenizer) -> State {
+pub fn enclosed_before(tokenizer: &mut Tokenizer) -> State {
if let Some(b'>') = tokenizer.current {
tokenizer.enter(tokenizer.tokenize_state.token_3.clone());
tokenizer.consume();
@@ -131,7 +131,7 @@ fn enclosed_before(tokenizer: &mut Tokenizer) -> State {
/// > | <aa>
/// ^
/// ```
-fn enclosed(tokenizer: &mut Tokenizer) -> State {
+pub fn enclosed(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n' | b'<') => State::Nok,
Some(b'>') => {
@@ -141,11 +141,11 @@ fn enclosed(tokenizer: &mut Tokenizer) -> State {
}
Some(b'\\') => {
tokenizer.consume();
- State::Fn(Box::new(enclosed_escape))
+ State::Fn(StateName::DestinationEnclosedEscape)
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(enclosed))
+ State::Fn(StateName::DestinationEnclosed)
}
}
}
@@ -156,11 +156,11 @@ fn enclosed(tokenizer: &mut Tokenizer) -> State {
/// > | <a\*a>
/// ^
/// ```
-fn enclosed_escape(tokenizer: &mut Tokenizer) -> State {
+pub fn enclosed_escape(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'<' | b'>' | b'\\') => {
tokenizer.consume();
- State::Fn(Box::new(enclosed))
+ State::Fn(StateName::DestinationEnclosed)
}
_ => enclosed(tokenizer),
}
@@ -172,7 +172,7 @@ fn enclosed_escape(tokenizer: &mut Tokenizer) -> State {
/// > | aa
/// ^
/// ```
-fn raw(tokenizer: &mut Tokenizer) -> State {
+pub fn raw(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\t' | b'\n' | b' ' | b')') if tokenizer.tokenize_state.size == 0 => {
tokenizer.exit(Token::Data);
@@ -185,7 +185,7 @@ fn raw(tokenizer: &mut Tokenizer) -> State {
Some(b'(') if tokenizer.tokenize_state.size < tokenizer.tokenize_state.size_other => {
tokenizer.consume();
tokenizer.tokenize_state.size += 1;
- State::Fn(Box::new(raw))
+ State::Fn(StateName::DestinationRaw)
}
// ASCII control (but *not* `\0`) and space and `(`.
None | Some(0x01..=0x1F | b' ' | b'(' | 0x7F) => {
@@ -195,15 +195,15 @@ fn raw(tokenizer: &mut Tokenizer) -> State {
Some(b')') => {
tokenizer.consume();
tokenizer.tokenize_state.size -= 1;
- State::Fn(Box::new(raw))
+ State::Fn(StateName::DestinationRaw)
}
Some(b'\\') => {
tokenizer.consume();
- State::Fn(Box::new(raw_escape))
+ State::Fn(StateName::DestinationRawEscape)
}
Some(_) => {
tokenizer.consume();
- State::Fn(Box::new(raw))
+ State::Fn(StateName::DestinationRaw)
}
}
}
@@ -214,11 +214,11 @@ fn raw(tokenizer: &mut Tokenizer) -> State {
/// > | a\*a
/// ^
/// ```
-fn raw_escape(tokenizer: &mut Tokenizer) -> State {
+pub fn raw_escape(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'(' | b')' | b'\\') => {
tokenizer.consume();
- State::Fn(Box::new(raw))
+ State::Fn(StateName::DestinationRaw)
}
_ => raw(tokenizer),
}
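// The raw-destination states above balance parentheses by incrementing and
// decrementing `tokenize_state.size`, capped by `size_other`. A
// freestanding sketch of the same counting rule (names and the exact error
// handling are illustrative; the real states also emit tokens):

fn raw_destination_end(bytes: &[u8], max_depth: usize) -> Option<usize> {
    let mut depth = 0;
    let mut index = 0;
    while index < bytes.len() {
        match bytes[index] {
            b'\\' => index += 1, // escape: the next byte is literal
            b'(' => {
                depth += 1;
                if depth > max_depth {
                    return None; // nested too deeply
                }
            }
            b')' if depth == 0 => return Some(index), // unbalanced `)` ends it
            b')' => depth -= 1,
            b'\t' | b'\n' | b' ' if depth == 0 => return Some(index),
            b'\t' | b'\n' | b' ' => return None, // whitespace inside parens: invalid
            _ => {}
        }
        index += 1;
    }
    if depth == 0 {
        Some(bytes.len())
    } else {
        None // unclosed `(`
    }
}

fn main() {
    assert_eq!(raw_destination_end(b"a(b)c", 32), Some(5));
    assert_eq!(raw_destination_end(b"a)b", 32), Some(1));
    assert_eq!(raw_destination_end(br"a\)b", 32), Some(4));
    assert_eq!(raw_destination_end(b"a(b c", 32), None);
}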
diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs
index 0e1c2ec..0c8366e 100644
--- a/src/construct/partial_label.rs
+++ b/src/construct/partial_label.rs
@@ -62,7 +62,7 @@ use super::partial_space_or_tab::{space_or_tab_eol_with_options, EolOptions};
use crate::constant::LINK_REFERENCE_SIZE_MAX;
use crate::subtokenize::link;
use crate::token::Token;
-use crate::tokenizer::{ContentType, State, Tokenizer};
+use crate::tokenizer::{ContentType, State, StateName, Tokenizer};
/// Before a label.
///
@@ -78,7 +78,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
tokenizer.consume();
tokenizer.exit(tokenizer.tokenize_state.token_2.clone());
tokenizer.enter(tokenizer.tokenize_state.token_3.clone());
- State::Fn(Box::new(at_break))
+ State::Fn(StateName::LabelAtBreak)
}
_ => State::Nok,
}
@@ -90,7 +90,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | [a]
/// ^
/// ```
-fn at_break(tokenizer: &mut Tokenizer) -> State {
+pub fn at_break(tokenizer: &mut Tokenizer) -> State {
if tokenizer.tokenize_state.size > LINK_REFERENCE_SIZE_MAX
|| matches!(tokenizer.current, None | Some(b'['))
|| (matches!(tokenizer.current, Some(b']')) && !tokenizer.tokenize_state.seen)
@@ -101,13 +101,22 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {
State::Nok
} else {
match tokenizer.current {
- Some(b'\n') => tokenizer.attempt(
- space_or_tab_eol_with_options(EolOptions {
- content_type: Some(ContentType::String),
- connect: tokenizer.tokenize_state.connect,
- }),
- |ok| Box::new(if ok { after_eol } else { at_blank_line }),
- )(tokenizer),
+ Some(b'\n') => {
+ let state_name = space_or_tab_eol_with_options(
+ tokenizer,
+ EolOptions {
+ content_type: Some(ContentType::String),
+ connect: tokenizer.tokenize_state.connect,
+ },
+ );
+ tokenizer.attempt(state_name, |ok| {
+ State::Fn(if ok {
+ StateName::LabelEolAfter
+ } else {
+ StateName::LabelAtBlankLine
+ })
+ })
+ }
Some(b']') => {
tokenizer.exit(tokenizer.tokenize_state.token_3.clone());
tokenizer.enter(tokenizer.tokenize_state.token_2.clone());
@@ -129,20 +138,20 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.connect = true;
}
- label(tokenizer)
+ inside(tokenizer)
}
}
}
}
/// In a label, after an EOL: mark subsequent content as connected and resume at the break state.
-fn after_eol(tokenizer: &mut Tokenizer) -> State {
+pub fn eol_after(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.connect = true;
at_break(tokenizer)
}
/// In a label, at a blank line (not allowed): reset state and fail.
-fn at_blank_line(tokenizer: &mut Tokenizer) -> State {
+pub fn at_blank_line(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.marker = 0;
tokenizer.tokenize_state.connect = false;
State::Nok
@@ -154,7 +163,7 @@ fn at_blank_line(tokenizer: &mut Tokenizer) -> State {
/// > | [a]
/// ^
/// ```
-fn label(tokenizer: &mut Tokenizer) -> State {
+pub fn inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n' | b'[' | b']') => {
tokenizer.exit(Token::Data);
@@ -165,13 +174,16 @@ fn label(tokenizer: &mut Tokenizer) -> State {
tokenizer.exit(Token::Data);
at_break(tokenizer)
} else {
- let func = if matches!(byte, b'\\') { escape } else { label };
tokenizer.consume();
tokenizer.tokenize_state.size += 1;
if !tokenizer.tokenize_state.seen && !matches!(byte, b'\t' | b' ') {
tokenizer.tokenize_state.seen = true;
}
- State::Fn(Box::new(func))
+ State::Fn(if matches!(byte, b'\\') {
+ StateName::LabelEscape
+ } else {
+ StateName::LabelInside
+ })
}
}
}
@@ -183,13 +195,13 @@ fn label(tokenizer: &mut Tokenizer) -> State {
/// > | [a\*a]
/// ^
/// ```
-fn escape(tokenizer: &mut Tokenizer) -> State {
+pub fn escape(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'[' | b'\\' | b']') => {
tokenizer.consume();
tokenizer.tokenize_state.size += 1;
- State::Fn(Box::new(label))
+ State::Fn(StateName::LabelInside)
}
- _ => label(tokenizer),
+ _ => inside(tokenizer),
}
}
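// `at_break` and `inside` above enforce the label rules through
// `tokenize_state`: a byte budget (`LINK_REFERENCE_SIZE_MAX`, imported at
// the top of the file), at least one non-blank byte (`seen`), and no
// unescaped brackets. A standalone sketch of the same checks on a label's
// inner bytes (the 999-byte limit follows CommonMark; other details are
// illustrative):

const LINK_REFERENCE_SIZE_MAX: usize = 999;

fn is_valid_label(inner: &[u8]) -> bool {
    let mut seen_non_blank = false;
    let mut index = 0;
    while index < inner.len() {
        match inner[index] {
            b'[' | b']' => return false, // unescaped brackets cannot nest
            b'\\' => index += 1,         // escape: skip the next byte too
            b'\t' | b'\n' | b' ' => {}
            _ => seen_non_blank = true,
        }
        index += 1;
    }
    seen_non_blank && inner.len() <= LINK_REFERENCE_SIZE_MAX
}

fn main() {
    assert!(is_valid_label(b"a"));
    assert!(is_valid_label(br"a\]b")); // escaped bracket is allowed
    assert!(!is_valid_label(b"   "));  // blank: no non-blank byte seen
    assert!(!is_valid_label(b"a[b]c"));
}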
diff --git a/src/construct/partial_non_lazy_continuation.rs b/src/construct/partial_non_lazy_continuation.rs
index 6005a6c..6d5cd7a 100644
--- a/src/construct/partial_non_lazy_continuation.rs
+++ b/src/construct/partial_non_lazy_continuation.rs
@@ -11,7 +11,7 @@
//! [html_flow]: crate::construct::html_flow
use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};
/// Start of continuation.
///
@@ -26,7 +26,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
- State::Fn(Box::new(after))
+ State::Fn(StateName::NonLazyContinuationAfter)
}
_ => State::Nok,
}
@@ -39,7 +39,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | b
/// ^
/// ```
-fn after(tokenizer: &mut Tokenizer) -> State {
+pub fn after(tokenizer: &mut Tokenizer) -> State {
if tokenizer.lazy {
State::Nok
} else {
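// `after` above rejects the continuation whenever `tokenizer.lazy` is set.
// "Lazy" marks a line that continues block content without repeating its
// container marker, e.g. the `b` in `> a\nb`. A rough, illustrative sketch
// of how such a flag relates to the input (the real tokenizer tracks this
// while parsing container markers, not per line like this):

fn is_lazy(inside_block_quote: bool, line: &str) -> bool {
    inside_block_quote && !line.trim_start().starts_with('>')
}

fn main() {
    assert!(is_lazy(true, "b"));    // `> a` then `b`: lazy continuation
    assert!(!is_lazy(true, "> b")); // marker repeated: not lazy
    assert!(!is_lazy(false, "b"));  // no open container: nothing to be lazy about
}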
diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs
index e3eac45..b0b35a6 100644
--- a/src/construct/partial_space_or_tab.rs
+++ b/src/construct/partial_space_or_tab.rs
@@ -6,7 +6,7 @@
use crate::subtokenize::link;
use crate::token::Token;
-use crate::tokenizer::{ContentType, State, StateFn, Tokenizer};
+use crate::tokenizer::{ContentType, State, StateName, Tokenizer};
/// Options to parse `space_or_tab`.
#[derive(Debug)]
@@ -37,8 +37,8 @@ pub struct EolOptions {
/// ```bnf
/// space_or_tab ::= 1*( ' ' '\t' )
/// ```
-pub fn space_or_tab() -> Box<StateFn> {
- space_or_tab_min_max(1, usize::MAX)
+pub fn space_or_tab(tokenizer: &mut Tokenizer) -> StateName {
+ space_or_tab_min_max(tokenizer, 1, usize::MAX)
}
/// Between `x` and `y` `space_or_tab`.
@@ -46,26 +46,27 @@ pub fn space_or_tab() -> Box<StateFn> {
/// ```bnf
/// space_or_tab_min_max ::= x*y( ' ' '\t' )
/// ```
-pub fn space_or_tab_min_max(min: usize, max: usize) -> Box<StateFn> {
- space_or_tab_with_options(Options {
- kind: Token::SpaceOrTab,
- min,
- max,
- content_type: None,
- connect: false,
- })
+pub fn space_or_tab_min_max(tokenizer: &mut Tokenizer, min: usize, max: usize) -> StateName {
+ space_or_tab_with_options(
+ tokenizer,
+ Options {
+ kind: Token::SpaceOrTab,
+ min,
+ max,
+ content_type: None,
+ connect: false,
+ },
+ )
}
/// `space_or_tab`, with the given options.
-pub fn space_or_tab_with_options(options: Options) -> Box<StateFn> {
- Box::new(|tokenizer| {
- tokenizer.tokenize_state.space_or_tab_connect = options.connect;
- tokenizer.tokenize_state.space_or_tab_content_type = options.content_type;
- tokenizer.tokenize_state.space_or_tab_min = options.min;
- tokenizer.tokenize_state.space_or_tab_max = options.max;
- tokenizer.tokenize_state.space_or_tab_token = options.kind;
- start(tokenizer)
- })
+pub fn space_or_tab_with_options(tokenizer: &mut Tokenizer, options: Options) -> StateName {
+ tokenizer.tokenize_state.space_or_tab_connect = options.connect;
+ tokenizer.tokenize_state.space_or_tab_content_type = options.content_type;
+ tokenizer.tokenize_state.space_or_tab_min = options.min;
+ tokenizer.tokenize_state.space_or_tab_max = options.max;
+ tokenizer.tokenize_state.space_or_tab_token = options.kind;
+ StateName::SpaceOrTabStart
}
/// `space_or_tab`, or optionally `space_or_tab`, one `eol`, and
@@ -74,41 +75,21 @@ pub fn space_or_tab_with_options(options: Options) -> Box<StateFn> {
/// ```bnf
/// space_or_tab_eol ::= 1*( ' ' '\t' ) | 0*( ' ' '\t' ) eol 0*( ' ' '\t' )
/// ```
-pub fn space_or_tab_eol() -> Box<StateFn> {
- space_or_tab_eol_with_options(EolOptions {
- content_type: None,
- connect: false,
- })
+pub fn space_or_tab_eol(tokenizer: &mut Tokenizer) -> StateName {
+ space_or_tab_eol_with_options(
+ tokenizer,
+ EolOptions {
+ content_type: None,
+ connect: false,
+ },
+ )
}
/// `space_or_tab_eol`, with the given options.
-pub fn space_or_tab_eol_with_options(options: EolOptions) -> Box<StateFn> {
- Box::new(move |tokenizer| {
- tokenizer.tokenize_state.space_or_tab_eol_content_type = options.content_type;
- tokenizer.tokenize_state.space_or_tab_eol_connect = options.connect;
-
- tokenizer.attempt(
- space_or_tab_with_options(Options {
- kind: Token::SpaceOrTab,
- min: 1,
- max: usize::MAX,
- content_type: tokenizer
- .tokenize_state
- .space_or_tab_eol_content_type
- .clone(),
- connect: tokenizer.tokenize_state.space_or_tab_eol_connect,
- }),
- move |ok| {
- Box::new(move |tokenizer| {
- if ok {
- tokenizer.tokenize_state.space_or_tab_eol_ok = ok;
- }
-
- after_space_or_tab(tokenizer)
- })
- },
- )(tokenizer)
- })
+pub fn space_or_tab_eol_with_options(tokenizer: &mut Tokenizer, options: EolOptions) -> StateName {
+ tokenizer.tokenize_state.space_or_tab_eol_content_type = options.content_type;
+ tokenizer.tokenize_state.space_or_tab_eol_connect = options.connect;
+ StateName::SpaceOrTabEolStart
}
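// Above, `space_or_tab_eol_with_options` no longer allocates a closure
// that captures `options`; it stores the options on `tokenize_state` and
// returns the name of the entry state. A minimal sketch of that
// "configure, then hand back a name" pattern (all names illustrative):

enum StateName {
    SpaceOrTabEolStart,
}

#[derive(Default)]
struct TokenizeState {
    space_or_tab_eol_connect: bool,
}

#[derive(Default)]
struct Tokenizer {
    tokenize_state: TokenizeState,
}

struct EolOptions {
    connect: bool,
}

// Before this commit the options traveled inside a `Box<StateFn>`; now
// they live on the tokenizer and the return value is plain data.
fn space_or_tab_eol_with_options(tokenizer: &mut Tokenizer, options: EolOptions) -> StateName {
    tokenizer.tokenize_state.space_or_tab_eol_connect = options.connect;
    StateName::SpaceOrTabEolStart
}

fn main() {
    let mut tokenizer = Tokenizer::default();
    let name = space_or_tab_eol_with_options(&mut tokenizer, EolOptions { connect: true });
    assert!(matches!(name, StateName::SpaceOrTabEolStart));
    assert!(tokenizer.tokenize_state.space_or_tab_eol_connect);
}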
/// Before `space_or_tab`.
@@ -117,7 +98,7 @@ pub fn space_or_tab_eol_with_options(options: EolOptions) -> Box<StateFn> {
/// > | a␠␠b
/// ^
/// ```
-fn start(tokenizer: &mut Tokenizer) -> State {
+pub fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\t' | b' ') if tokenizer.tokenize_state.space_or_tab_max > 0 => {
tokenizer.enter_with_content(
@@ -144,7 +125,7 @@ fn start(tokenizer: &mut Tokenizer) -> State {
/// > | a␠␠b
/// ^
/// ```
-fn inside(tokenizer: &mut Tokenizer) -> State {
+pub fn inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\t' | b' ')
if tokenizer.tokenize_state.space_or_tab_size
@@ -152,7 +133,7 @@ fn inside(tokenizer: &mut Tokenizer) -> State {
{
tokenizer.consume();
tokenizer.tokenize_state.space_or_tab_size += 1;
- State::Fn(Box::new(inside))
+ State::Fn(StateName::SpaceOrTabInside)
}
_ => {
tokenizer.exit(tokenizer.tokenize_state.space_or_tab_token.clone());
@@ -167,7 +148,7 @@ fn inside(tokenizer: &mut Tokenizer) -> State {
/// > | a␠␠b
/// ^
/// ```
-fn after(tokenizer: &mut Tokenizer) -> State {
+pub fn after(tokenizer: &mut Tokenizer) -> State {
let state = if tokenizer.tokenize_state.space_or_tab_size
>= tokenizer.tokenize_state.space_or_tab_min
{
@@ -184,6 +165,44 @@ fn after(tokenizer: &mut Tokenizer) -> State {
state
}
+pub fn eol_start(tokenizer: &mut Tokenizer) -> State {
+ let state_name = space_or_tab_with_options(
+ tokenizer,
+ Options {
+ kind: Token::SpaceOrTab,
+ min: 1,
+ max: usize::MAX,
+ content_type: tokenizer
+ .tokenize_state
+ .space_or_tab_eol_content_type
+ .clone(),
+ connect: tokenizer.tokenize_state.space_or_tab_eol_connect,
+ },
+ );
+
+ tokenizer.attempt(state_name, move |ok| {
+ State::Fn(if ok {
+ StateName::SpaceOrTabEolAfterFirst
+ } else {
+ StateName::SpaceOrTabEolAtEol
+ })
+ })
+}
+
+pub fn eol_after_first(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.tokenize_state.space_or_tab_eol_ok = true;
+
+ if tokenizer
+ .tokenize_state
+ .space_or_tab_eol_content_type
+ .is_some()
+ {
+ tokenizer.tokenize_state.space_or_tab_eol_connect = true;
+ }
+
+ eol_at_eol(tokenizer)
+}
+
/// `space_or_tab_eol`: after optionally first `space_or_tab`.
///
/// ```markdown
@@ -191,16 +210,7 @@ fn after(tokenizer: &mut Tokenizer) -> State {
/// ^
/// | b
/// ```
-fn after_space_or_tab(tokenizer: &mut Tokenizer) -> State {
- if tokenizer.tokenize_state.space_or_tab_eol_ok
- && tokenizer
- .tokenize_state
- .space_or_tab_eol_content_type
- .is_some()
- {
- tokenizer.tokenize_state.space_or_tab_eol_connect = true;
- }
-
+pub fn eol_at_eol(tokenizer: &mut Tokenizer) -> State {
if let Some(b'\n') = tokenizer.current {
tokenizer.enter_with_content(
Token::LineEnding,
@@ -223,17 +233,17 @@ fn after_space_or_tab(tokenizer: &mut Tokenizer) -> State {
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
- State::Fn(Box::new(after_eol))
+ State::Fn(StateName::SpaceOrTabEolAfterEol)
} else {
- let state = if tokenizer.tokenize_state.space_or_tab_eol_ok {
- State::Ok
- } else {
- State::Nok
- };
+ let ok = tokenizer.tokenize_state.space_or_tab_eol_ok;
tokenizer.tokenize_state.space_or_tab_eol_content_type = None;
tokenizer.tokenize_state.space_or_tab_eol_connect = false;
tokenizer.tokenize_state.space_or_tab_eol_ok = false;
- state
+ if ok {
+ State::Ok
+ } else {
+ State::Nok
+ }
}
}
@@ -245,9 +255,10 @@ fn after_space_or_tab(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
#[allow(clippy::needless_pass_by_value)]
-fn after_eol(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt_opt(
- space_or_tab_with_options(Options {
+pub fn eol_after_eol(tokenizer: &mut Tokenizer) -> State {
+ let state_name = space_or_tab_with_options(
+ tokenizer,
+ Options {
kind: Token::SpaceOrTab,
min: 1,
max: usize::MAX,
@@ -256,9 +267,9 @@ fn after_eol(tokenizer: &mut Tokenizer) -> State {
.space_or_tab_eol_content_type
.clone(),
connect: tokenizer.tokenize_state.space_or_tab_eol_connect,
- }),
- after_more_space_or_tab,
- )(tokenizer)
+ },
+ );
+ tokenizer.attempt_opt(state_name, StateName::SpaceOrTabEolAfterMore)
}
/// `space_or_tab_eol`: after more (optional) `space_or_tab`.
@@ -268,7 +279,7 @@ fn after_eol(tokenizer: &mut Tokenizer) -> State {
/// > | b
/// ^
/// ```
-fn after_more_space_or_tab(tokenizer: &mut Tokenizer) -> State {
+pub fn eol_after_more(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.space_or_tab_eol_content_type = None;
tokenizer.tokenize_state.space_or_tab_eol_connect = false;
tokenizer.tokenize_state.space_or_tab_eol_ok = false;
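// `eol_start` above passes `attempt` a `StateName` plus a callback that
// merely picks the next named state, where the old code had to build a
// fresh boxed closure for every attempt. A toy model of that branch (the
// real `attempt` first runs the attempted sub-state-machine and feeds its
// result in as `ok`; here we supply it directly):

#[derive(Clone, Copy, Debug, PartialEq)]
enum StateName {
    SpaceOrTabEolAfterFirst,
    SpaceOrTabEolAtEol,
}

enum State {
    Fn(StateName),
}

fn attempt(ok: bool, done: impl FnOnce(bool) -> State) -> State {
    done(ok)
}

fn main() {
    let State::Fn(next) = attempt(true, |ok| {
        State::Fn(if ok {
            StateName::SpaceOrTabEolAfterFirst
        } else {
            StateName::SpaceOrTabEolAtEol
        })
    });
    assert_eq!(next, StateName::SpaceOrTabEolAfterFirst);
}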
diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs
index 6bf9099..8b72608 100644
--- a/src/construct/partial_title.rs
+++ b/src/construct/partial_title.rs
@@ -30,10 +30,10 @@
//! [character_reference]: crate::construct::character_reference
//! [label_end]: crate::construct::label_end
-use super::partial_space_or_tab::{space_or_tab_eol_with_options, EolOptions};
+use crate::construct::partial_space_or_tab::{space_or_tab_eol_with_options, EolOptions};
use crate::subtokenize::link;
use crate::token::Token;
-use crate::tokenizer::{ContentType, State, Tokenizer};
+use crate::tokenizer::{ContentType, State, StateName, Tokenizer};
/// Before a title.
///
@@ -50,7 +50,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(tokenizer.tokenize_state.token_2.clone());
tokenizer.consume();
tokenizer.exit(tokenizer.tokenize_state.token_2.clone());
- State::Fn(Box::new(begin))
+ State::Fn(StateName::TitleBegin)
}
_ => State::Nok,
}
@@ -64,7 +64,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | "a"
/// ^
/// ```
-fn begin(tokenizer: &mut Tokenizer) -> State {
+pub fn begin(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'"' | b'\'' | b')')
if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker =>
@@ -90,20 +90,30 @@ fn begin(tokenizer: &mut Tokenizer) -> State {
/// > | "a"
/// ^
/// ```
-fn at_break(tokenizer: &mut Tokenizer) -> State {
+pub fn at_break(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => {
tokenizer.tokenize_state.marker = 0;
tokenizer.tokenize_state.connect = false;
State::Nok
}
- Some(b'\n') => tokenizer.attempt(
- space_or_tab_eol_with_options(EolOptions {
- content_type: Some(ContentType::String),
- connect: tokenizer.tokenize_state.connect,
- }),
- |ok| Box::new(if ok { after_eol } else { at_blank_line }),
- )(tokenizer),
+ Some(b'\n') => {
+ let state_name = space_or_tab_eol_with_options(
+ tokenizer,
+ EolOptions {
+ content_type: Some(ContentType::String),
+ connect: tokenizer.tokenize_state.connect,
+ },
+ );
+
+ tokenizer.attempt(state_name, |ok| {
+ State::Fn(if ok {
+ StateName::TitleAfterEol
+ } else {
+ StateName::TitleAtBlankLine
+ })
+ })
+ }
Some(b'"' | b'\'' | b')')
if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker =>
{
@@ -120,19 +130,19 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.connect = true;
}
- title(tokenizer)
+ inside(tokenizer)
}
}
}
/// In a title, after an EOL: mark subsequent content as connected and resume at the break state.
-fn after_eol(tokenizer: &mut Tokenizer) -> State {
+pub fn after_eol(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.connect = true;
at_break(tokenizer)
}
/// In a title, at a blank line (not allowed): reset state and fail.
-fn at_blank_line(tokenizer: &mut Tokenizer) -> State {
+pub fn at_blank_line(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.marker = 0;
tokenizer.tokenize_state.connect = false;
State::Nok
@@ -144,7 +154,7 @@ fn at_blank_line(tokenizer: &mut Tokenizer) -> State {
/// > | "a"
/// ^
/// ```
-fn title(tokenizer: &mut Tokenizer) -> State {
+pub fn inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.exit(Token::Data);
@@ -157,9 +167,12 @@ fn title(tokenizer: &mut Tokenizer) -> State {
at_break(tokenizer)
}
Some(byte) => {
- let func = if matches!(byte, b'\\') { escape } else { title };
tokenizer.consume();
- State::Fn(Box::new(func))
+ State::Fn(if matches!(byte, b'\\') {
+ StateName::TitleEscape
+ } else {
+ StateName::TitleInside
+ })
}
}
}
@@ -170,12 +183,12 @@ fn title(tokenizer: &mut Tokenizer) -> State {
/// > | "a\*b"
/// ^
/// ```
-fn escape(tokenizer: &mut Tokenizer) -> State {
+pub fn escape(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'"' | b'\'' | b')') => {
tokenizer.consume();
- State::Fn(Box::new(title))
+ State::Fn(StateName::TitleInside)
}
- _ => title(tokenizer),
+ _ => inside(tokenizer),
}
}
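// `begin` and `at_break` above find the end of a title by comparing
// `tokenizer.current` against the stored `tokenize_state.marker`, where a
// `(` opener pairs with `)`. A standalone sketch of that pairing and the
// escape handling (illustrative; the real states also deal with EOLs and
// emit tokens):

fn closing_marker(opening: u8) -> Option<u8> {
    match opening {
        b'"' => Some(b'"'),
        b'\'' => Some(b'\''),
        b'(' => Some(b')'), // parenthesized titles close with `)`
        _ => None,          // not a title opener
    }
}

fn title_inner(bytes: &[u8]) -> Option<&[u8]> {
    let close = closing_marker(*bytes.first()?)?;
    let mut index = 1;
    while index < bytes.len() {
        match bytes[index] {
            b'\\' => index += 1, // escape: skip the next byte
            byte if byte == close => return Some(&bytes[1..index]),
            _ => {}
        }
        index += 1;
    }
    None // unclosed title
}

fn main() {
    assert_eq!(title_inner(b"\"a\""), Some(&b"a"[..]));
    assert_eq!(title_inner(b"(a)"), Some(&b"a"[..]));
    assert_eq!(title_inner(br#""a\"b""#), Some(&br#"a\"b"#[..]));
    assert_eq!(title_inner(b"\"a"), None);
}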
diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs
index 2ed2046..4ed25b6 100644
--- a/src/construct/thematic_break.rs
+++ b/src/construct/thematic_break.rs
@@ -51,7 +51,7 @@
use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
use crate::constant::{TAB_SIZE, THEMATIC_BREAK_MARKER_COUNT_MIN};
use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};
/// Start of a thematic break.
///
@@ -62,17 +62,17 @@ use crate::tokenizer::{State, Tokenizer};
pub fn start(tokenizer: &mut Tokenizer) -> State {
if tokenizer.parse_state.constructs.thematic_break {
tokenizer.enter(Token::ThematicBreak);
- tokenizer.go(
- space_or_tab_min_max(
- 0,
- if tokenizer.parse_state.constructs.code_indented {
- TAB_SIZE - 1
- } else {
- usize::MAX
- },
- ),
- before,
- )(tokenizer)
+ let state_name = space_or_tab_min_max(
+ tokenizer,
+ 0,
+ if tokenizer.parse_state.constructs.code_indented {
+ TAB_SIZE - 1
+ } else {
+ usize::MAX
+ },
+ );
+
+ tokenizer.go(state_name, StateName::ThematicBreakBefore)
} else {
State::Nok
}
@@ -84,7 +84,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | ***
/// ^
/// ```
-fn before(tokenizer: &mut Tokenizer) -> State {
+pub fn before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'*' | b'-' | b'_') => {
tokenizer.tokenize_state.marker = tokenizer.current.unwrap();
@@ -100,7 +100,7 @@ fn before(tokenizer: &mut Tokenizer) -> State {
/// > | ***
/// ^
/// ```
-fn at_break(tokenizer: &mut Tokenizer) -> State {
+pub fn at_break(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') if tokenizer.tokenize_state.size >= THEMATIC_BREAK_MARKER_COUNT_MIN => {
tokenizer.tokenize_state.marker = 0;
@@ -130,18 +130,19 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {
/// > | ***
/// ^
/// ```
-fn sequence(tokenizer: &mut Tokenizer) -> State {
+pub fn sequence(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'*' | b'-' | b'_')
if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker =>
{
tokenizer.consume();
tokenizer.tokenize_state.size += 1;
- State::Fn(Box::new(sequence))
+ State::Fn(StateName::ThematicBreakSequence)
}
_ => {
tokenizer.exit(Token::ThematicBreakSequence);
- tokenizer.attempt_opt(space_or_tab(), at_break)(tokenizer)
+ let state_name = space_or_tab(tokenizer);
+ tokenizer.attempt_opt(state_name, StateName::ThematicBreakAtBreak)
}
}
}
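// `before`, `at_break`, and `sequence` above accept a thematic break when
// runs of one repeated marker, separated only by spaces or tabs, reach
// `THEMATIC_BREAK_MARKER_COUNT_MIN`. A compact sketch of the same
// acceptance rule (the minimum of 3 follows CommonMark; indentation
// handling is omitted):

const THEMATIC_BREAK_MARKER_COUNT_MIN: usize = 3;

fn is_thematic_break(line: &str) -> bool {
    let mut marker = None;
    let mut count = 0;
    for byte in line.bytes() {
        match byte {
            b'\t' | b' ' => {} // whitespace between runs is fine
            b'*' | b'-' | b'_' if marker.is_none() => {
                marker = Some(byte);
                count = 1;
            }
            byte if Some(byte) == marker => count += 1,
            _ => return false, // other byte, or a second marker kind
        }
    }
    count >= THEMATIC_BREAK_MARKER_COUNT_MIN
}

fn main() {
    assert!(is_thematic_break("***"));
    assert!(is_thematic_break("- - -"));
    assert!(!is_thematic_break("**"));  // too few markers
    assert!(!is_thematic_break("*-*")); // mixed markers
}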