author    Titus Wormer <tituswormer@gmail.com> 2022-08-09 10:45:15 +0200
committer Titus Wormer <tituswormer@gmail.com> 2022-08-09 10:45:15 +0200
commit    4ce1ac9e41cafa9051377470e8a246063f7d9b1a (patch)
tree      d678d9583764b2706fe7ea4876e91e40609f15b0
parent    8ffed1822bcbc1b6ce6647b840fb03996b0635ea (diff)
Rewrite algorithm to not pass around boxed functions
* Pass state names from an enum around instead of boxed functions
* Refactor to simplify attempts a lot
* Use a subtokenizer for the `document` content type
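Roughly, the pattern is this (a minimal compilable sketch, not the actual markdown-rs types — the real `State`, `StateName`, and the dispatching `match` live in `src/tokenizer.rs`, with one variant per state function):

```rust
// Sketch of the rewrite. Previously a state was
// `State::Fn(Box<dyn FnOnce(&mut Tokenizer) -> State>)`, allocating a
// boxed closure on every transition. Now a state is a plain enum
// variant, and one `match` maps each name back to its function.

#[derive(Clone, Copy)]
enum StateName {
    AttentionInside,
    // ...in the real code, one variant per state function.
}

enum State {
    Ok,
    Nok,
    Fn(StateName),
}

struct Tokenizer {
    current: Option<u8>,
}

impl Tokenizer {
    /// Advance past the current byte (stubbed for this sketch).
    fn consume(&mut self) {
        self.current = None;
    }
}

/// Central dispatcher: a cheap `match` instead of a boxed closure call.
fn call(tokenizer: &mut Tokenizer, name: StateName) -> State {
    match name {
        StateName::AttentionInside => attention_inside(tokenizer),
    }
}

/// A state function now names its successor instead of boxing it.
fn attention_inside(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        Some(b'*' | b'_') => {
            tokenizer.consume();
            // Was: `State::Fn(Box::new(inside))`.
            State::Fn(StateName::AttentionInside)
        }
        _ => State::Ok,
    }
}

fn main() {
    let mut tokenizer = Tokenizer { current: Some(b'*') };
    let mut state = State::Fn(StateName::AttentionInside);
    // The main loop drives named states until a terminal `Ok`/`Nok`.
    while let State::Fn(name) = state {
        state = call(&mut tokenizer, name);
    }
    assert!(matches!(state, State::Ok));
}
```

The `attempt`/`check` helpers shift the same way: their callbacks now return `State::Fn(StateName)` rather than a freshly boxed function, and helpers such as `space_or_tab(tokenizer)` store their options on the tokenizer and hand back a `StateName` to jump to.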
-rw-r--r--  src/compiler.rs | 59
-rw-r--r--  src/construct/attention.rs | 6
-rw-r--r--  src/construct/autolink.rs | 44
-rw-r--r--  src/construct/blank_line.rs | 7
-rw-r--r--  src/construct/block_quote.rs | 52
-rw-r--r--  src/construct/character_escape.rs | 7
-rw-r--r--  src/construct/character_reference.rs | 18
-rw-r--r--  src/construct/code_fenced.rs | 140
-rw-r--r--  src/construct/code_indented.rs | 51
-rw-r--r--  src/construct/code_text.rs | 18
-rw-r--r--  src/construct/definition.rs | 72
-rw-r--r--  src/construct/hard_break_escape.rs | 6
-rw-r--r--  src/construct/heading_atx.rs | 49
-rw-r--r--  src/construct/heading_setext.rs | 35
-rw-r--r--  src/construct/html_flow.rs | 188
-rw-r--r--  src/construct/html_text.rs | 183
-rw-r--r--  src/construct/label_end.rs | 110
-rw-r--r--  src/construct/label_start_image.rs | 4
-rw-r--r--  src/construct/list.rs | 119
-rw-r--r--  src/construct/paragraph.rs | 6
-rw-r--r--  src/construct/partial_bom.rs | 6
-rw-r--r--  src/construct/partial_data.rs | 15
-rw-r--r--  src/construct/partial_destination.rs | 30
-rw-r--r--  src/construct/partial_label.rs | 50
-rw-r--r--  src/construct/partial_non_lazy_continuation.rs | 6
-rw-r--r--  src/construct/partial_space_or_tab.rs | 171
-rw-r--r--  src/construct/partial_title.rs | 55
-rw-r--r--  src/construct/thematic_break.rs | 35
-rw-r--r--  src/content/document.rs | 562
-rw-r--r--  src/content/flow.rs | 57
-rw-r--r--  src/content/string.rs | 28
-rw-r--r--  src/content/text.rs | 45
-rw-r--r--  src/subtokenize.rs | 13
-rw-r--r--  src/tokenizer.rs | 1012
34 files changed, 2065 insertions, 1194 deletions
diff --git a/src/compiler.rs b/src/compiler.rs
index b86fd82..57ab40a 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -482,28 +482,45 @@ fn on_enter_list(context: &mut CompileContext) {
// Blank line directly in list or directly in list item,
// but not a blank line after an empty list item.
if balance < 3 && event.token_type == Token::BlankLineEnding {
- let at_marker = balance == 2
- && events[skip::opt_back(
- events,
- index - 2,
- &[Token::BlankLineEnding, Token::SpaceOrTab],
- )]
- .token_type
- == Token::ListItemPrefix;
- let at_list_item = balance == 1 && events[index - 2].token_type == Token::ListItem;
- let at_empty_list_item = if at_list_item {
- let before_item = skip::opt_back(events, index - 2, &[Token::ListItem]);
- let before_prefix = skip::opt_back(
- events,
- index - 3,
- &[Token::ListItemPrefix, Token::SpaceOrTab],
- );
- before_item + 1 == before_prefix
- } else {
- false
- };
+ let mut at_marker = false;
+
+ if balance == 2 {
+ let mut before = index - 2;
+
+ if events[before].token_type == Token::SpaceOrTab {
+ before -= 2;
+ }
+
+ if events[before].token_type == Token::ListItemPrefix {
+ at_marker = true;
+ }
+ }
+
+ let mut at_empty_list_item = false;
+ let mut at_empty_block_quote = false;
+
+ if balance == 1 {
+ let mut before = index - 2;
+
+ if events[before].token_type == Token::SpaceOrTab {
+ before -= 2;
+ }
+
+ if events[before].token_type == Token::ListItem
+ && events[before - 1].token_type == Token::ListItemPrefix
+ {
+ at_empty_list_item = true;
+ }
+
+ if events[before].token_type == Token::ListItem
+ && events[before - 1].token_type == Token::BlockQuote
+ && events[before - 2].token_type == Token::BlockQuotePrefix
+ {
+ at_empty_block_quote = true;
+ }
+ }
- if !at_marker && !at_list_item && !at_empty_list_item {
+ if !at_marker && !at_empty_list_item && !at_empty_block_quote {
loose = true;
break;
}
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
index fc2acfb..5a98a89 100644
--- a/src/construct/attention.rs
+++ b/src/construct/attention.rs
@@ -52,7 +52,7 @@
//! [html-strong]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-strong-element
use crate::token::Token;
-use crate::tokenizer::{Event, EventType, Point, State, Tokenizer};
+use crate::tokenizer::{Event, EventType, Point, State, StateName, Tokenizer};
use crate::unicode::PUNCTUATION;
use crate::util::slice::Slice;
@@ -132,11 +132,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | **
/// ^^
/// ```
-fn inside(tokenizer: &mut Tokenizer) -> State {
+pub fn inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'*' | b'_') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
tokenizer.consume();
- State::Fn(Box::new(inside))
+ State::Fn(StateName::AttentionInside)
}
_ => {
tokenizer.exit(Token::AttentionSequence);
diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs
index 1444c61..15bfac1 100644
--- a/src/construct/autolink.rs
+++ b/src/construct/autolink.rs
@@ -103,7 +103,7 @@
use crate::constant::{AUTOLINK_DOMAIN_SIZE_MAX, AUTOLINK_SCHEME_SIZE_MAX};
use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};
/// Start of an autolink.
///
@@ -121,7 +121,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
tokenizer.consume();
tokenizer.exit(Token::AutolinkMarker);
tokenizer.enter(Token::AutolinkProtocol);
- State::Fn(Box::new(open))
+ State::Fn(StateName::AutolinkOpen)
}
_ => State::Nok,
}
@@ -135,12 +135,12 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | a<user@example.com>b
/// ^
/// ```
-fn open(tokenizer: &mut Tokenizer) -> State {
+pub fn open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// ASCII alphabetic.
Some(b'A'..=b'Z' | b'a'..=b'z') => {
tokenizer.consume();
- State::Fn(Box::new(scheme_or_email_atext))
+ State::Fn(StateName::AutolinkSchemeOrEmailAtext)
}
_ => email_atext(tokenizer),
}
@@ -154,7 +154,7 @@ fn open(tokenizer: &mut Tokenizer) -> State {
/// > | a<user@example.com>b
/// ^
/// ```
-fn scheme_or_email_atext(tokenizer: &mut Tokenizer) -> State {
+pub fn scheme_or_email_atext(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// ASCII alphanumeric and `+`, `-`, and `.`.
Some(b'+' | b'-' | b'.' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => {
@@ -174,12 +174,12 @@ fn scheme_or_email_atext(tokenizer: &mut Tokenizer) -> State {
/// > | a<user@example.com>b
/// ^
/// ```
-fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer) -> State {
+pub fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b':') => {
tokenizer.consume();
tokenizer.tokenize_state.size = 0;
- State::Fn(Box::new(url_inside))
+ State::Fn(StateName::AutolinkUrlInside)
}
// ASCII alphanumeric and `+`, `-`, and `.`.
Some(b'+' | b'-' | b'.' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z')
@@ -187,7 +187,7 @@ fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer) -> State {
{
tokenizer.tokenize_state.size += 1;
tokenizer.consume();
- State::Fn(Box::new(scheme_inside_or_email_atext))
+ State::Fn(StateName::AutolinkSchemeInsideOrEmailAtext)
}
_ => {
tokenizer.tokenize_state.size = 0;
@@ -202,7 +202,7 @@ fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer) -> State {
/// > | a<https://example.com>b
/// ^
/// ```
-fn url_inside(tokenizer: &mut Tokenizer) -> State {
+pub fn url_inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'>') => {
tokenizer.exit(Token::AutolinkProtocol);
@@ -212,7 +212,7 @@ fn url_inside(tokenizer: &mut Tokenizer) -> State {
None | Some(b'\0'..=0x1F | b' ' | b'<' | 0x7F) => State::Nok,
Some(_) => {
tokenizer.consume();
- State::Fn(Box::new(url_inside))
+ State::Fn(StateName::AutolinkUrlInside)
}
}
}
@@ -223,11 +223,11 @@ fn url_inside(tokenizer: &mut Tokenizer) -> State {
/// > | a<user.name@example.com>b
/// ^
/// ```
-fn email_atext(tokenizer: &mut Tokenizer) -> State {
+pub fn email_atext(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'@') => {
tokenizer.consume();
- State::Fn(Box::new(email_at_sign_or_dot))
+ State::Fn(StateName::AutolinkEmailAtSignOrDot)
}
// ASCII atext.
//
@@ -250,7 +250,7 @@ fn email_atext(tokenizer: &mut Tokenizer) -> State {
b'#'..=b'\'' | b'*' | b'+' | b'-'..=b'9' | b'=' | b'?' | b'A'..=b'Z' | b'^'..=b'~',
) => {
tokenizer.consume();
- State::Fn(Box::new(email_atext))
+ State::Fn(StateName::AutolinkEmailAtext)
}
_ => State::Nok,
}
@@ -262,7 +262,7 @@ fn email_atext(tokenizer: &mut Tokenizer) -> State {
/// > | a<user.name@example.com>b
/// ^ ^
/// ```
-fn email_at_sign_or_dot(tokenizer: &mut Tokenizer) -> State {
+pub fn email_at_sign_or_dot(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// ASCII alphanumeric.
Some(b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => email_value(tokenizer),
@@ -276,12 +276,12 @@ fn email_at_sign_or_dot(tokenizer: &mut Tokenizer) -> State {
/// > | a<user.name@example.com>b
/// ^
/// ```
-fn email_label(tokenizer: &mut Tokenizer) -> State {
+pub fn email_label(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'.') => {
tokenizer.tokenize_state.size = 0;
tokenizer.consume();
- State::Fn(Box::new(email_at_sign_or_dot))
+ State::Fn(StateName::AutolinkEmailAtSignOrDot)
}
Some(b'>') => {
tokenizer.tokenize_state.size = 0;
@@ -304,20 +304,20 @@ fn email_label(tokenizer: &mut Tokenizer) -> State {
/// > | a<user.name@ex-ample.com>b
/// ^
/// ```
-fn email_value(tokenizer: &mut Tokenizer) -> State {
+pub fn email_value(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// ASCII alphanumeric or `-`.
Some(b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z')
if tokenizer.tokenize_state.size < AUTOLINK_DOMAIN_SIZE_MAX =>
{
- let func = if matches!(tokenizer.current, Some(b'-')) {
- email_value
+ let state_name = if matches!(tokenizer.current, Some(b'-')) {
+ StateName::AutolinkEmailValue
} else {
- email_label
+ StateName::AutolinkEmailLabel
};
tokenizer.tokenize_state.size += 1;
tokenizer.consume();
- State::Fn(Box::new(func))
+ State::Fn(state_name)
}
_ => {
tokenizer.tokenize_state.size = 0;
@@ -334,7 +334,7 @@ fn email_value(tokenizer: &mut Tokenizer) -> State {
/// > | a<user@example.com>b
/// ^
/// ```
-fn end(tokenizer: &mut Tokenizer) -> State {
+pub fn end(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'>') => {
tokenizer.enter(Token::AutolinkMarker);
diff --git a/src/construct/blank_line.rs b/src/construct/blank_line.rs
index c4eacf5..b12c2c4 100644
--- a/src/construct/blank_line.rs
+++ b/src/construct/blank_line.rs
@@ -33,7 +33,7 @@
//! [flow]: crate::content::flow
use crate::construct::partial_space_or_tab::space_or_tab;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};
/// Start of a blank line.
///
@@ -46,7 +46,8 @@ use crate::tokenizer::{State, Tokenizer};
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt_opt(space_or_tab(), after)(tokenizer)
+ let state_name = space_or_tab(tokenizer);
+ tokenizer.attempt_opt(state_name, StateName::BlankLineAfter)
}
/// After zero or more spaces or tabs, before a line ending or EOF.
@@ -57,7 +58,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | ␊
/// ^
/// ```
-fn after(tokenizer: &mut Tokenizer) -> State {
+pub fn after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => State::Ok,
_ => State::Nok,
diff --git a/src/construct/block_quote.rs b/src/construct/block_quote.rs
index 7e4753d..df58d62 100644
--- a/src/construct/block_quote.rs
+++ b/src/construct/block_quote.rs
@@ -36,7 +36,7 @@
use crate::constant::TAB_SIZE;
use crate::construct::partial_space_or_tab::space_or_tab_min_max;
use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};
/// Start of block quote.
///
@@ -45,13 +45,17 @@ use crate::tokenizer::{State, Tokenizer};
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- let max = if tokenizer.parse_state.constructs.code_indented {
- TAB_SIZE - 1
- } else {
- usize::MAX
- };
if tokenizer.parse_state.constructs.block_quote {
- tokenizer.go(space_or_tab_min_max(0, max), before)(tokenizer)
+ let state_name = space_or_tab_min_max(
+ tokenizer,
+ 0,
+ if tokenizer.parse_state.constructs.code_indented {
+ TAB_SIZE - 1
+ } else {
+ usize::MAX
+ },
+ );
+ tokenizer.go(state_name, StateName::BlockQuoteBefore)
} else {
State::Nok
}
@@ -63,7 +67,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | > a
/// ^
/// ```
-fn before(tokenizer: &mut Tokenizer) -> State {
+pub fn before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'>') => {
tokenizer.enter(Token::BlockQuote);
@@ -80,13 +84,17 @@ fn before(tokenizer: &mut Tokenizer) -> State {
/// > | > b
/// ^
/// ```
-pub fn cont(tokenizer: &mut Tokenizer) -> State {
- let max = if tokenizer.parse_state.constructs.code_indented {
- TAB_SIZE - 1
- } else {
- usize::MAX
- };
- tokenizer.go(space_or_tab_min_max(0, max), cont_before)(tokenizer)
+pub fn cont_start(tokenizer: &mut Tokenizer) -> State {
+ let state_name = space_or_tab_min_max(
+ tokenizer,
+ 0,
+ if tokenizer.parse_state.constructs.code_indented {
+ TAB_SIZE - 1
+ } else {
+ usize::MAX
+ },
+ );
+ tokenizer.go(state_name, StateName::BlockQuoteContBefore)
}
/// After whitespace, before `>`.
@@ -96,14 +104,14 @@ pub fn cont(tokenizer: &mut Tokenizer) -> State {
/// > | > b
/// ^
/// ```
-fn cont_before(tokenizer: &mut Tokenizer) -> State {
+pub fn cont_before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'>') => {
tokenizer.enter(Token::BlockQuotePrefix);
tokenizer.enter(Token::BlockQuoteMarker);
tokenizer.consume();
tokenizer.exit(Token::BlockQuoteMarker);
- State::Fn(Box::new(cont_after))
+ State::Fn(StateName::BlockQuoteContAfter)
}
_ => State::Nok,
}
@@ -117,15 +125,13 @@ fn cont_before(tokenizer: &mut Tokenizer) -> State {
/// > | >b
/// ^
/// ```
-fn cont_after(tokenizer: &mut Tokenizer) -> State {
+pub fn cont_after(tokenizer: &mut Tokenizer) -> State {
if let Some(b'\t' | b' ') = tokenizer.current {
tokenizer.enter(Token::SpaceOrTab);
tokenizer.consume();
tokenizer.exit(Token::SpaceOrTab);
- tokenizer.exit(Token::BlockQuotePrefix);
- State::Ok
- } else {
- tokenizer.exit(Token::BlockQuotePrefix);
- State::Ok
}
+
+ tokenizer.exit(Token::BlockQuotePrefix);
+ State::Ok
}
diff --git a/src/construct/character_escape.rs b/src/construct/character_escape.rs
index 4419d7a..de09f17 100644
--- a/src/construct/character_escape.rs
+++ b/src/construct/character_escape.rs
@@ -34,7 +34,7 @@
//! [hard_break_escape]: crate::construct::hard_break_escape
use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};
/// Start of a character escape.
///
@@ -49,7 +49,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Token::CharacterEscapeMarker);
tokenizer.consume();
tokenizer.exit(Token::CharacterEscapeMarker);
- State::Fn(Box::new(inside))
+ State::Fn(StateName::CharacterEscapeInside)
}
_ => State::Nok,
}
@@ -61,7 +61,8 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | a\*b
/// ^
/// ```
-fn inside(tokenizer: &mut Tokenizer) -> State {
+// StateName::CharacterEscapeInside
+pub fn inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// ASCII punctuation.
Some(b'!'..=b'/' | b':'..=b'@' | b'['..=b'`' | b'{'..=b'~') => {
diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs
index 7cc74ba..ba05fab 100644
--- a/src/construct/character_reference.rs
+++ b/src/construct/character_reference.rs
@@ -66,7 +66,7 @@ use crate::constant::{
CHARACTER_REFERENCE_HEXADECIMAL_SIZE_MAX, CHARACTER_REFERENCE_NAMED_SIZE_MAX,
};
use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};
use crate::util::slice::Slice;
/// Start of a character reference.
@@ -86,7 +86,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Token::CharacterReferenceMarker);
tokenizer.consume();
tokenizer.exit(Token::CharacterReferenceMarker);
- State::Fn(Box::new(open))
+ State::Fn(StateName::CharacterReferenceOpen)
}
_ => State::Nok,
}
@@ -103,12 +103,13 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | a&#x9;b
/// ^
/// ```
-fn open(tokenizer: &mut Tokenizer) -> State {
+// StateName::CharacterReferenceOpen
+pub fn open(tokenizer: &mut Tokenizer) -> State {
if let Some(b'#') = tokenizer.current {
tokenizer.enter(Token::CharacterReferenceMarkerNumeric);
tokenizer.consume();
tokenizer.exit(Token::CharacterReferenceMarkerNumeric);
- State::Fn(Box::new(numeric))
+ State::Fn(StateName::CharacterReferenceNumeric)
} else {
tokenizer.tokenize_state.marker = b'&';
tokenizer.enter(Token::CharacterReferenceValue);
@@ -125,14 +126,15 @@ fn open(tokenizer: &mut Tokenizer) -> State {
/// > | a&#x9;b
/// ^
/// ```
-fn numeric(tokenizer: &mut Tokenizer) -> State {
+// StateName::CharacterReferenceNumeric
+pub fn numeric(tokenizer: &mut Tokenizer) -> State {
if let Some(b'x' | b'X') = tokenizer.current {
tokenizer.enter(Token::CharacterReferenceMarkerHexadecimal);
tokenizer.consume();
tokenizer.exit(Token::CharacterReferenceMarkerHexadecimal);
tokenizer.enter(Token::CharacterReferenceValue);
tokenizer.tokenize_state.marker = b'x';
- State::Fn(Box::new(value))
+ State::Fn(StateName::CharacterReferenceValue)
} else {
tokenizer.enter(Token::CharacterReferenceValue);
tokenizer.tokenize_state.marker = b'#';
@@ -154,7 +156,7 @@ fn numeric(tokenizer: &mut Tokenizer) -> State {
/// > | a&#x9;b
/// ^
/// ```
-fn value(tokenizer: &mut Tokenizer) -> State {
+pub fn value(tokenizer: &mut Tokenizer) -> State {
if matches!(tokenizer.current, Some(b';')) && tokenizer.tokenize_state.size > 0 {
// Named.
if tokenizer.tokenize_state.marker == b'&' {
@@ -200,7 +202,7 @@ fn value(tokenizer: &mut Tokenizer) -> State {
if tokenizer.tokenize_state.size < max && test(&byte) {
tokenizer.tokenize_state.size += 1;
tokenizer.consume();
- return State::Fn(Box::new(value));
+ return State::Fn(StateName::CharacterReferenceValue);
}
}
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index a22a0f9..46c5f9f 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -102,12 +102,9 @@
//! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
use crate::constant::{CODE_FENCED_SEQUENCE_SIZE_MIN, TAB_SIZE};
-use crate::construct::{
- partial_non_lazy_continuation::start as partial_non_lazy_continuation,
- partial_space_or_tab::{space_or_tab, space_or_tab_min_max},
-};
+use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
use crate::token::Token;
-use crate::tokenizer::{ContentType, State, Tokenizer};
+use crate::tokenizer::{ContentType, State, StateName, Tokenizer};
use crate::util::slice::{Position, Slice};
/// Start of fenced code.
@@ -122,17 +119,16 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
if tokenizer.parse_state.constructs.code_fenced {
tokenizer.enter(Token::CodeFenced);
tokenizer.enter(Token::CodeFencedFence);
- tokenizer.go(
- space_or_tab_min_max(
- 0,
- if tokenizer.parse_state.constructs.code_indented {
- TAB_SIZE - 1
- } else {
- usize::MAX
- },
- ),
- before_sequence_open,
- )(tokenizer)
+ let state_name = space_or_tab_min_max(
+ tokenizer,
+ 0,
+ if tokenizer.parse_state.constructs.code_indented {
+ TAB_SIZE - 1
+ } else {
+ usize::MAX
+ },
+ );
+ tokenizer.go(state_name, StateName::CodeFencedBeforeSequenceOpen)
} else {
State::Nok
}
@@ -146,7 +142,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// | console.log(1)
/// | ~~~
/// ```
-fn before_sequence_open(tokenizer: &mut Tokenizer) -> State {
+pub fn before_sequence_open(tokenizer: &mut Tokenizer) -> State {
let tail = tokenizer.events.last();
let mut prefix = 0;
@@ -178,16 +174,17 @@ fn before_sequence_open(tokenizer: &mut Tokenizer) -> State {
/// | console.log(1)
/// | ~~~
/// ```
-fn sequence_open(tokenizer: &mut Tokenizer) -> State {
+pub fn sequence_open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'`' | b'~') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
tokenizer.tokenize_state.size += 1;
tokenizer.consume();
- State::Fn(Box::new(sequence_open))
+ State::Fn(StateName::CodeFencedSequenceOpen)
}
_ if tokenizer.tokenize_state.size >= CODE_FENCED_SEQUENCE_SIZE_MIN => {
tokenizer.exit(Token::CodeFencedFenceSequence);
- tokenizer.attempt_opt(space_or_tab(), info_before)(tokenizer)
+ let state_name = space_or_tab(tokenizer);
+ tokenizer.attempt_opt(state_name, StateName::CodeFencedInfoBefore)
}
_ => {
tokenizer.tokenize_state.marker = 0;
@@ -206,7 +203,7 @@ fn sequence_open(tokenizer: &mut Tokenizer) -> State {
/// | console.log(1)
/// | ~~~
/// ```
-fn info_before(tokenizer: &mut Tokenizer) -> State {
+pub fn info_before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.exit(Token::CodeFencedFence);
@@ -217,7 +214,7 @@ fn info_before(tokenizer: &mut Tokenizer) -> State {
_ => {
tokenizer.enter(Token::CodeFencedFenceInfo);
tokenizer.enter_with_content(Token::Data, Some(ContentType::String));
- info_inside(tokenizer)
+ info(tokenizer)
}
}
}
@@ -230,7 +227,7 @@ fn info_before(tokenizer: &mut Tokenizer) -> State {
/// | console.log(1)
/// | ~~~
/// ```
-fn info_inside(tokenizer: &mut Tokenizer) -> State {
+pub fn info(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.exit(Token::Data);
@@ -243,7 +240,8 @@ fn info_inside(tokenizer: &mut Tokenizer) -> State {
Some(b'\t' | b' ') => {
tokenizer.exit(Token::Data);
tokenizer.exit(Token::CodeFencedFenceInfo);
- tokenizer.attempt_opt(space_or_tab(), meta_before)(tokenizer)
+ let state_name = space_or_tab(tokenizer);
+ tokenizer.attempt_opt(state_name, StateName::CodeFencedMetaBefore)
}
Some(b'`') if tokenizer.tokenize_state.marker == b'`' => {
tokenizer.concrete = false;
@@ -254,7 +252,7 @@ fn info_inside(tokenizer: &mut Tokenizer) -> State {
}
Some(_) => {
tokenizer.consume();
- State::Fn(Box::new(info_inside))
+ State::Fn(StateName::CodeFencedInfo)
}
}
}
@@ -267,7 +265,7 @@ fn info_inside(tokenizer: &mut Tokenizer) -> State {
/// | console.log(1)
/// | ~~~
/// ```
-fn meta_before(tokenizer: &mut Tokenizer) -> State {
+pub fn meta_before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.exit(Token::CodeFencedFence);
@@ -291,7 +289,7 @@ fn meta_before(tokenizer: &mut Tokenizer) -> State {
/// | console.log(1)
/// | ~~~
/// ```
-fn meta(tokenizer: &mut Tokenizer) -> State {
+pub fn meta(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.exit(Token::Data);
@@ -310,7 +308,7 @@ fn meta(tokenizer: &mut Tokenizer) -> State {
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(meta))
+ State::Fn(StateName::CodeFencedMeta)
}
}
}
@@ -324,10 +322,14 @@ fn meta(tokenizer: &mut Tokenizer) -> State {
/// ^
/// | ~~~
/// ```
-fn at_break(tokenizer: &mut Tokenizer) -> State {
- tokenizer.check(partial_non_lazy_continuation, |ok| {
- Box::new(if ok { at_non_lazy_break } else { after })
- })(tokenizer)
+pub fn at_break(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.check(StateName::NonLazyContinuationStart, |ok| {
+ State::Fn(if ok {
+ StateName::CodeFencedAtNonLazyBreak
+ } else {
+ StateName::CodeFencedAfter
+ })
+ })
}
/// At an eol/eof in code, before a non-lazy closing fence or content.
@@ -339,10 +341,14 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {
/// ^
/// | ~~~
/// ```
-fn at_non_lazy_break(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt(close_begin, |ok| {
- Box::new(if ok { after } else { content_before })
- })(tokenizer)
+pub fn at_non_lazy_break(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.attempt(StateName::CodeFencedCloseBefore, |ok| {
+ State::Fn(if ok {
+ StateName::CodeFencedAfter
+ } else {
+ StateName::CodeFencedContentBefore
+ })
+ })
}
/// Before a closing fence, at the line ending.
@@ -353,13 +359,13 @@ fn at_non_lazy_break(tokenizer: &mut Tokenizer) -> State {
/// ^
/// | ~~~
/// ```
-fn close_begin(tokenizer: &mut Tokenizer) -> State {
+pub fn close_before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\n') => {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
- State::Fn(Box::new(close_start))
+ State::Fn(StateName::CodeFencedCloseStart)
}
_ => unreachable!("expected eol"),
}
@@ -373,19 +379,18 @@ fn close_begin(tokenizer: &mut Tokenizer) -> State {
/// > | ~~~
/// ^
/// ```
-fn close_start(tokenizer: &mut Tokenizer) -> State {
+pub fn close_start(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Token::CodeFencedFence);
- tokenizer.go(
- space_or_tab_min_max(
- 0,
- if tokenizer.parse_state.constructs.code_indented {
- TAB_SIZE - 1
- } else {
- usize::MAX
- },
- ),
- close_before,
- )(tokenizer)
+ let state_name = space_or_tab_min_max(
+ tokenizer,
+ 0,
+ if tokenizer.parse_state.constructs.code_indented {
+ TAB_SIZE - 1
+ } else {
+ usize::MAX
+ },
+ );
+ tokenizer.go(state_name, StateName::CodeFencedBeforeSequenceClose)
}
/// In a closing fence, after optional whitespace, before sequence.
@@ -396,11 +401,11 @@ fn close_start(tokenizer: &mut Tokenizer) -> State {
/// > | ~~~
/// ^
/// ```
-fn close_before(tokenizer: &mut Tokenizer) -> State {
+pub fn before_sequence_close(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'`' | b'~') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
tokenizer.enter(Token::CodeFencedFenceSequence);
- close_sequence(tokenizer)
+ sequence_close(tokenizer)
}
_ => State::Nok,
}
@@ -414,19 +419,20 @@ fn close_before(tokenizer: &mut Tokenizer) -> State {
/// > | ~~~
/// ^
/// ```
-fn close_sequence(tokenizer: &mut Tokenizer) -> State {
+pub fn sequence_close(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'`' | b'~') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
tokenizer.tokenize_state.size_other += 1;
tokenizer.consume();
- State::Fn(Box::new(close_sequence))
+ State::Fn(StateName::CodeFencedSequenceClose)
}
_ if tokenizer.tokenize_state.size_other >= CODE_FENCED_SEQUENCE_SIZE_MIN
&& tokenizer.tokenize_state.size_other >= tokenizer.tokenize_state.size =>
{
tokenizer.tokenize_state.size_other = 0;
tokenizer.exit(Token::CodeFencedFenceSequence);
- tokenizer.attempt_opt(space_or_tab(), close_sequence_after)(tokenizer)
+ let state_name = space_or_tab(tokenizer);
+ tokenizer.attempt_opt(state_name, StateName::CodeFencedAfterSequenceClose)
}
_ => {
tokenizer.tokenize_state.size_other = 0;
@@ -443,7 +449,7 @@ fn close_sequence(tokenizer: &mut Tokenizer) -> State {
/// > | ~~~
/// ^
/// ```
-fn close_sequence_after(tokenizer: &mut Tokenizer) -> State {
+pub fn sequence_close_after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.exit(Token::CodeFencedFence);
@@ -461,11 +467,11 @@ fn close_sequence_after(tokenizer: &mut Tokenizer) -> State {
/// ^
/// | ~~~
/// ```
-fn content_before(tokenizer: &mut Tokenizer) -> State {
+pub fn content_before(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
- State::Fn(Box::new(content_start))
+ State::Fn(StateName::CodeFencedContentStart)
}
/// Before code content, definitely not before a closing fence.
///
@@ -475,11 +481,9 @@ fn content_before(tokenizer: &mut Tokenizer) -> State {
/// ^
/// | ~~~
/// ```
-fn content_start(tokenizer: &mut Tokenizer) -> State {
- tokenizer.go(
- space_or_tab_min_max(0, tokenizer.tokenize_state.prefix),
- content_begin,
- )(tokenizer)
+pub fn content_start(tokenizer: &mut Tokenizer) -> State {
+ let state_name = space_or_tab_min_max(tokenizer, 0, tokenizer.tokenize_state.prefix);
+ tokenizer.go(state_name, StateName::CodeFencedBeforeContentChunk)
}
/// Before code content, after a prefix.
@@ -490,12 +494,12 @@ fn content_start(tokenizer: &mut Tokenizer) -> State {
/// ^
/// | ~~~
/// ```
-fn content_begin(tokenizer: &mut Tokenizer) -> State {
+pub fn before_content_chunk(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => at_break(tokenizer),
_ => {
tokenizer.enter(Token::CodeFlowChunk);
- content_continue(tokenizer)
+ content_chunk(tokenizer)
}
}
}
@@ -508,7 +512,7 @@ fn content_begin(tokenizer: &mut Tokenizer) -> State {
/// ^^^^^^^^^^^^^^
/// | ~~~
/// ```
-fn content_continue(tokenizer: &mut Tokenizer) -> State {
+pub fn content_chunk(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.exit(Token::CodeFlowChunk);
@@ -516,7 +520,7 @@ fn content_continue(tokenizer: &mut Tokenizer) -> State {
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(content_continue))
+ State::Fn(StateName::CodeFencedContentChunk)
}
}
}
@@ -529,7 +533,7 @@ fn content_continue(tokenizer: &mut Tokenizer) -> State {
/// > | ~~~
/// ^
/// ```
-fn after(tokenizer: &mut Tokenizer) -> State {
+pub fn after(tokenizer: &mut Tokenizer) -> State {
tokenizer.exit(Token::CodeFenced);
tokenizer.tokenize_state.marker = 0;
tokenizer.tokenize_state.prefix = 0;
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs
index 81a3080..516b493 100644
--- a/src/construct/code_indented.rs
+++ b/src/construct/code_indented.rs
@@ -48,7 +48,7 @@
use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
use crate::constant::TAB_SIZE;
use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};
/// Start of code (indented).
///
@@ -64,7 +64,8 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
// Do not interrupt paragraphs.
if !tokenizer.interrupt && tokenizer.parse_state.constructs.code_indented {
tokenizer.enter(Token::CodeIndented);
- tokenizer.go(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), at_break)(tokenizer)
+ let state_name = space_or_tab_min_max(tokenizer, TAB_SIZE, TAB_SIZE);
+ tokenizer.go(state_name, StateName::CodeIndentedAtBreak)
} else {
State::Nok
}
@@ -76,15 +77,19 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | aaa
/// ^ ^
/// ```
-fn at_break(tokenizer: &mut Tokenizer) -> State {
+pub fn at_break(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => after(tokenizer),
- Some(b'\n') => tokenizer.attempt(further_start, |ok| {
- Box::new(if ok { at_break } else { after })
- })(tokenizer),
+ Some(b'\n') => tokenizer.attempt(StateName::CodeIndentedFurtherStart, |ok| {
+ State::Fn(if ok {
+ StateName::CodeIndentedAtBreak
+ } else {
+ StateName::CodeIndentedAfter
+ })
+ }),
_ => {
tokenizer.enter(Token::CodeFlowChunk);
- content(tokenizer)
+ inside(tokenizer)
}
}
}
@@ -95,7 +100,7 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {
/// > | aaa
/// ^^^^
/// ```
-fn content(tokenizer: &mut Tokenizer) -> State {
+pub fn inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.exit(Token::CodeFlowChunk);
@@ -103,7 +108,7 @@ fn content(tokenizer: &mut Tokenizer) -> State {
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(content))
+ State::Fn(StateName::CodeIndentedInside)
}
}
}
@@ -114,7 +119,7 @@ fn content(tokenizer: &mut Tokenizer) -> State {
/// > | aaa
/// ^
/// ```
-fn after(tokenizer: &mut Tokenizer) -> State {
+pub fn after(tokenizer: &mut Tokenizer) -> State {
tokenizer.exit(Token::CodeIndented);
// Feel free to interrupt.
tokenizer.interrupt = false;
@@ -128,17 +133,24 @@ fn after(tokenizer: &mut Tokenizer) -> State {
/// ^
/// | bbb
/// ```
-fn further_start(tokenizer: &mut Tokenizer) -> State {
+pub fn further_start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\n') if !tokenizer.lazy => {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
- State::Fn(Box::new(further_start))
+ State::Fn(StateName::CodeIndentedFurtherStart)
+ }
+ _ if !tokenizer.lazy => {
+ let state_name = space_or_tab_min_max(tokenizer, TAB_SIZE, TAB_SIZE);
+ tokenizer.attempt(state_name, |ok| {
+ State::Fn(if ok {
+ StateName::CodeIndentedFurtherEnd
+ } else {
+ StateName::CodeIndentedFurtherBegin
+ })
+ })
}
- _ if !tokenizer.lazy => tokenizer.attempt(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), |ok| {
- Box::new(if ok { further_end } else { further_begin })
- })(tokenizer),
_ => State::Nok,
}
}
@@ -150,7 +162,7 @@ fn further_start(tokenizer: &mut Tokenizer) -> State {
/// ^
/// | bbb
/// ```
-fn further_end(_tokenizer: &mut Tokenizer) -> State {
+pub fn further_end(_tokenizer: &mut Tokenizer) -> State {
State::Ok
}
@@ -161,8 +173,9 @@ fn further_end(_tokenizer: &mut Tokenizer) -> State {
/// > | bbb
/// ^
/// ```
-fn further_begin(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt_opt(space_or_tab(), further_after)(tokenizer)
+pub fn further_begin(tokenizer: &mut Tokenizer) -> State {
+ let state_name = space_or_tab(tokenizer);
+ tokenizer.attempt_opt(state_name, StateName::CodeIndentedFurtherAfter)
}
/// After whitespace, not indented enough.
@@ -172,7 +185,7 @@ fn further_begin(tokenizer: &mut Tokenizer) -> State {
/// > | bbb
/// ^
/// ```
-fn further_after(tokenizer: &mut Tokenizer) -> State {
+pub fn further_after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\n') => further_start(tokenizer),
_ => State::Nok,
diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs
index 31777f4..5bdefbb 100644
--- a/src/construct/code_text.rs
+++ b/src/construct/code_text.rs
@@ -84,7 +84,7 @@
//! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};
/// Start of code (text).
///
@@ -117,11 +117,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | `a`
/// ^
/// ```
-fn sequence_open(tokenizer: &mut Tokenizer) -> State {
+pub fn sequence_open(tokenizer: &mut Tokenizer) -> State {
if let Some(b'`') = tokenizer.current {
tokenizer.tokenize_state.size += 1;
tokenizer.consume();
- State::Fn(Box::new(sequence_open))
+ State::Fn(StateName::CodeTextSequenceOpen)
} else {
tokenizer.exit(Token::CodeTextSequence);
between(tokenizer)
@@ -134,7 +134,7 @@ fn sequence_open(tokenizer: &mut Tokenizer) -> State {
/// > | `a`
/// ^^
/// ```
-fn between(tokenizer: &mut Tokenizer) -> State {
+pub fn between(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => {
tokenizer.tokenize_state.size = 0;
@@ -144,7 +144,7 @@ fn between(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
- State::Fn(Box::new(between))
+ State::Fn(StateName::CodeTextBetween)
}
Some(b'`') => {
tokenizer.enter(Token::CodeTextSequence);
@@ -163,7 +163,7 @@ fn between(tokenizer: &mut Tokenizer) -> State {
/// > | `a`
/// ^
/// ```
-fn data(tokenizer: &mut Tokenizer) -> State {
+pub fn data(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n' | b'`') => {
tokenizer.exit(Token::CodeTextData);
@@ -171,7 +171,7 @@ fn data(tokenizer: &mut Tokenizer) -> State {
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(data))
+ State::Fn(StateName::CodeTextData)
}
}
}
@@ -182,12 +182,12 @@ fn data(tokenizer: &mut Tokenizer) -> State {
/// > | `a`
/// ^
/// ```
-fn sequence_close(tokenizer: &mut Tokenizer) -> State {
+pub fn sequence_close(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'`') => {
tokenizer.tokenize_state.size_other += 1;
tokenizer.consume();
- State::Fn(Box::new(sequence_close))
+ State::Fn(StateName::CodeTextSequenceClose)
}
_ => {
if tokenizer.tokenize_state.size == tokenizer.tokenize_state.size_other {
diff --git a/src/construct/definition.rs b/src/construct/definition.rs
index a56dab4..fbad99d 100644
--- a/src/construct/definition.rs
+++ b/src/construct/definition.rs
@@ -93,14 +93,9 @@
//! [html-a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element
//! [html-img]: https://html.spec.whatwg.org/multipage/embedded-content.html#the-img-element
-use crate::construct::{
- partial_destination::start as destination,
- partial_label::start as label,
- partial_space_or_tab::{space_or_tab, space_or_tab_eol},
- partial_title::start as title,
-};
+use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_eol};
use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};
use crate::util::skip::opt_back as skip_opt_back;
/// At the start of a definition.
@@ -124,7 +119,8 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
if possible && tokenizer.parse_state.constructs.definition {
tokenizer.enter(Token::Definition);
// Note: arbitrary whitespace allowed even if code (indented) is on.
- tokenizer.attempt_opt(space_or_tab(), before)(tokenizer)
+ let state_name = space_or_tab(tokenizer);
+ tokenizer.attempt_opt(state_name, StateName::DefinitionBefore)
} else {
State::Nok
}
@@ -136,13 +132,13 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | [a]: b "c"
/// ^
/// ```
-fn before(tokenizer: &mut Tokenizer) -> State {
+pub fn before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'[') => {
tokenizer.tokenize_state.token_1 = Token::DefinitionLabel;
tokenizer.tokenize_state.token_2 = Token::DefinitionLabelMarker;
tokenizer.tokenize_state.token_3 = Token::DefinitionLabelString;
- tokenizer.go(label, label_after)(tokenizer)
+ tokenizer.go(StateName::LabelStart, StateName::DefinitionLabelAfter)
}
_ => State::Nok,
}
@@ -154,7 +150,7 @@ fn before(tokenizer: &mut Tokenizer) -> State {
/// > | [a]: b "c"
/// ^
/// ```
-fn label_after(tokenizer: &mut Tokenizer) -> State {
+pub fn label_after(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.token_1 = Token::Data;
tokenizer.tokenize_state.token_2 = Token::Data;
tokenizer.tokenize_state.token_3 = Token::Data;
@@ -164,34 +160,38 @@ fn label_after(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Token::DefinitionMarker);
tokenizer.consume();
tokenizer.exit(Token::DefinitionMarker);
- State::Fn(Box::new(
- tokenizer.attempt_opt(space_or_tab_eol(), destination_before),
- ))
+ State::Fn(StateName::DefinitionMarkerAfter)
}
_ => State::Nok,
}
}
+/// To do.
+pub fn marker_after(tokenizer: &mut Tokenizer) -> State {
+ let state_name = space_or_tab_eol(tokenizer);
+ tokenizer.attempt_opt(state_name, StateName::DefinitionDestinationBefore)
+}
+
/// Before a destination.
///
/// ```markdown
/// > | [a]: b "c"
/// ^
/// ```
-fn destination_before(tokenizer: &mut Tokenizer) -> State {
+pub fn destination_before(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.token_1 = Token::DefinitionDestination;
tokenizer.tokenize_state.token_2 = Token::DefinitionDestinationLiteral;
tokenizer.tokenize_state.token_3 = Token::DefinitionDestinationLiteralMarker;
tokenizer.tokenize_state.token_4 = Token::DefinitionDestinationRaw;
tokenizer.tokenize_state.token_5 = Token::DefinitionDestinationString;
tokenizer.tokenize_state.size_other = usize::MAX;
- tokenizer.attempt(destination, |ok| {
- Box::new(if ok {
- destination_after
+ tokenizer.attempt(StateName::DestinationStart, |ok| {
+ State::Fn(if ok {
+ StateName::DefinitionDestinationAfter
} else {
- destination_missing
+ StateName::DefinitionDestinationMissing
})
- })(tokenizer)
+ })
}
/// After a destination.
@@ -200,18 +200,18 @@ fn destination_before(tokenizer: &mut Tokenizer) -> State {
/// > | [a]: b "c"
/// ^
/// ```
-fn destination_after(tokenizer: &mut Tokenizer) -> State {
+pub fn destination_after(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.token_1 = Token::Data;
tokenizer.tokenize_state.token_2 = Token::Data;
tokenizer.tokenize_state.token_3 = Token::Data;
tokenizer.tokenize_state.token_4 = Token::Data;
tokenizer.tokenize_state.token_5 = Token::Data;
tokenizer.tokenize_state.size_other = 0;
- tokenizer.attempt_opt(title_before, after)(tokenizer)
+ tokenizer.attempt_opt(StateName::DefinitionTitleBefore, StateName::DefinitionAfter)
}
/// Without destination.
-fn destination_missing(tokenizer: &mut Tokenizer) -> State {
+pub fn destination_missing(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.token_1 = Token::Data;
tokenizer.tokenize_state.token_2 = Token::Data;
tokenizer.tokenize_state.token_3 = Token::Data;
@@ -229,8 +229,9 @@ fn destination_missing(tokenizer: &mut Tokenizer) -> State {
/// > | [a]: b "c"
/// ^
/// ```
-fn after(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt_opt(space_or_tab(), after_whitespace)(tokenizer)
+pub fn after(tokenizer: &mut Tokenizer) -> State {
+ let state_name = space_or_tab(tokenizer);
+ tokenizer.attempt_opt(state_name, StateName::DefinitionAfterWhitespace)
}
/// After a definition, after optional whitespace.
@@ -241,7 +242,7 @@ fn after(tokenizer: &mut Tokenizer) -> State {
/// > | [a]: b "c"
/// ^
/// ```
-fn after_whitespace(tokenizer: &mut Tokenizer) -> State {
+pub fn after_whitespace(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.exit(Token::Definition);
@@ -261,8 +262,9 @@ fn after_whitespace(tokenizer: &mut Tokenizer) -> State {
/// > | [a]: b "c"
/// ^
/// ```
-fn title_before(tokenizer: &mut Tokenizer) -> State {
- tokenizer.go(space_or_tab_eol(), title_before_marker)(tokenizer)
+pub fn title_before(tokenizer: &mut Tokenizer) -> State {
+ let state_name = space_or_tab_eol(tokenizer);
+ tokenizer.go(state_name, StateName::DefinitionTitleBeforeMarker)
}
/// Before a title, after a line ending.
@@ -272,11 +274,11 @@ fn title_before(tokenizer: &mut Tokenizer) -> State {
/// > | "c"
/// ^
/// ```
-fn title_before_marker(tokenizer: &mut Tokenizer) -> State {
+pub fn title_before_marker(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.token_1 = Token::DefinitionTitle;
tokenizer.tokenize_state.token_2 = Token::DefinitionTitleMarker;
tokenizer.tokenize_state.token_3 = Token::DefinitionTitleString;
- tokenizer.go(title, title_after)(tokenizer)
+ tokenizer.go(StateName::TitleStart, StateName::DefinitionTitleAfter)
}
/// After a title.
@@ -285,11 +287,15 @@ fn title_before_marker(tokenizer: &mut Tokenizer) -> State {
/// > | [a]: b "c"
/// ^
/// ```
-fn title_after(tokenizer: &mut Tokenizer) -> State {
+pub fn title_after(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.token_1 = Token::Data;
tokenizer.tokenize_state.token_2 = Token::Data;
tokenizer.tokenize_state.token_3 = Token::Data;
- tokenizer.attempt_opt(space_or_tab(), title_after_after_optional_whitespace)(tokenizer)
+ let state_name = space_or_tab(tokenizer);
+ tokenizer.attempt_opt(
+ state_name,
+ StateName::DefinitionTitleAfterOptionalWhitespace,
+ )
}
/// After a title, after optional whitespace.
@@ -298,7 +304,7 @@ fn title_after(tokenizer: &mut Tokenizer) -> State {
/// > | [a]: b "c"
/// ^
/// ```
-fn title_after_after_optional_whitespace(tokenizer: &mut Tokenizer) -> State {
+pub fn title_after_optional_whitespace(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => State::Ok,
_ => State::Nok,
diff --git a/src/construct/hard_break_escape.rs b/src/construct/hard_break_escape.rs
index d09bf54..47b7e94 100644
--- a/src/construct/hard_break_escape.rs
+++ b/src/construct/hard_break_escape.rs
@@ -40,7 +40,7 @@
//! [html]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-br-element
use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};
/// Start of a hard break (escape).
///
@@ -54,7 +54,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
Some(b'\\') if tokenizer.parse_state.constructs.hard_break_escape => {
tokenizer.enter(Token::HardBreakEscape);
tokenizer.consume();
- State::Fn(Box::new(after))
+ State::Fn(StateName::HardBreakEscapeAfter)
}
_ => State::Nok,
}
@@ -67,7 +67,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ^
/// | b
/// ```
-fn after(tokenizer: &mut Tokenizer) -> State {
+pub fn after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\n') => {
tokenizer.exit(Token::HardBreakEscape);
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index 6751567..45c4758 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -57,7 +57,7 @@
use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
use crate::constant::{HEADING_ATX_OPENING_FENCE_SIZE_MAX, TAB_SIZE};
use crate::token::Token;
-use crate::tokenizer::{ContentType, Event, EventType, State, Tokenizer};
+use crate::tokenizer::{ContentType, Event, EventType, State, StateName, Tokenizer};
/// Start of a heading (atx).
///
@@ -68,17 +68,16 @@ use crate::tokenizer::{ContentType, Event, EventType, State, Tokenizer};
pub fn start(tokenizer: &mut Tokenizer) -> State {
if tokenizer.parse_state.constructs.heading_atx {
tokenizer.enter(Token::HeadingAtx);
- tokenizer.go(
- space_or_tab_min_max(
- 0,
- if tokenizer.parse_state.constructs.code_indented {
- TAB_SIZE - 1
- } else {
- usize::MAX
- },
- ),
- before,
- )(tokenizer)
+ let state_name = space_or_tab_min_max(
+ tokenizer,
+ 0,
+ if tokenizer.parse_state.constructs.code_indented {
+ TAB_SIZE - 1
+ } else {
+ usize::MAX
+ },
+ );
+ tokenizer.go(state_name, StateName::HeadingAtxBefore)
} else {
State::Nok
}
@@ -90,7 +89,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | ## aa
/// ^
/// ```
-fn before(tokenizer: &mut Tokenizer) -> State {
+pub fn before(tokenizer: &mut Tokenizer) -> State {
if Some(b'#') == tokenizer.current {
tokenizer.enter(Token::HeadingAtxSequence);
sequence_open(tokenizer)
@@ -105,7 +104,7 @@ fn before(tokenizer: &mut Tokenizer) -> State {
/// > | ## aa
/// ^
/// ```
-fn sequence_open(tokenizer: &mut Tokenizer) -> State {
+pub fn sequence_open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') if tokenizer.tokenize_state.size > 0 => {
tokenizer.tokenize_state.size = 0;
@@ -115,12 +114,13 @@ fn sequence_open(tokenizer: &mut Tokenizer) -> State {
Some(b'#') if tokenizer.tokenize_state.size < HEADING_ATX_OPENING_FENCE_SIZE_MAX => {
tokenizer.tokenize_state.size += 1;
tokenizer.consume();
- State::Fn(Box::new(sequence_open))
+ State::Fn(StateName::HeadingAtxSequenceOpen)
}
_ if tokenizer.tokenize_state.size > 0 => {
tokenizer.tokenize_state.size = 0;
tokenizer.exit(Token::HeadingAtxSequence);
- tokenizer.go(space_or_tab(), at_break)(tokenizer)
+ let state_name = space_or_tab(tokenizer);
+ tokenizer.go(state_name, StateName::HeadingAtxAtBreak)
}
_ => {
tokenizer.tokenize_state.size = 0;
@@ -135,7 +135,7 @@ fn sequence_open(tokenizer: &mut Tokenizer) -> State {
/// > | ## aa
/// ^
/// ```
-fn at_break(tokenizer: &mut Tokenizer) -> State {
+pub fn at_break(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.exit(Token::HeadingAtx);
@@ -144,10 +144,13 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {
tokenizer.interrupt = false;
State::Ok
}
- Some(b'\t' | b' ') => tokenizer.go(space_or_tab(), at_break)(tokenizer),
+ Some(b'\t' | b' ') => {
+ let state_name = space_or_tab(tokenizer);
+ tokenizer.go(state_name, StateName::HeadingAtxAtBreak)
+ }
Some(b'#') => {
tokenizer.enter(Token::HeadingAtxSequence);
- further_sequence(tokenizer)
+ sequence_further(tokenizer)
}
Some(_) => {
tokenizer.enter_with_content(Token::Data, Some(ContentType::Text));
@@ -164,10 +167,10 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {
/// > | ## aa ##
/// ^
/// ```
-fn further_sequence(tokenizer: &mut Tokenizer) -> State {
+pub fn sequence_further(tokenizer: &mut Tokenizer) -> State {
if let Some(b'#') = tokenizer.current {
tokenizer.consume();
- State::Fn(Box::new(further_sequence))
+ State::Fn(StateName::HeadingAtxSequenceFurther)
} else {
tokenizer.exit(Token::HeadingAtxSequence);
at_break(tokenizer)
@@ -180,7 +183,7 @@ fn further_sequence(tokenizer: &mut Tokenizer) -> State {
/// > | ## aa
/// ^
/// ```
-fn data(tokenizer: &mut Tokenizer) -> State {
+pub fn data(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// Note: `#` for closing sequence must be preceded by whitespace, otherwise it’s just text.
None | Some(b'\t' | b'\n' | b' ') => {
@@ -189,7 +192,7 @@ fn data(tokenizer: &mut Tokenizer) -> State {
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(data))
+ State::Fn(StateName::HeadingAtxData)
}
}
}
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index 675b2ac..50feba4 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -60,7 +60,7 @@
use crate::constant::TAB_SIZE;
use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
use crate::token::Token;
-use crate::tokenizer::{EventType, State, Tokenizer};
+use crate::tokenizer::{EventType, State, StateName, Tokenizer};
use crate::util::skip::opt_back as skip_opt_back;
/// At a line ending, presumably an underline.
@@ -83,17 +83,17 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
.token_type
== Token::Paragraph)
{
- tokenizer.go(
- space_or_tab_min_max(
- 0,
- if tokenizer.parse_state.constructs.code_indented {
- TAB_SIZE - 1
- } else {
- usize::MAX
- },
- ),
- before,
- )(tokenizer)
+ let state_name = space_or_tab_min_max(
+ tokenizer,
+ 0,
+ if tokenizer.parse_state.constructs.code_indented {
+ TAB_SIZE - 1
+ } else {
+ usize::MAX
+ },
+ );
+
+ tokenizer.go(state_name, StateName::HeadingSetextBefore)
} else {
State::Nok
}
@@ -106,7 +106,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | ==
/// ^
/// ```
-fn before(tokenizer: &mut Tokenizer) -> State {
+pub fn before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'-' | b'=') => {
tokenizer.tokenize_state.marker = tokenizer.current.unwrap();
@@ -124,16 +124,17 @@ fn before(tokenizer: &mut Tokenizer) -> State {
/// > | ==
/// ^
/// ```
-fn inside(tokenizer: &mut Tokenizer) -> State {
+pub fn inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'-' | b'=') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
tokenizer.consume();
- State::Fn(Box::new(inside))
+ State::Fn(StateName::HeadingSetextInside)
}
_ => {
tokenizer.tokenize_state.marker = 0;
tokenizer.exit(Token::HeadingSetextUnderline);
- tokenizer.attempt_opt(space_or_tab(), after)(tokenizer)
+ let state_name = space_or_tab(tokenizer);
+ tokenizer.attempt_opt(state_name, StateName::HeadingSetextAfter)
}
}
}
@@ -145,7 +146,7 @@ fn inside(tokenizer: &mut Tokenizer) -> State {
/// > | ==
/// ^
/// ```
-fn after(tokenizer: &mut Tokenizer) -> State {
+pub fn after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
// Feel free to interrupt.
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index aaa803d..779146c 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -101,13 +101,11 @@
use crate::constant::{
HTML_BLOCK_NAMES, HTML_CDATA_PREFIX, HTML_RAW_NAMES, HTML_RAW_SIZE_MAX, TAB_SIZE,
};
-use crate::construct::{
- blank_line::start as blank_line,
- partial_non_lazy_continuation::start as partial_non_lazy_continuation,
- partial_space_or_tab::{space_or_tab_with_options, Options as SpaceOrTabOptions},
+use crate::construct::partial_space_or_tab::{
+ space_or_tab_with_options, Options as SpaceOrTabOptions,
};
use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};
use crate::util::slice::Slice;
/// Symbol for `<script>` (condition 1).
@@ -134,8 +132,9 @@ const COMPLETE: u8 = 7;
pub fn start(tokenizer: &mut Tokenizer) -> State {
if tokenizer.parse_state.constructs.html_flow {
tokenizer.enter(Token::HtmlFlow);
- tokenizer.go(
- space_or_tab_with_options(SpaceOrTabOptions {
+ let state_name = space_or_tab_with_options(
+ tokenizer,
+ SpaceOrTabOptions {
kind: Token::HtmlFlowData,
min: 0,
max: if tokenizer.parse_state.constructs.code_indented {
@@ -145,9 +144,10 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
},
connect: false,
content_type: None,
- }),
- before,
- )(tokenizer)
+ },
+ );
+
+ tokenizer.go(state_name, StateName::HtmlFlowBefore)
} else {
State::Nok
}
@@ -159,11 +159,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | <x />
/// ^
/// ```
-fn before(tokenizer: &mut Tokenizer) -> State {
+pub fn before(tokenizer: &mut Tokenizer) -> State {
if Some(b'<') == tokenizer.current {
tokenizer.enter(Token::HtmlFlowData);
tokenizer.consume();
- State::Fn(Box::new(open))
+ State::Fn(StateName::HtmlFlowOpen)
} else {
State::Nok
}
@@ -179,17 +179,17 @@ fn before(tokenizer: &mut Tokenizer) -> State {
/// > | <!--xxx-->
/// ^
/// ```
-fn open(tokenizer: &mut Tokenizer) -> State {
+pub fn open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'!') => {
tokenizer.consume();
- State::Fn(Box::new(declaration_open))
+ State::Fn(StateName::HtmlFlowDeclarationOpen)
}
Some(b'/') => {
tokenizer.consume();
tokenizer.tokenize_state.seen = true;
tokenizer.tokenize_state.start = tokenizer.point.index;
- State::Fn(Box::new(tag_close_start))
+ State::Fn(StateName::HtmlFlowTagCloseStart)
}
Some(b'?') => {
tokenizer.tokenize_state.marker = INSTRUCTION;
@@ -198,7 +198,7 @@ fn open(tokenizer: &mut Tokenizer) -> State {
tokenizer.concrete = true;
// While we’re in an instruction instead of a declaration, we’re on a `?`
// right now, so we do need to search for `>`, similar to declarations.
- State::Fn(Box::new(continuation_declaration_inside))
+ State::Fn(StateName::HtmlFlowContinuationDeclarationInside)
}
// ASCII alphabetical.
Some(b'A'..=b'Z' | b'a'..=b'z') => {
@@ -219,24 +219,24 @@ fn open(tokenizer: &mut Tokenizer) -> State {
/// > | <![CDATA[>&<]]>
/// ^
/// ```
-fn declaration_open(tokenizer: &mut Tokenizer) -> State {
+pub fn declaration_open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'-') => {
tokenizer.consume();
tokenizer.tokenize_state.marker = COMMENT;
- State::Fn(Box::new(comment_open_inside))
+ State::Fn(StateName::HtmlFlowCommentOpenInside)
}
Some(b'A'..=b'Z' | b'a'..=b'z') => {
tokenizer.consume();
tokenizer.tokenize_state.marker = DECLARATION;
// Do not form containers.
tokenizer.concrete = true;
- State::Fn(Box::new(continuation_declaration_inside))
+ State::Fn(StateName::HtmlFlowContinuationDeclarationInside)
}
Some(b'[') => {
tokenizer.consume();
tokenizer.tokenize_state.marker = CDATA;
- State::Fn(Box::new(cdata_open_inside))
+ State::Fn(StateName::HtmlFlowCdataOpenInside)
}
_ => State::Nok,
}
@@ -248,12 +248,12 @@ fn declaration_open(tokenizer: &mut Tokenizer) -> State {
/// > | <!--xxx-->
/// ^
/// ```
-fn comment_open_inside(tokenizer: &mut Tokenizer) -> State {
+pub fn comment_open_inside(tokenizer: &mut Tokenizer) -> State {
if let Some(b'-') = tokenizer.current {
tokenizer.consume();
// Do not form containers.
tokenizer.concrete = true;
- State::Fn(Box::new(continuation_declaration_inside))
+ State::Fn(StateName::HtmlFlowContinuationDeclarationInside)
} else {
tokenizer.tokenize_state.marker = 0;
State::Nok
@@ -266,7 +266,7 @@ fn comment_open_inside(tokenizer: &mut Tokenizer) -> State {
/// > | <![CDATA[>&<]]>
/// ^^^^^^
/// ```
-fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State {
+pub fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State {
if tokenizer.current == Some(HTML_CDATA_PREFIX[tokenizer.tokenize_state.size]) {
tokenizer.tokenize_state.size += 1;
tokenizer.consume();
@@ -275,9 +275,9 @@ fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.size = 0;
// Do not form containers.
tokenizer.concrete = true;
- State::Fn(Box::new(continuation))
+ State::Fn(StateName::HtmlFlowContinuation)
} else {
- State::Fn(Box::new(cdata_open_inside))
+ State::Fn(StateName::HtmlFlowCdataOpenInside)
}
} else {
tokenizer.tokenize_state.marker = 0;
@@ -292,10 +292,10 @@ fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State {
/// > | </x>
/// ^
/// ```
-fn tag_close_start(tokenizer: &mut Tokenizer) -> State {
+pub fn tag_close_start(tokenizer: &mut Tokenizer) -> State {
if let Some(b'A'..=b'Z' | b'a'..=b'z') = tokenizer.current {
tokenizer.consume();
- State::Fn(Box::new(tag_name))
+ State::Fn(StateName::HtmlFlowTagName)
} else {
tokenizer.tokenize_state.seen = false;
tokenizer.tokenize_state.start = 0;
@@ -311,7 +311,7 @@ fn tag_close_start(tokenizer: &mut Tokenizer) -> State {
/// > | </ab>
/// ^^
/// ```
-fn tag_name(tokenizer: &mut Tokenizer) -> State {
+pub fn tag_name(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\t' | b'\n' | b' ' | b'/' | b'>') => {
let closing_tag = tokenizer.tokenize_state.seen;
@@ -340,7 +340,7 @@ fn tag_name(tokenizer: &mut Tokenizer) -> State {
if slash {
tokenizer.consume();
- State::Fn(Box::new(basic_self_closing))
+ State::Fn(StateName::HtmlFlowBasicSelfClosing)
} else {
// Do not form containers.
tokenizer.concrete = true;
@@ -363,7 +363,7 @@ fn tag_name(tokenizer: &mut Tokenizer) -> State {
// ASCII alphanumerical and `-`.
Some(b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => {
tokenizer.consume();
- State::Fn(Box::new(tag_name))
+ State::Fn(StateName::HtmlFlowTagName)
}
Some(_) => {
tokenizer.tokenize_state.seen = false;
@@ -378,12 +378,12 @@ fn tag_name(tokenizer: &mut Tokenizer) -> State {
/// > | <div/>
/// ^
/// ```
-fn basic_self_closing(tokenizer: &mut Tokenizer) -> State {
+pub fn basic_self_closing(tokenizer: &mut Tokenizer) -> State {
if let Some(b'>') = tokenizer.current {
tokenizer.consume();
// Do not form containers.
tokenizer.concrete = true;
- State::Fn(Box::new(continuation))
+ State::Fn(StateName::HtmlFlowContinuation)
} else {
tokenizer.tokenize_state.marker = 0;
State::Nok
@@ -396,11 +396,11 @@ fn basic_self_closing(tokenizer: &mut Tokenizer) -> State {
/// > | <x/>
/// ^
/// ```
-fn complete_closing_tag_after(tokenizer: &mut Tokenizer) -> State {
+pub fn complete_closing_tag_after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\t' | b' ') => {
tokenizer.consume();
- State::Fn(Box::new(complete_closing_tag_after))
+ State::Fn(StateName::HtmlFlowCompleteClosingTagAfter)
}
_ => complete_end(tokenizer),
}
@@ -425,20 +425,20 @@ fn complete_closing_tag_after(tokenizer: &mut Tokenizer) -> State {
/// > | <a >
/// ^
/// ```
-fn complete_attribute_name_before(tokenizer: &mut Tokenizer) -> State {
+pub fn complete_attribute_name_before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\t' | b' ') => {
tokenizer.consume();
- State::Fn(Box::new(complete_attribute_name_before))
+ State::Fn(StateName::HtmlFlowCompleteAttributeNameBefore)
}
Some(b'/') => {
tokenizer.consume();
- State::Fn(Box::new(complete_end))
+ State::Fn(StateName::HtmlFlowCompleteEnd)
}
// ASCII alphanumerical and `:` and `_`.
Some(b'0'..=b'9' | b':' | b'A'..=b'Z' | b'_' | b'a'..=b'z') => {
tokenizer.consume();
- State::Fn(Box::new(complete_attribute_name))
+ State::Fn(StateName::HtmlFlowCompleteAttributeName)
}
_ => complete_end(tokenizer),
}
@@ -454,12 +454,12 @@ fn complete_attribute_name_before(tokenizer: &mut Tokenizer) -> State {
/// > | <a b>
/// ^
/// ```
-fn complete_attribute_name(tokenizer: &mut Tokenizer) -> State {
+pub fn complete_attribute_name(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// ASCII alphanumerical and `-`, `.`, `:`, and `_`.
Some(b'-' | b'.' | b'0'..=b'9' | b':' | b'A'..=b'Z' | b'_' | b'a'..=b'z') => {
tokenizer.consume();
- State::Fn(Box::new(complete_attribute_name))
+ State::Fn(StateName::HtmlFlowCompleteAttributeName)
}
_ => complete_attribute_name_after(tokenizer),
}
@@ -474,15 +474,15 @@ fn complete_attribute_name(tokenizer: &mut Tokenizer) -> State {
/// > | <a b=c>
/// ^
/// ```
-fn complete_attribute_name_after(tokenizer: &mut Tokenizer) -> State {
+pub fn complete_attribute_name_after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\t' | b' ') => {
tokenizer.consume();
- State::Fn(Box::new(complete_attribute_name_after))
+ State::Fn(StateName::HtmlFlowCompleteAttributeNameAfter)
}
Some(b'=') => {
tokenizer.consume();
- State::Fn(Box::new(complete_attribute_value_before))
+ State::Fn(StateName::HtmlFlowCompleteAttributeValueBefore)
}
_ => complete_attribute_name_before(tokenizer),
}
@@ -497,7 +497,7 @@ fn complete_attribute_name_after(tokenizer: &mut Tokenizer) -> State {
/// > | <a b="c">
/// ^
/// ```
-fn complete_attribute_value_before(tokenizer: &mut Tokenizer) -> State {
+pub fn complete_attribute_value_before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'<' | b'=' | b'>' | b'`') => {
tokenizer.tokenize_state.marker = 0;
@@ -505,12 +505,12 @@ fn complete_attribute_value_before(tokenizer: &mut Tokenizer) -> State {
}
Some(b'\t' | b' ') => {
tokenizer.consume();
- State::Fn(Box::new(complete_attribute_value_before))
+ State::Fn(StateName::HtmlFlowCompleteAttributeValueBefore)
}
Some(b'"' | b'\'') => {
tokenizer.tokenize_state.marker_other = tokenizer.current.unwrap();
tokenizer.consume();
- State::Fn(Box::new(complete_attribute_value_quoted))
+ State::Fn(StateName::HtmlFlowCompleteAttributeValueQuoted)
}
_ => complete_attribute_value_unquoted(tokenizer),
}
@@ -524,7 +524,7 @@ fn complete_attribute_value_before(tokenizer: &mut Tokenizer) -> State {
/// > | <a b='c'>
/// ^
/// ```
-fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {
+pub fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.tokenize_state.marker = 0;
@@ -536,11 +536,11 @@ fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {
{
tokenizer.tokenize_state.marker_other = 0;
tokenizer.consume();
- State::Fn(Box::new(complete_attribute_value_quoted_after))
+ State::Fn(StateName::HtmlFlowCompleteAttributeValueQuotedAfter)
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(complete_attribute_value_quoted))
+ State::Fn(StateName::HtmlFlowCompleteAttributeValueQuoted)
}
}
}
@@ -551,14 +551,14 @@ fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {
/// > | <a b=c>
/// ^
/// ```
-fn complete_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State {
+pub fn complete_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\t' | b'\n' | b' ' | b'"' | b'\'' | b'/' | b'<' | b'=' | b'>' | b'`') => {
complete_attribute_name_after(tokenizer)
}
Some(_) => {
tokenizer.consume();
- State::Fn(Box::new(complete_attribute_value_unquoted))
+ State::Fn(StateName::HtmlFlowCompleteAttributeValueUnquoted)
}
}
}
@@ -570,7 +570,7 @@ fn complete_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State {
/// > | <a b="c">
/// ^
/// ```
-fn complete_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State {
+pub fn complete_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State {
if let Some(b'\t' | b' ' | b'/' | b'>') = tokenizer.current {
complete_attribute_name_before(tokenizer)
} else {
@@ -585,10 +585,10 @@ fn complete_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State {
/// > | <a b="c">
/// ^
/// ```
-fn complete_end(tokenizer: &mut Tokenizer) -> State {
+pub fn complete_end(tokenizer: &mut Tokenizer) -> State {
if let Some(b'>') = tokenizer.current {
tokenizer.consume();
- State::Fn(Box::new(complete_after))
+ State::Fn(StateName::HtmlFlowCompleteAfter)
} else {
tokenizer.tokenize_state.marker = 0;
State::Nok
@@ -601,7 +601,7 @@ fn complete_end(tokenizer: &mut Tokenizer) -> State {
/// > | <x>
/// ^
/// ```
-fn complete_after(tokenizer: &mut Tokenizer) -> State {
+pub fn complete_after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
// Do not form containers.
@@ -610,7 +610,7 @@ fn complete_after(tokenizer: &mut Tokenizer) -> State {
}
Some(b'\t' | b' ') => {
tokenizer.consume();
- State::Fn(Box::new(complete_after))
+ State::Fn(StateName::HtmlFlowCompleteAfter)
}
Some(_) => {
tokenizer.tokenize_state.marker = 0;
@@ -625,20 +625,20 @@ fn complete_after(tokenizer: &mut Tokenizer) -> State {
/// > | <!--xxx-->
/// ^
/// ```
-fn continuation(tokenizer: &mut Tokenizer) -> State {
+pub fn continuation(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\n')
if tokenizer.tokenize_state.marker == BASIC
|| tokenizer.tokenize_state.marker == COMPLETE =>
{
tokenizer.exit(Token::HtmlFlowData);
- tokenizer.check(blank_line_before, |ok| {
- Box::new(if ok {
- continuation_after
+ tokenizer.check(StateName::HtmlFlowBlankLineBefore, |ok| {
+ State::Fn(if ok {
+ StateName::HtmlFlowContinuationAfter
} else {
- continuation_start
+ StateName::HtmlFlowContinuationStart
})
- })(tokenizer)
+ })
}
// Note: important that this is after the basic/complete case.
None | Some(b'\n') => {
@@ -647,27 +647,27 @@ fn continuation(tokenizer: &mut Tokenizer) -> State {
}
Some(b'-') if tokenizer.tokenize_state.marker == COMMENT => {
tokenizer.consume();
- State::Fn(Box::new(continuation_comment_inside))
+ State::Fn(StateName::HtmlFlowContinuationCommentInside)
}
Some(b'<') if tokenizer.tokenize_state.marker == RAW => {
tokenizer.consume();
- State::Fn(Box::new(continuation_raw_tag_open))
+ State::Fn(StateName::HtmlFlowContinuationRawTagOpen)
}
Some(b'>') if tokenizer.tokenize_state.marker == DECLARATION => {
tokenizer.consume();
- State::Fn(Box::new(continuation_close))
+ State::Fn(StateName::HtmlFlowContinuationClose)
}
Some(b'?') if tokenizer.tokenize_state.marker == INSTRUCTION => {
tokenizer.consume();
- State::Fn(Box::new(continuation_declaration_inside))
+ State::Fn(StateName::HtmlFlowContinuationDeclarationInside)
}
Some(b']') if tokenizer.tokenize_state.marker == CDATA => {
tokenizer.consume();
- State::Fn(Box::new(continuation_character_data_inside))
+ State::Fn(StateName::HtmlFlowContinuationCdataInside)
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(continuation))
+ State::Fn(StateName::HtmlFlowContinuation)
}
}
}
@@ -679,14 +679,14 @@ fn continuation(tokenizer: &mut Tokenizer) -> State {
/// ^
/// | asd
/// ```
-fn continuation_start(tokenizer: &mut Tokenizer) -> State {
- tokenizer.check(partial_non_lazy_continuation, |ok| {
- Box::new(if ok {
- continuation_start_non_lazy
+pub fn continuation_start(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.check(StateName::NonLazyContinuationStart, |ok| {
+ State::Fn(if ok {
+ StateName::HtmlFlowContinuationStartNonLazy
} else {
- continuation_after
+ StateName::HtmlFlowContinuationAfter
})
- })(tokenizer)
+ })
}
/// In continuation, at an eol, before non-lazy content.
@@ -696,13 +696,13 @@ fn continuation_start(tokenizer: &mut Tokenizer) -> State {
/// ^
/// | asd
/// ```
-fn continuation_start_non_lazy(tokenizer: &mut Tokenizer) -> State {
+pub fn continuation_start_non_lazy(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\n') => {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
- State::Fn(Box::new(continuation_before))
+ State::Fn(StateName::HtmlFlowContinuationBefore)
}
_ => unreachable!("expected eol"),
}
@@ -715,7 +715,7 @@ fn continuation_start_non_lazy(tokenizer: &mut Tokenizer) -> State {
/// > | asd
/// ^
/// ```
-fn continuation_before(tokenizer: &mut Tokenizer) -> State {
+pub fn continuation_before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => continuation_start(tokenizer),
_ => {
@@ -731,11 +731,11 @@ fn continuation_before(tokenizer: &mut Tokenizer) -> State {
/// > | <!--xxx-->
/// ^
/// ```
-fn continuation_comment_inside(tokenizer: &mut Tokenizer) -> State {
+pub fn continuation_comment_inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'-') => {
tokenizer.consume();
- State::Fn(Box::new(continuation_declaration_inside))
+ State::Fn(StateName::HtmlFlowContinuationDeclarationInside)
}
_ => continuation(tokenizer),
}
@@ -747,12 +747,12 @@ fn continuation_comment_inside(tokenizer: &mut Tokenizer) -> State {
/// > | <script>console.log(1)</script>
/// ^
/// ```
-fn continuation_raw_tag_open(tokenizer: &mut Tokenizer) -> State {
+pub fn continuation_raw_tag_open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'/') => {
tokenizer.consume();
tokenizer.tokenize_state.start = tokenizer.point.index;
- State::Fn(Box::new(continuation_raw_end_tag))
+ State::Fn(StateName::HtmlFlowContinuationRawEndTag)
}
_ => continuation(tokenizer),
}
@@ -764,7 +764,7 @@ fn continuation_raw_tag_open(tokenizer: &mut Tokenizer) -> State {
/// > | <script>console.log(1)</script>
/// ^^^^^^
/// ```
-fn continuation_raw_end_tag(tokenizer: &mut Tokenizer) -> State {
+pub fn continuation_raw_end_tag(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'>') => {
// Guaranteed to be valid ASCII bytes.
@@ -779,7 +779,7 @@ fn continuation_raw_end_tag(tokenizer: &mut Tokenizer) -> State {
if HTML_RAW_NAMES.contains(&name.as_str()) {
tokenizer.consume();
- State::Fn(Box::new(continuation_close))
+ State::Fn(StateName::HtmlFlowContinuationClose)
} else {
continuation(tokenizer)
}
@@ -788,7 +788,7 @@ fn continuation_raw_end_tag(tokenizer: &mut Tokenizer) -> State {
if tokenizer.point.index - tokenizer.tokenize_state.start < HTML_RAW_SIZE_MAX =>
{
tokenizer.consume();
- State::Fn(Box::new(continuation_raw_end_tag))
+ State::Fn(StateName::HtmlFlowContinuationRawEndTag)
}
_ => {
tokenizer.tokenize_state.start = 0;
@@ -803,11 +803,11 @@ fn continuation_raw_end_tag(tokenizer: &mut Tokenizer) -> State {
/// > | <![CDATA[>&<]]>
/// ^
/// ```
-fn continuation_character_data_inside(tokenizer: &mut Tokenizer) -> State {
+pub fn continuation_cdata_inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b']') => {
tokenizer.consume();
- State::Fn(Box::new(continuation_declaration_inside))
+ State::Fn(StateName::HtmlFlowContinuationDeclarationInside)
}
_ => continuation(tokenizer),
}
@@ -827,15 +827,15 @@ fn continuation_character_data_inside(tokenizer: &mut Tokenizer) -> State {
/// > | <![CDATA[>&<]]>
/// ^
/// ```
-fn continuation_declaration_inside(tokenizer: &mut Tokenizer) -> State {
+pub fn continuation_declaration_inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'>') => {
tokenizer.consume();
- State::Fn(Box::new(continuation_close))
+ State::Fn(StateName::HtmlFlowContinuationClose)
}
Some(b'-') if tokenizer.tokenize_state.marker == COMMENT => {
tokenizer.consume();
- State::Fn(Box::new(continuation_declaration_inside))
+ State::Fn(StateName::HtmlFlowContinuationDeclarationInside)
}
_ => continuation(tokenizer),
}
@@ -847,7 +847,7 @@ fn continuation_declaration_inside(tokenizer: &mut Tokenizer) -> State {
/// > | <!doctype>
/// ^
/// ```
-fn continuation_close(tokenizer: &mut Tokenizer) -> State {
+pub fn continuation_close(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.exit(Token::HtmlFlowData);
@@ -855,7 +855,7 @@ fn continuation_close(tokenizer: &mut Tokenizer) -> State {
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(continuation_close))
+ State::Fn(StateName::HtmlFlowContinuationClose)
}
}
}
@@ -866,7 +866,7 @@ fn continuation_close(tokenizer: &mut Tokenizer) -> State {
/// > | <!doctype>
/// ^
/// ```
-fn continuation_after(tokenizer: &mut Tokenizer) -> State {
+pub fn continuation_after(tokenizer: &mut Tokenizer) -> State {
tokenizer.exit(Token::HtmlFlow);
tokenizer.tokenize_state.marker = 0;
// Feel free to interrupt.
@@ -883,9 +883,9 @@ fn continuation_after(tokenizer: &mut Tokenizer) -> State {
/// ^
/// |
/// ```
-fn blank_line_before(tokenizer: &mut Tokenizer) -> State {
+pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
- State::Fn(Box::new(blank_line))
+ State::Fn(StateName::BlankLineStart)
}
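Note on the pattern above: every `html_flow.rs` hunk is the same mechanical change, `State::Fn(Box::new(some_state))` becomes `State::Fn(StateName::SomeState)`, so each transition hands around a small `Copy` enum value instead of allocating a boxed function per step. A minimal, self-contained sketch of that dispatch shape (a two-variant enum and toy driver standing in for the much larger real machinery in `src/tokenizer.rs`):

    // Minimal sketch of enum-based state dispatch. `StateName::to_func`
    // mirrors the shape used in the hunks; everything else is a stand-in.
    struct Tokenizer;

    enum State {
        Ok,            // the construct matched
        Nok,           // the construct did not match
        Fn(StateName), // continue at the named state
    }

    #[derive(Clone, Copy)]
    enum StateName {
        Open,
        Continuation,
    }

    impl StateName {
        // A match over a `Copy` enum replaces
        // `Box<dyn FnOnce(&mut Tokenizer) -> State>`.
        fn to_func(self) -> fn(&mut Tokenizer) -> State {
            match self {
                StateName::Open => open,
                StateName::Continuation => continuation,
            }
        }
    }

    fn open(_tokenizer: &mut Tokenizer) -> State {
        State::Fn(StateName::Continuation)
    }

    fn continuation(_tokenizer: &mut Tokenizer) -> State {
        State::Ok
    }

    fn main() {
        let mut tokenizer = Tokenizer;
        let mut state = State::Fn(StateName::Open);
        // Drive the machine until it settles on `Ok` or `Nok`.
        while let State::Fn(name) = state {
            state = name.to_func()(&mut tokenizer);
        }
        assert!(matches!(state, State::Ok));
    }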
diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs
index a4c0349..1c1f9e6 100644
--- a/src/construct/html_text.rs
+++ b/src/construct/html_text.rs
@@ -57,7 +57,7 @@
use crate::constant::HTML_CDATA_PREFIX;
use crate::construct::partial_space_or_tab::space_or_tab;
use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};
/// Start of HTML (text)
///
@@ -70,7 +70,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Token::HtmlText);
tokenizer.enter(Token::HtmlTextData);
tokenizer.consume();
- State::Fn(Box::new(open))
+ State::Fn(StateName::HtmlTextOpen)
} else {
State::Nok
}
@@ -86,24 +86,24 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | a <!--b--> c
/// ^
/// ```
-fn open(tokenizer: &mut Tokenizer) -> State {
+pub fn open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'!') => {
tokenizer.consume();
- State::Fn(Box::new(declaration_open))
+ State::Fn(StateName::HtmlTextDeclarationOpen)
}
Some(b'/') => {
tokenizer.consume();
- State::Fn(Box::new(tag_close_start))
+ State::Fn(StateName::HtmlTextTagCloseStart)
}
Some(b'?') => {
tokenizer.consume();
- State::Fn(Box::new(instruction))
+ State::Fn(StateName::HtmlTextInstruction)
}
// ASCII alphabetical.
Some(b'A'..=b'Z' | b'a'..=b'z') => {
tokenizer.consume();
- State::Fn(Box::new(tag_open))
+ State::Fn(StateName::HtmlTextTagOpen)
}
_ => State::Nok,
}
@@ -119,20 +119,20 @@ fn open(tokenizer: &mut Tokenizer) -> State {
/// > | a <![CDATA[>&<]]> c
/// ^
/// ```
-fn declaration_open(tokenizer: &mut Tokenizer) -> State {
+pub fn declaration_open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'-') => {
tokenizer.consume();
- State::Fn(Box::new(comment_open_inside))
+ State::Fn(StateName::HtmlTextCommentOpenInside)
}
// ASCII alphabetical.
Some(b'A'..=b'Z' | b'a'..=b'z') => {
tokenizer.consume();
- State::Fn(Box::new(declaration))
+ State::Fn(StateName::HtmlTextDeclaration)
}
Some(b'[') => {
tokenizer.consume();
- State::Fn(Box::new(cdata_open_inside))
+ State::Fn(StateName::HtmlTextCdataOpenInside)
}
_ => State::Nok,
}
@@ -144,11 +144,11 @@ fn declaration_open(tokenizer: &mut Tokenizer) -> State {
/// > | a <!--b--> c
/// ^
/// ```
-fn comment_open_inside(tokenizer: &mut Tokenizer) -> State {
+pub fn comment_open_inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'-') => {
tokenizer.consume();
- State::Fn(Box::new(comment_start))
+ State::Fn(StateName::HtmlTextCommentStart)
}
_ => State::Nok,
}
@@ -167,12 +167,12 @@ fn comment_open_inside(tokenizer: &mut Tokenizer) -> State {
/// ```
///
/// [html_flow]: crate::construct::html_flow
-fn comment_start(tokenizer: &mut Tokenizer) -> State {
+pub fn comment_start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'>') => State::Nok,
Some(b'-') => {
tokenizer.consume();
- State::Fn(Box::new(comment_start_dash))
+ State::Fn(StateName::HtmlTextCommentStartDash)
}
_ => comment(tokenizer),
}
@@ -191,7 +191,7 @@ fn comment_start(tokenizer: &mut Tokenizer) -> State {
/// ```
///
/// [html_flow]: crate::construct::html_flow
-fn comment_start_dash(tokenizer: &mut Tokenizer) -> State {
+pub fn comment_start_dash(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'>') => State::Nok,
_ => comment(tokenizer),
@@ -204,20 +204,20 @@ fn comment_start_dash(tokenizer: &mut Tokenizer) -> State {
/// > | a <!--b--> c
/// ^
/// ```
-fn comment(tokenizer: &mut Tokenizer) -> State {
+pub fn comment(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => State::Nok,
Some(b'\n') => {
- tokenizer.tokenize_state.return_state = Some(Box::new(comment));
- at_line_ending(tokenizer)
+ tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextComment);
+ line_ending_before(tokenizer)
}
Some(b'-') => {
tokenizer.consume();
- State::Fn(Box::new(comment_close))
+ State::Fn(StateName::HtmlTextCommentClose)
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(comment))
+ State::Fn(StateName::HtmlTextComment)
}
}
}
@@ -228,11 +228,11 @@ fn comment(tokenizer: &mut Tokenizer) -> State {
/// > | a <!--b--> c
/// ^
/// ```
-fn comment_close(tokenizer: &mut Tokenizer) -> State {
+pub fn comment_close(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'-') => {
tokenizer.consume();
- State::Fn(Box::new(end))
+ State::Fn(StateName::HtmlTextEnd)
}
_ => comment(tokenizer),
}
@@ -244,16 +244,16 @@ fn comment_close(tokenizer: &mut Tokenizer) -> State {
/// > | a <![CDATA[>&<]]> b
/// ^^^^^^
/// ```
-fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State {
+pub fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State {
if tokenizer.current == Some(HTML_CDATA_PREFIX[tokenizer.tokenize_state.size]) {
tokenizer.tokenize_state.size += 1;
tokenizer.consume();
if tokenizer.tokenize_state.size == HTML_CDATA_PREFIX.len() {
tokenizer.tokenize_state.size = 0;
- State::Fn(Box::new(cdata))
+ State::Fn(StateName::HtmlTextCdata)
} else {
- State::Fn(Box::new(cdata_open_inside))
+ State::Fn(StateName::HtmlTextCdataOpenInside)
}
} else {
State::Nok
@@ -266,20 +266,20 @@ fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State {
/// > | a <![CDATA[>&<]]> b
/// ^^^
/// ```
-fn cdata(tokenizer: &mut Tokenizer) -> State {
+pub fn cdata(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => State::Nok,
Some(b'\n') => {
- tokenizer.tokenize_state.return_state = Some(Box::new(cdata));
- at_line_ending(tokenizer)
+ tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextCdata);
+ line_ending_before(tokenizer)
}
Some(b']') => {
tokenizer.consume();
- State::Fn(Box::new(cdata_close))
+ State::Fn(StateName::HtmlTextCdataClose)
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(cdata))
+ State::Fn(StateName::HtmlTextCdata)
}
}
}
@@ -290,11 +290,11 @@ fn cdata(tokenizer: &mut Tokenizer) -> State {
/// > | a <![CDATA[>&<]]> b
/// ^
/// ```
-fn cdata_close(tokenizer: &mut Tokenizer) -> State {
+pub fn cdata_close(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b']') => {
tokenizer.consume();
- State::Fn(Box::new(cdata_end))
+ State::Fn(StateName::HtmlTextCdataEnd)
}
_ => cdata(tokenizer),
}
@@ -306,7 +306,7 @@ fn cdata_close(tokenizer: &mut Tokenizer) -> State {
/// > | a <![CDATA[>&<]]> b
/// ^
/// ```
-fn cdata_end(tokenizer: &mut Tokenizer) -> State {
+pub fn cdata_end(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'>') => end(tokenizer),
Some(b']') => cdata_close(tokenizer),
@@ -320,16 +320,16 @@ fn cdata_end(tokenizer: &mut Tokenizer) -> State {
/// > | a <!b> c
/// ^
/// ```
-fn declaration(tokenizer: &mut Tokenizer) -> State {
+pub fn declaration(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'>') => end(tokenizer),
Some(b'\n') => {
- tokenizer.tokenize_state.return_state = Some(Box::new(declaration));
- at_line_ending(tokenizer)
+ tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextDeclaration);
+ line_ending_before(tokenizer)
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(declaration))
+ State::Fn(StateName::HtmlTextDeclaration)
}
}
}
@@ -340,20 +340,20 @@ fn declaration(tokenizer: &mut Tokenizer) -> State {
/// > | a <?b?> c
/// ^
/// ```
-fn instruction(tokenizer: &mut Tokenizer) -> State {
+pub fn instruction(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => State::Nok,
Some(b'\n') => {
- tokenizer.tokenize_state.return_state = Some(Box::new(instruction));
- at_line_ending(tokenizer)
+ tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextInstruction);
+ line_ending_before(tokenizer)
}
Some(b'?') => {
tokenizer.consume();
- State::Fn(Box::new(instruction_close))
+ State::Fn(StateName::HtmlTextInstructionClose)
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(instruction))
+ State::Fn(StateName::HtmlTextInstruction)
}
}
}
@@ -364,7 +364,7 @@ fn instruction(tokenizer: &mut Tokenizer) -> State {
/// > | a <?b?> c
/// ^
/// ```
-fn instruction_close(tokenizer: &mut Tokenizer) -> State {
+pub fn instruction_close(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'>') => end(tokenizer),
_ => instruction(tokenizer),
@@ -377,12 +377,12 @@ fn instruction_close(tokenizer: &mut Tokenizer) -> State {
/// > | a </b> c
/// ^
/// ```
-fn tag_close_start(tokenizer: &mut Tokenizer) -> State {
+pub fn tag_close_start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// ASCII alphabetical.
Some(b'A'..=b'Z' | b'a'..=b'z') => {
tokenizer.consume();
- State::Fn(Box::new(tag_close))
+ State::Fn(StateName::HtmlTextTagClose)
}
_ => State::Nok,
}
@@ -394,12 +394,12 @@ fn tag_close_start(tokenizer: &mut Tokenizer) -> State {
/// > | a </b> c
/// ^
/// ```
-fn tag_close(tokenizer: &mut Tokenizer) -> State {
+pub fn tag_close(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// ASCII alphanumerical and `-`.
Some(b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => {
tokenizer.consume();
- State::Fn(Box::new(tag_close))
+ State::Fn(StateName::HtmlTextTagClose)
}
_ => tag_close_between(tokenizer),
}
@@ -411,15 +411,15 @@ fn tag_close(tokenizer: &mut Tokenizer) -> State {
/// > | a </b> c
/// ^
/// ```
-fn tag_close_between(tokenizer: &mut Tokenizer) -> State {
+pub fn tag_close_between(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\n') => {
- tokenizer.tokenize_state.return_state = Some(Box::new(tag_close_between));
- at_line_ending(tokenizer)
+ tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextTagCloseBetween);
+ line_ending_before(tokenizer)
}
Some(b'\t' | b' ') => {
tokenizer.consume();
- State::Fn(Box::new(tag_close_between))
+ State::Fn(StateName::HtmlTextTagCloseBetween)
}
_ => end(tokenizer),
}
@@ -431,12 +431,12 @@ fn tag_close_between(tokenizer: &mut Tokenizer) -> State {
/// > | a <b> c
/// ^
/// ```
-fn tag_open(tokenizer: &mut Tokenizer) -> State {
+pub fn tag_open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// ASCII alphanumerical and `-`.
Some(b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => {
tokenizer.consume();
- State::Fn(Box::new(tag_open))
+ State::Fn(StateName::HtmlTextTagOpen)
}
Some(b'\t' | b'\n' | b' ' | b'/' | b'>') => tag_open_between(tokenizer),
_ => State::Nok,
@@ -449,24 +449,24 @@ fn tag_open(tokenizer: &mut Tokenizer) -> State {
/// > | a <b> c
/// ^
/// ```
-fn tag_open_between(tokenizer: &mut Tokenizer) -> State {
+pub fn tag_open_between(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\n') => {
- tokenizer.tokenize_state.return_state = Some(Box::new(tag_open_between));
- at_line_ending(tokenizer)
+ tokenizer.tokenize_state.return_state = Some(StateName::HtmlTextTagOpenBetween);
+ line_ending_before(tokenizer)
}
Some(b'\t' | b' ') => {
tokenizer.consume();
- State::Fn(Box::new(tag_open_between))
+ State::Fn(StateName::HtmlTextTagOpenBetween)
}
Some(b'/') => {
tokenizer.consume();
- State::Fn(Box::new(end))
+ State::Fn(StateName::HtmlTextEnd)
}
// ASCII alphabetical and `:` and `_`.
Some(b':' | b'A'..=b'Z' | b'_' | b'a'..=b'z') => {
tokenizer.consume();
- State::Fn(Box::new(tag_open_attribute_name))
+ State::Fn(StateName::HtmlTextTagOpenAttributeName)
}
_ => end(tokenizer),
}
@@ -478,12 +478,12 @@ fn tag_open_between(tokenizer: &mut Tokenizer) -> State {
/// > | a <b c> d
/// ^
/// ```
-fn tag_open_attribute_name(tokenizer: &mut Tokenizer) -> State {
+pub fn tag_open_attribute_name(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// ASCII alphabetical and `-`, `.`, `:`, and `_`.
Some(b'-' | b'.' | b'0'..=b'9' | b':' | b'A'..=b'Z' | b'_' | b'a'..=b'z') => {
tokenizer.consume();
- State::Fn(Box::new(tag_open_attribute_name))
+ State::Fn(StateName::HtmlTextTagOpenAttributeName)
}
_ => tag_open_attribute_name_after(tokenizer),
}
@@ -496,19 +496,20 @@ fn tag_open_attribute_name(tokenizer: &mut Tokenizer) -> State {
/// > | a <b c> d
/// ^
/// ```
-fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer) -> State {
+pub fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\n') => {
- tokenizer.tokenize_state.return_state = Some(Box::new(tag_open_attribute_name_after));
- at_line_ending(tokenizer)
+ tokenizer.tokenize_state.return_state =
+ Some(StateName::HtmlTextTagOpenAttributeNameAfter);
+ line_ending_before(tokenizer)
}
Some(b'\t' | b' ') => {
tokenizer.consume();
- State::Fn(Box::new(tag_open_attribute_name_after))
+ State::Fn(StateName::HtmlTextTagOpenAttributeNameAfter)
}
Some(b'=') => {
tokenizer.consume();
- State::Fn(Box::new(tag_open_attribute_value_before))
+ State::Fn(StateName::HtmlTextTagOpenAttributeValueBefore)
}
_ => tag_open_between(tokenizer),
}
@@ -521,25 +522,26 @@ fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer) -> State {
/// > | a <b c=d> e
/// ^
/// ```
-fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer) -> State {
+pub fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'<' | b'=' | b'>' | b'`') => State::Nok,
Some(b'\n') => {
- tokenizer.tokenize_state.return_state = Some(Box::new(tag_open_attribute_value_before));
- at_line_ending(tokenizer)
+ tokenizer.tokenize_state.return_state =
+ Some(StateName::HtmlTextTagOpenAttributeValueBefore);
+ line_ending_before(tokenizer)
}
Some(b'\t' | b' ') => {
tokenizer.consume();
- State::Fn(Box::new(tag_open_attribute_value_before))
+ State::Fn(StateName::HtmlTextTagOpenAttributeValueBefore)
}
Some(b'"' | b'\'') => {
tokenizer.tokenize_state.marker = tokenizer.current.unwrap();
tokenizer.consume();
- State::Fn(Box::new(tag_open_attribute_value_quoted))
+ State::Fn(StateName::HtmlTextTagOpenAttributeValueQuoted)
}
Some(_) => {
tokenizer.consume();
- State::Fn(Box::new(tag_open_attribute_value_unquoted))
+ State::Fn(StateName::HtmlTextTagOpenAttributeValueUnquoted)
}
}
}
@@ -550,24 +552,25 @@ fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer) -> State {
/// > | a <b c="d"> e
/// ^
/// ```
-fn tag_open_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {
+pub fn tag_open_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => {
tokenizer.tokenize_state.marker = 0;
State::Nok
}
Some(b'\n') => {
- tokenizer.tokenize_state.return_state = Some(Box::new(tag_open_attribute_value_quoted));
- at_line_ending(tokenizer)
+ tokenizer.tokenize_state.return_state =
+ Some(StateName::HtmlTextTagOpenAttributeValueQuoted);
+ line_ending_before(tokenizer)
}
Some(b'"' | b'\'') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
tokenizer.tokenize_state.marker = 0;
tokenizer.consume();
- State::Fn(Box::new(tag_open_attribute_value_quoted_after))
+ State::Fn(StateName::HtmlTextTagOpenAttributeValueQuotedAfter)
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(tag_open_attribute_value_quoted))
+ State::Fn(StateName::HtmlTextTagOpenAttributeValueQuoted)
}
}
}
@@ -578,13 +581,13 @@ fn tag_open_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {
/// > | a <b c=d> e
/// ^
/// ```
-fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State {
+pub fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'"' | b'\'' | b'<' | b'=' | b'`') => State::Nok,
Some(b'\t' | b'\n' | b' ' | b'/' | b'>') => tag_open_between(tokenizer),
Some(_) => {
tokenizer.consume();
- State::Fn(Box::new(tag_open_attribute_value_unquoted))
+ State::Fn(StateName::HtmlTextTagOpenAttributeValueUnquoted)
}
}
}
@@ -596,7 +599,7 @@ fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State {
/// > | a <b c="d"> e
/// ^
/// ```
-fn tag_open_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State {
+pub fn tag_open_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\t' | b'\n' | b' ' | b'>' | b'/') => tag_open_between(tokenizer),
_ => State::Nok,
@@ -609,7 +612,7 @@ fn tag_open_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State {
/// > | a <b c="d"> e
/// ^
/// ```
-fn end(tokenizer: &mut Tokenizer) -> State {
+pub fn end(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'>') => {
tokenizer.consume();
@@ -631,14 +634,14 @@ fn end(tokenizer: &mut Tokenizer) -> State {
/// ^
/// | b-->
/// ```
-fn at_line_ending(tokenizer: &mut Tokenizer) -> State {
+pub fn line_ending_before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\n') => {
tokenizer.exit(Token::HtmlTextData);
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
- State::Fn(Box::new(after_line_ending))
+ State::Fn(StateName::HtmlTextLineEndingAfter)
}
_ => unreachable!("expected eol"),
}
@@ -654,8 +657,9 @@ fn at_line_ending(tokenizer: &mut Tokenizer) -> State {
/// > | b-->
/// ^
/// ```
-fn after_line_ending(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt_opt(space_or_tab(), after_line_ending_prefix)(tokenizer)
+pub fn line_ending_after(tokenizer: &mut Tokenizer) -> State {
+ let state_name = space_or_tab(tokenizer);
+ tokenizer.attempt_opt(state_name, StateName::HtmlTextLineEndingAfterPrefix)
}
/// After a line ending, after indent.
@@ -668,8 +672,9 @@ fn after_line_ending(tokenizer: &mut Tokenizer) -> State {
/// > | b-->
/// ^
/// ```
-fn after_line_ending_prefix(tokenizer: &mut Tokenizer) -> State {
- let return_state = tokenizer.tokenize_state.return_state.take().unwrap();
+pub fn line_ending_after_prefix(tokenizer: &mut Tokenizer) -> State {
+ let state_name = tokenizer.tokenize_state.return_state.take().unwrap();
+ let func = state_name.to_func();
tokenizer.enter(Token::HtmlTextData);
- return_state(tokenizer)
+ func(tokenizer)
}
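The `return_state` hunks in `html_text.rs` apply the same idea to a stored continuation: the state to resume after a line ending used to be kept as a boxed function and is now kept as a `StateName`, turned back into something callable with `to_func()`. A toy of the store-and-resume step, with every name here a stand-in for the real tokenizer fields:

    // Toy sketch: remember a resume point by name across a line ending.
    #[derive(Clone, Copy)]
    enum StateName {
        Comment,
        Cdata,
    }

    struct TokenizeState {
        // Was an `Option` of a boxed state function; a name is `Copy`.
        return_state: Option<StateName>,
    }

    fn to_func(name: StateName) -> fn() -> &'static str {
        match name {
            StateName::Comment => || "back in comment",
            StateName::Cdata => || "back in cdata",
        }
    }

    fn main() {
        let mut state = TokenizeState { return_state: None };
        // At the `\n` inside a comment: record where to come back to.
        state.return_state = Some(StateName::Comment);
        // ...consume the line ending and any indent...
        // Then resume the recorded state by name.
        let name = state.return_state.take().unwrap();
        println!("{}", to_func(name)());
    }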
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index b38e15a..ae9fe77 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -147,12 +147,9 @@
//! [html-img]: https://html.spec.whatwg.org/multipage/embedded-content.html#the-img-element
use crate::constant::RESOURCE_DESTINATION_BALANCE_MAX;
-use crate::construct::{
- partial_destination::start as destination, partial_label::start as label,
- partial_space_or_tab::space_or_tab_eol, partial_title::start as title,
-};
+use crate::construct::partial_space_or_tab::space_or_tab_eol;
use crate::token::Token;
-use crate::tokenizer::{Event, EventType, Media, State, Tokenizer};
+use crate::tokenizer::{Event, EventType, Media, State, StateName, Tokenizer};
use crate::util::{
normalize_identifier::normalize_identifier,
skip,
@@ -204,7 +201,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
tokenizer.consume();
tokenizer.exit(Token::LabelMarker);
tokenizer.exit(Token::LabelEnd);
- return State::Fn(Box::new(after));
+ return State::Fn(StateName::LabelEndAfter);
}
}
@@ -223,7 +220,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | [a] b
/// ^
/// ```
-fn after(tokenizer: &mut Tokenizer) -> State {
+pub fn after(tokenizer: &mut Tokenizer) -> State {
let start = &tokenizer.label_start_stack[tokenizer.tokenize_state.start];
let defined = tokenizer
.parse_state
@@ -240,19 +237,23 @@ fn after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// Resource (`[asd](fgh)`)?
- Some(b'(') => tokenizer.attempt(resource, move |is_ok| {
- Box::new(if is_ok || defined { ok } else { nok })
- })(tokenizer),
+ Some(b'(') => tokenizer.attempt(StateName::LabelEndResourceStart, move |is_ok| {
+ State::Fn(if is_ok || defined {
+ StateName::LabelEndOk
+ } else {
+ StateName::LabelEndNok
+ })
+ }),
// Full (`[asd][fgh]`) or collapsed (`[asd][]`) reference?
- Some(b'[') => tokenizer.attempt(full_reference, move |is_ok| {
- Box::new(if is_ok {
- ok
+ Some(b'[') => tokenizer.attempt(StateName::LabelEndReferenceFull, move |is_ok| {
+ State::Fn(if is_ok {
+ StateName::LabelEndOk
} else if defined {
- reference_not_full
+ StateName::LabelEndReferenceNotFull
} else {
- nok
+ StateName::LabelEndNok
})
- })(tokenizer),
+ }),
// Shortcut (`[asd]`) reference?
_ => {
let func = if defined { ok } else { nok };
@@ -271,10 +272,14 @@ fn after(tokenizer: &mut Tokenizer) -> State {
/// > | [a] b
/// ^
/// ```
-fn reference_not_full(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt(collapsed_reference, |is_ok| {
- Box::new(if is_ok { ok } else { nok })
- })(tokenizer)
+pub fn reference_not_full(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.attempt(StateName::LabelEndReferenceCollapsed, |is_ok| {
+ State::Fn(if is_ok {
+ StateName::LabelEndOk
+ } else {
+ StateName::LabelEndNok
+ })
+ })
}
/// Done, we found something.
@@ -289,7 +294,7 @@ fn reference_not_full(tokenizer: &mut Tokenizer) -> State {
/// > | [a] b
/// ^
/// ```
-fn ok(tokenizer: &mut Tokenizer) -> State {
+pub fn ok(tokenizer: &mut Tokenizer) -> State {
let label_start_index = tokenizer.tokenize_state.start;
// Remove this one and everything after it.
let mut left = tokenizer.label_start_stack.split_off(label_start_index);
@@ -332,7 +337,7 @@ fn ok(tokenizer: &mut Tokenizer) -> State {
/// > | [a] b
/// ^
/// ```
-fn nok(tokenizer: &mut Tokenizer) -> State {
+pub fn nok(tokenizer: &mut Tokenizer) -> State {
tokenizer
.label_start_stack
.get_mut(tokenizer.tokenize_state.start)
@@ -349,14 +354,14 @@ fn nok(tokenizer: &mut Tokenizer) -> State {
/// > | [a](b) c
/// ^
/// ```
-fn resource(tokenizer: &mut Tokenizer) -> State {
+pub fn resource_start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'(') => {
tokenizer.enter(Token::Resource);
tokenizer.enter(Token::ResourceMarker);
tokenizer.consume();
tokenizer.exit(Token::ResourceMarker);
- State::Fn(Box::new(resource_start))
+ State::Fn(StateName::LabelEndResourceBefore)
}
_ => unreachable!("expected `(`"),
}
@@ -368,8 +373,9 @@ fn resource(tokenizer: &mut Tokenizer) -> State {
/// > | [a](b) c
/// ^
/// ```
-fn resource_start(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt_opt(space_or_tab_eol(), resource_open)(tokenizer)
+pub fn resource_before(tokenizer: &mut Tokenizer) -> State {
+ let state_name = space_or_tab_eol(tokenizer);
+ tokenizer.attempt_opt(state_name, StateName::LabelEndResourceOpen)
}
/// At the start of a resource, after optional whitespace.
@@ -378,7 +384,7 @@ fn resource_start(tokenizer: &mut Tokenizer) -> State {
/// > | [a](b) c
/// ^
/// ```
-fn resource_open(tokenizer: &mut Tokenizer) -> State {
+pub fn resource_open(tokenizer: &mut Tokenizer) -> State {
if let Some(b')') = tokenizer.current {
resource_end(tokenizer)
} else {
@@ -389,13 +395,13 @@ fn resource_open(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.token_5 = Token::ResourceDestinationString;
tokenizer.tokenize_state.size_other = RESOURCE_DESTINATION_BALANCE_MAX;
- tokenizer.attempt(destination, |ok| {
- Box::new(if ok {
- destination_after
+ tokenizer.attempt(StateName::DestinationStart, |ok| {
+ State::Fn(if ok {
+ StateName::LabelEndResourceDestinationAfter
} else {
- destination_missing
+ StateName::LabelEndResourceDestinationMissing
})
- })(tokenizer)
+ })
}
}
@@ -405,21 +411,26 @@ fn resource_open(tokenizer: &mut Tokenizer) -> State {
/// > | [a](b) c
/// ^
/// ```
-fn destination_after(tokenizer: &mut Tokenizer) -> State {
+pub fn resource_destination_after(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.token_1 = Token::Data;
tokenizer.tokenize_state.token_2 = Token::Data;
tokenizer.tokenize_state.token_3 = Token::Data;
tokenizer.tokenize_state.token_4 = Token::Data;
tokenizer.tokenize_state.token_5 = Token::Data;
tokenizer.tokenize_state.size_other = 0;
-
- tokenizer.attempt(space_or_tab_eol(), |ok| {
- Box::new(if ok { resource_between } else { resource_end })
- })(tokenizer)
+ let state_name = space_or_tab_eol(tokenizer);
+
+ tokenizer.attempt(state_name, |ok| {
+ State::Fn(if ok {
+ StateName::LabelEndResourceBetween
+ } else {
+ StateName::LabelEndResourceEnd
+ })
+ })
}
/// Without destination.
-fn destination_missing(tokenizer: &mut Tokenizer) -> State {
+pub fn resource_destination_missing(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.token_1 = Token::Data;
tokenizer.tokenize_state.token_2 = Token::Data;
tokenizer.tokenize_state.token_3 = Token::Data;
@@ -435,13 +446,13 @@ fn destination_missing(tokenizer: &mut Tokenizer) -> State {
/// > | [a](b ) c
/// ^
/// ```
-fn resource_between(tokenizer: &mut Tokenizer) -> State {
+pub fn resource_between(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'"' | b'\'' | b'(') => {
tokenizer.tokenize_state.token_1 = Token::ResourceTitle;
tokenizer.tokenize_state.token_2 = Token::ResourceTitleMarker;
tokenizer.tokenize_state.token_3 = Token::ResourceTitleString;
- tokenizer.go(title, title_after)(tokenizer)
+ tokenizer.go(StateName::TitleStart, StateName::LabelEndResourceTitleAfter)
}
_ => resource_end(tokenizer),
}
@@ -453,11 +464,12 @@ fn resource_between(tokenizer: &mut Tokenizer) -> State {
/// > | [a](b "c") d
/// ^
/// ```
-fn title_after(tokenizer: &mut Tokenizer) -> State {
+pub fn resource_title_after(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.token_1 = Token::Data;
tokenizer.tokenize_state.token_2 = Token::Data;
tokenizer.tokenize_state.token_3 = Token::Data;
- tokenizer.attempt_opt(space_or_tab_eol(), resource_end)(tokenizer)
+ let state_name = space_or_tab_eol(tokenizer);
+ tokenizer.attempt_opt(state_name, StateName::LabelEndResourceEnd)
}
/// In a resource, at the `)`.
@@ -466,7 +478,7 @@ fn title_after(tokenizer: &mut Tokenizer) -> State {
/// > | [a](b) d
/// ^
/// ```
-fn resource_end(tokenizer: &mut Tokenizer) -> State {
+pub fn resource_end(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b')') => {
tokenizer.enter(Token::ResourceMarker);
@@ -485,13 +497,13 @@ fn resource_end(tokenizer: &mut Tokenizer) -> State {
/// > | [a][b] d
/// ^
/// ```
-fn full_reference(tokenizer: &mut Tokenizer) -> State {
+pub fn reference_full(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'[') => {
tokenizer.tokenize_state.token_1 = Token::Reference;
tokenizer.tokenize_state.token_2 = Token::ReferenceMarker;
tokenizer.tokenize_state.token_3 = Token::ReferenceString;
- tokenizer.go(label, full_reference_after)(tokenizer)
+ tokenizer.go(StateName::LabelStart, StateName::LabelEndReferenceFullAfter)
}
_ => unreachable!("expected `[`"),
}
@@ -503,7 +515,7 @@ fn full_reference(tokenizer: &mut Tokenizer) -> State {
/// > | [a][b] d
/// ^
/// ```
-fn full_reference_after(tokenizer: &mut Tokenizer) -> State {
+pub fn reference_full_after(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.token_1 = Token::Data;
tokenizer.tokenize_state.token_2 = Token::Data;
tokenizer.tokenize_state.token_3 = Token::Data;
@@ -541,14 +553,14 @@ fn full_reference_after(tokenizer: &mut Tokenizer) -> State {
/// > | [a][] d
/// ^
/// ```
-fn collapsed_reference(tokenizer: &mut Tokenizer) -> State {
+pub fn reference_collapsed(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'[') => {
tokenizer.enter(Token::Reference);
tokenizer.enter(Token::ReferenceMarker);
tokenizer.consume();
tokenizer.exit(Token::ReferenceMarker);
- State::Fn(Box::new(collapsed_reference_open))
+ State::Fn(StateName::LabelEndReferenceCollapsedOpen)
}
_ => State::Nok,
}
@@ -562,7 +574,7 @@ fn collapsed_reference(tokenizer: &mut Tokenizer) -> State {
/// > | [a][] d
/// ^
/// ```
-fn collapsed_reference_open(tokenizer: &mut Tokenizer) -> State {
+pub fn reference_collapsed_open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b']') => {
tokenizer.enter(Token::ReferenceMarker);
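The `label_end.rs` hunks also show the new shape of `attempt` and `check` call sites: the callback now returns the next `State` directly, and `attempt` itself returns a `State`, where the old version returned a boxed state function that still had to be applied with `(tokenizer)`. A toy, eager version of that signature; the real implementation defers through an attempt stack so events can be rewound on failure, which is elided here:

    // Toy, eager sketch of the new `attempt` signature: run the named
    // sub-state machine to completion, then let the callback choose the
    // follow-up state from the boolean outcome.
    struct Tokenizer;

    enum State {
        Ok,
        Nok,
        Fn(StateName),
    }

    #[derive(Clone, Copy)]
    enum StateName {
        ResourceStart,
        LabelEndOk,
        LabelEndNok,
    }

    impl StateName {
        fn to_func(self) -> fn(&mut Tokenizer) -> State {
            match self {
                // Pretend the resource construct fails to match.
                StateName::ResourceStart => |_| State::Nok,
                StateName::LabelEndOk => |_| State::Ok,
                StateName::LabelEndNok => |_| State::Nok,
            }
        }
    }

    fn attempt(
        tokenizer: &mut Tokenizer,
        state_name: StateName,
        done: impl FnOnce(bool) -> State,
    ) -> State {
        let mut state = State::Fn(state_name);
        loop {
            match state {
                State::Ok => return done(true),
                State::Nok => return done(false),
                State::Fn(name) => state = name.to_func()(tokenizer),
            }
        }
    }

    fn main() {
        let mut tokenizer = Tokenizer;
        // As at the `Some(b'(')` arm of `after`: try a resource, and
        // fall back to the `nok` state when the attempt fails.
        let state = attempt(&mut tokenizer, StateName::ResourceStart, |is_ok| {
            State::Fn(if is_ok {
                StateName::LabelEndOk
            } else {
                StateName::LabelEndNok
            })
        });
        assert!(matches!(state, State::Fn(StateName::LabelEndNok)));
    }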
diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs
index 4a3508e..4fcf8c2 100644
--- a/src/construct/label_start_image.rs
+++ b/src/construct/label_start_image.rs
@@ -30,7 +30,7 @@
use super::label_end::resolve_media;
use crate::token::Token;
-use crate::tokenizer::{LabelStart, State, Tokenizer};
+use crate::tokenizer::{LabelStart, State, StateName, Tokenizer};
/// Start of label (image) start.
///
@@ -45,7 +45,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Token::LabelImageMarker);
tokenizer.consume();
tokenizer.exit(Token::LabelImageMarker);
- State::Fn(Box::new(open))
+ State::Fn(StateName::LabelStartImageOpen)
}
_ => State::Nok,
}
diff --git a/src/construct/list.rs b/src/construct/list.rs
index 0e12b7c..6ecfb04 100644
--- a/src/construct/list.rs
+++ b/src/construct/list.rs
@@ -45,12 +45,9 @@
//! [commonmark-block]: https://spec.commonmark.org/0.30/#phase-1-block-structure
use crate::constant::{LIST_ITEM_VALUE_SIZE_MAX, TAB_SIZE};
-use crate::construct::{
- blank_line::start as blank_line, partial_space_or_tab::space_or_tab_min_max,
- thematic_break::start as thematic_break,
-};
+use crate::construct::partial_space_or_tab::space_or_tab_min_max;
use crate::token::Token;
-use crate::tokenizer::{EventType, State, Tokenizer};
+use crate::tokenizer::{EventType, State, StateName, Tokenizer};
use crate::util::{
skip,
slice::{Position, Slice},
@@ -65,17 +62,16 @@ use crate::util::{
pub fn start(tokenizer: &mut Tokenizer) -> State {
if tokenizer.parse_state.constructs.list {
tokenizer.enter(Token::ListItem);
- tokenizer.go(
- space_or_tab_min_max(
- 0,
- if tokenizer.parse_state.constructs.code_indented {
- TAB_SIZE - 1
- } else {
- usize::MAX
- },
- ),
- before,
- )(tokenizer)
+ let state_name = space_or_tab_min_max(
+ tokenizer,
+ 0,
+ if tokenizer.parse_state.constructs.code_indented {
+ TAB_SIZE - 1
+ } else {
+ usize::MAX
+ },
+ );
+ tokenizer.go(state_name, StateName::ListBefore)
} else {
State::Nok
}
@@ -87,12 +83,16 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | * a
/// ^
/// ```
-fn before(tokenizer: &mut Tokenizer) -> State {
+pub fn before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// Unordered.
- Some(b'*' | b'-') => tokenizer.check(thematic_break, |ok| {
- Box::new(if ok { nok } else { before_unordered })
- })(tokenizer),
+ Some(b'*' | b'-') => tokenizer.check(StateName::ThematicBreakStart, |ok| {
+ State::Fn(if ok {
+ StateName::ListNok
+ } else {
+ StateName::ListBeforeUnordered
+ })
+ }),
Some(b'+') => before_unordered(tokenizer),
// Ordered.
Some(b'0'..=b'9') if !tokenizer.interrupt => before_ordered(tokenizer),
@@ -109,7 +109,7 @@ fn before(tokenizer: &mut Tokenizer) -> State {
/// > | * a
/// ^
/// ```
-fn before_unordered(tokenizer: &mut Tokenizer) -> State {
+pub fn before_unordered(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Token::ListItemPrefix);
marker(tokenizer)
}
@@ -120,10 +120,10 @@ fn before_unordered(tokenizer: &mut Tokenizer) -> State {
/// > | * a
/// ^
/// ```
-fn before_ordered(tokenizer: &mut Tokenizer) -> State {
+pub fn before_ordered(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Token::ListItemPrefix);
tokenizer.enter(Token::ListItemValue);
- inside(tokenizer)
+ value(tokenizer)
}
/// In an ordered list item value.
@@ -132,7 +132,7 @@ fn before_ordered(tokenizer: &mut Tokenizer) -> State {
/// > | 1. a
/// ^
/// ```
-fn inside(tokenizer: &mut Tokenizer) -> State {
+pub fn value(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'.' | b')') if !tokenizer.interrupt || tokenizer.tokenize_state.size < 2 => {
tokenizer.exit(Token::ListItemValue);
@@ -141,7 +141,7 @@ fn inside(tokenizer: &mut Tokenizer) -> State {
Some(b'0'..=b'9') if tokenizer.tokenize_state.size + 1 < LIST_ITEM_VALUE_SIZE_MAX => {
tokenizer.tokenize_state.size += 1;
tokenizer.consume();
- State::Fn(Box::new(inside))
+ State::Fn(StateName::ListValue)
}
_ => {
tokenizer.tokenize_state.size = 0;
@@ -158,11 +158,11 @@ fn inside(tokenizer: &mut Tokenizer) -> State {
/// > | 1. b
/// ^
/// ```
-fn marker(tokenizer: &mut Tokenizer) -> State {
+pub fn marker(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Token::ListItemMarker);
tokenizer.consume();
tokenizer.exit(Token::ListItemMarker);
- State::Fn(Box::new(marker_after))
+ State::Fn(StateName::ListMarkerAfter)
}
/// After a list item marker.
@@ -173,11 +173,15 @@ fn marker(tokenizer: &mut Tokenizer) -> State {
/// > | 1. b
/// ^
/// ```
-fn marker_after(tokenizer: &mut Tokenizer) -> State {
+pub fn marker_after(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.size = 1;
- tokenizer.check(blank_line, |ok| {
- Box::new(if ok { after } else { marker_after_not_blank })
- })(tokenizer)
+ tokenizer.check(StateName::BlankLineStart, |ok| {
+ State::Fn(if ok {
+ StateName::ListAfter
+ } else {
+ StateName::ListMarkerAfterFilled
+ })
+ })
}
/// After a list item marker, not followed by a blank line.
@@ -186,13 +190,17 @@ fn marker_after(tokenizer: &mut Tokenizer) -> State {
/// > | * a
/// ^
/// ```
-fn marker_after_not_blank(tokenizer: &mut Tokenizer) -> State {
+pub fn marker_after_filled(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.size = 0;
// Attempt to parse up to the largest allowed indent, `nok` if there is more whitespace.
- tokenizer.attempt(whitespace, |ok| {
- Box::new(if ok { after } else { prefix_other })
- })(tokenizer)
+ tokenizer.attempt(StateName::ListWhitespace, |ok| {
+ State::Fn(if ok {
+ StateName::ListAfter
+ } else {
+ StateName::ListPrefixOther
+ })
+ })
}
/// In whitespace after a marker.
@@ -201,8 +209,9 @@ fn marker_after_not_blank(tokenizer: &mut Tokenizer) -> State {
/// > | * a
/// ^
/// ```
-fn whitespace(tokenizer: &mut Tokenizer) -> State {
- tokenizer.go(space_or_tab_min_max(1, TAB_SIZE), whitespace_after)(tokenizer)
+pub fn whitespace(tokenizer: &mut Tokenizer) -> State {
+ let state_name = space_or_tab_min_max(tokenizer, 1, TAB_SIZE);
+ tokenizer.go(state_name, StateName::ListWhitespaceAfter)
}
/// After acceptable whitespace.
@@ -211,7 +220,7 @@ fn whitespace(tokenizer: &mut Tokenizer) -> State {
/// > | * a
/// ^
/// ```
-fn whitespace_after(tokenizer: &mut Tokenizer) -> State {
+pub fn whitespace_after(tokenizer: &mut Tokenizer) -> State {
if let Some(b'\t' | b' ') = tokenizer.current {
State::Nok
} else {
@@ -225,13 +234,13 @@ fn whitespace_after(tokenizer: &mut Tokenizer) -> State {
/// > | * a
/// ^
/// ```
-fn prefix_other(tokenizer: &mut Tokenizer) -> State {
+pub fn prefix_other(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\t' | b' ') => {
tokenizer.enter(Token::SpaceOrTab);
tokenizer.consume();
tokenizer.exit(Token::SpaceOrTab);
- State::Fn(Box::new(after))
+ State::Fn(StateName::ListAfter)
}
_ => State::Nok,
}
@@ -243,7 +252,7 @@ fn prefix_other(tokenizer: &mut Tokenizer) -> State {
/// > | * a
/// ^
/// ```
-fn after(tokenizer: &mut Tokenizer) -> State {
+pub fn after(tokenizer: &mut Tokenizer) -> State {
let blank = tokenizer.tokenize_state.size == 1;
tokenizer.tokenize_state.size = 0;
@@ -285,10 +294,14 @@ fn after(tokenizer: &mut Tokenizer) -> State {
/// > | b
/// ^
/// ```
-pub fn cont(tokenizer: &mut Tokenizer) -> State {
- tokenizer.check(blank_line, |ok| {
- Box::new(if ok { blank_cont } else { not_blank_cont })
- })(tokenizer)
+pub fn cont_start(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.check(StateName::BlankLineStart, |ok| {
+ State::Fn(if ok {
+ StateName::ListContBlank
+ } else {
+ StateName::ListContFilled
+ })
+ })
}
/// Start of blank list item continuation.
@@ -299,15 +312,16 @@ pub fn cont(tokenizer: &mut Tokenizer) -> State {
/// ^
/// | b
/// ```
-pub fn blank_cont(tokenizer: &mut Tokenizer) -> State {
+pub fn cont_blank(tokenizer: &mut Tokenizer) -> State {
let container = tokenizer.container.as_ref().unwrap();
let size = container.size;
if container.blank_initial {
State::Nok
} else {
+ let state_name = space_or_tab_min_max(tokenizer, 0, size);
// Consume, optionally, at most `size`.
- tokenizer.go(space_or_tab_min_max(0, size), ok)(tokenizer)
+ tokenizer.go(state_name, StateName::ListOk)
}
}
@@ -318,14 +332,15 @@ pub fn blank_cont(tokenizer: &mut Tokenizer) -> State {
/// > | b
/// ^
/// ```
-pub fn not_blank_cont(tokenizer: &mut Tokenizer) -> State {
+pub fn cont_filled(tokenizer: &mut Tokenizer) -> State {
let container = tokenizer.container.as_mut().unwrap();
let size = container.size;
container.blank_initial = false;
// Consume exactly `size`.
- tokenizer.go(space_or_tab_min_max(size, size), ok)(tokenizer)
+ let state_name = space_or_tab_min_max(tokenizer, size, size);
+ tokenizer.go(state_name, StateName::ListOk)
}
/// A state fn to yield [`State::Ok`].
@@ -334,16 +349,16 @@ pub fn ok(_tokenizer: &mut Tokenizer) -> State {
}
/// A state fn to yield [`State::Nok`].
-fn nok(_tokenizer: &mut Tokenizer) -> State {
+pub fn nok(_tokenizer: &mut Tokenizer) -> State {
State::Nok
}
/// Find adjacent list items with the same marker.
pub fn resolve_list_item(tokenizer: &mut Tokenizer) {
- let mut index = 0;
- let mut balance = 0;
let mut lists_wip: Vec<(u8, usize, usize, usize)> = vec![];
let mut lists: Vec<(u8, usize, usize, usize)> = vec![];
+ let mut index = 0;
+ let mut balance = 0;
// Merge list items.
while index < tokenizer.events.len() {
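The `list.rs` hunks show the knock-on effect for parameterized helpers: `space_or_tab_min_max(min, max)` used to return a closure capturing its bounds, which no longer works once a state is a plain enum name. Judging from the call sites, the new shape stashes the configuration on the tokenizer and hands back the name of a state that reads it. A hypothetical reduction of that pattern (field and variant names are illustrative; the real helper lives in `partial_space_or_tab.rs`):

    // Hypothetical reduction: configure a sub-state machine through
    // stored state instead of a capturing closure.
    #[derive(Clone, Copy, Debug, PartialEq)]
    enum StateName {
        SpaceOrTabStart,
    }

    #[derive(Default)]
    struct TokenizeState {
        space_or_tab_min: usize,
        space_or_tab_max: usize,
    }

    struct Tokenizer {
        tokenize_state: TokenizeState,
    }

    // Before: returned `Box::new(move |t| ...)` with `min`/`max`
    // captured. Now the bounds travel via `tokenize_state` and only a
    // name is returned.
    fn space_or_tab_min_max(
        tokenizer: &mut Tokenizer,
        min: usize,
        max: usize,
    ) -> StateName {
        tokenizer.tokenize_state.space_or_tab_min = min;
        tokenizer.tokenize_state.space_or_tab_max = max;
        StateName::SpaceOrTabStart
    }

    fn main() {
        let mut tokenizer = Tokenizer {
            tokenize_state: TokenizeState::default(),
        };
        // As in `cont_filled`: consume exactly `size` spaces or tabs.
        let size = 2;
        let state_name = space_or_tab_min_max(&mut tokenizer, size, size);
        assert_eq!(state_name, StateName::SpaceOrTabStart);
        assert_eq!(tokenizer.tokenize_state.space_or_tab_min, size);
        assert_eq!(tokenizer.tokenize_state.space_or_tab_max, size);
    }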
diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs
index 7fdaa66..de750f4 100644
--- a/src/construct/paragraph.rs
+++ b/src/construct/paragraph.rs
@@ -33,7 +33,7 @@
//! [html]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-p-element
use crate::token::Token;
-use crate::tokenizer::{ContentType, EventType, State, Tokenizer};
+use crate::tokenizer::{ContentType, EventType, State, StateName, Tokenizer};
use crate::util::skip::opt as skip_opt;
/// Before a paragraph.
@@ -59,7 +59,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | abc
/// ^^^
/// ```
-fn inside(tokenizer: &mut Tokenizer) -> State {
+pub fn inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.exit(Token::Data);
@@ -71,7 +71,7 @@ fn inside(tokenizer: &mut Tokenizer) -> State {
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(inside))
+ State::Fn(StateName::ParagraphInside)
}
}
}
diff --git a/src/construct/partial_bom.rs b/src/construct/partial_bom.rs
index 2257bfd..b32b7f9 100644
--- a/src/construct/partial_bom.rs
+++ b/src/construct/partial_bom.rs
@@ -11,7 +11,7 @@
//! * [`micromark/lib/preprocess.js` in `micromark`](https://github.com/micromark/micromark/blob/ed23453/packages/micromark/dev/lib/preprocess.js#L54-L60)
use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};
const BOM: [u8; 3] = [0xEF, 0xBB, 0xBF];
@@ -36,7 +36,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | 0xEF 0xBB 0xBF
/// ^^^^ ^^^^ ^^^^
/// ```
-fn inside(tokenizer: &mut Tokenizer) -> State {
+pub fn inside(tokenizer: &mut Tokenizer) -> State {
if tokenizer.current == Some(BOM[tokenizer.tokenize_state.size]) {
tokenizer.tokenize_state.size += 1;
tokenizer.consume();
@@ -45,7 +45,7 @@ fn inside(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.size = 0;
State::Ok
} else {
- State::Fn(Box::new(inside))
+ State::Fn(StateName::BomInside)
}
} else {
tokenizer.tokenize_state.size = 0;
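`partial_bom.rs` (like `cdata_open_inside` earlier) walks a constant byte prefix using `tokenize_state.size` as a cursor, resetting it on both success and failure. A toy of that byte-at-a-time prefix match:

    // Toy of the prefix-matching state used for the BOM (and similarly
    // for the CDATA prefix): `size` counts prefix bytes matched so far.
    const BOM: [u8; 3] = [0xEF, 0xBB, 0xBF];

    enum State {
        Ok,
        Nok,
        Again, // stand-in for `State::Fn(StateName::BomInside)`
    }

    fn bom_inside(current: Option<u8>, size: &mut usize) -> State {
        if current == Some(BOM[*size]) {
            *size += 1;
            if *size == BOM.len() {
                *size = 0;
                State::Ok
            } else {
                State::Again
            }
        } else {
            *size = 0;
            State::Nok
        }
    }

    fn main() {
        let mut size = 0;
        let mut state = State::Again;
        for &byte in &BOM {
            state = bom_inside(Some(byte), &mut size);
        }
        assert!(matches!(state, State::Ok));
    }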
diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs
index 0365489..1cb5e61 100644
--- a/src/construct/partial_data.rs
+++ b/src/construct/partial_data.rs
@@ -7,7 +7,7 @@
//! [text]: crate::content::text
use crate::token::Token;
-use crate::tokenizer::{EventType, State, Tokenizer};
+use crate::tokenizer::{EventType, State, StateName, Tokenizer};
/// At the beginning of data.
///
@@ -17,10 +17,11 @@ use crate::tokenizer::{EventType, State, Tokenizer};
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
+ // Make sure to eat the first `stop`.
Some(byte) if tokenizer.tokenize_state.stop.contains(&byte) => {
tokenizer.enter(Token::Data);
tokenizer.consume();
- State::Fn(Box::new(data))
+ State::Fn(StateName::DataInside)
}
_ => at_break(tokenizer),
}
@@ -32,14 +33,14 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | abc
/// ^
/// ```
-fn at_break(tokenizer: &mut Tokenizer) -> State {
+pub fn at_break(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => State::Ok,
Some(b'\n') => {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
- State::Fn(Box::new(at_break))
+ State::Fn(StateName::DataAtBreak)
}
Some(byte) if tokenizer.tokenize_state.stop.contains(&byte) => {
tokenizer.register_resolver_before("data".to_string(), Box::new(resolve_data));
@@ -47,7 +48,7 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {
}
_ => {
tokenizer.enter(Token::Data);
- data(tokenizer)
+ inside(tokenizer)
}
}
}
@@ -58,7 +59,7 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {
/// > | abc
/// ^^^
/// ```
-fn data(tokenizer: &mut Tokenizer) -> State {
+pub fn inside(tokenizer: &mut Tokenizer) -> State {
let done = match tokenizer.current {
None | Some(b'\n') => true,
Some(byte) if tokenizer.tokenize_state.stop.contains(&byte) => true,
@@ -70,7 +71,7 @@ fn data(tokenizer: &mut Tokenizer) -> State {
at_break(tokenizer)
} else {
tokenizer.consume();
- State::Fn(Box::new(data))
+ State::Fn(StateName::DataInside)
}
}
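`partial_data.rs` is a compact example of the states themselves: `start` deliberately eats a leading stop byte, `at_break` handles the eol/stop/data boundaries, and `inside` consumes until the next boundary. A self-contained toy of the same stop-byte scan (the event bookkeeping of the real states is omitted):

    // Toy of the `data` stop-byte logic: consume bytes until a
    // configured stop marker or a line ending, eating a leading stop
    // byte first.
    fn data_run(bytes: &[u8], stop: &[u8]) -> usize {
        let mut index = 0;
        // Make sure to eat the first `stop`, as the real `start` state
        // does, so a marker at position 0 still becomes data.
        if bytes.first().map_or(false, |byte| stop.contains(byte)) {
            index += 1;
        }
        while index < bytes.len() {
            let byte = bytes[index];
            if byte == b'\n' || stop.contains(&byte) {
                break;
            }
            index += 1;
        }
        index
    }

    fn main() {
        // The run of plain data ends right before the `&` stop byte.
        assert_eq!(data_run(b"abc&x", &[b'&']), 3);
        // A leading stop byte is consumed as data first.
        assert_eq!(data_run(b"&abc&", &[b'&']), 4);
    }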
diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs
index f1cfc7d..e8818a0 100644
--- a/src/construct/partial_destination.rs
+++ b/src/construct/partial_destination.rs
@@ -72,7 +72,7 @@
//! [sanitize_uri]: crate::util::sanitize_uri
use crate::token::Token;
-use crate::tokenizer::{ContentType, State, Tokenizer};
+use crate::tokenizer::{ContentType, State, StateName, Tokenizer};
/// Before a destination.
///
@@ -90,7 +90,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(tokenizer.tokenize_state.token_3.clone());
tokenizer.consume();
tokenizer.exit(tokenizer.tokenize_state.token_3.clone());
- State::Fn(Box::new(enclosed_before))
+ State::Fn(StateName::DestinationEnclosedBefore)
}
// ASCII control, space, closing paren, but *not* `\0`.
None | Some(0x01..=0x1F | b' ' | b')' | 0x7F) => State::Nok,
@@ -110,7 +110,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | <aa>
/// ^
/// ```
-fn enclosed_before(tokenizer: &mut Tokenizer) -> State {
+pub fn enclosed_before(tokenizer: &mut Tokenizer) -> State {
if let Some(b'>') = tokenizer.current {
tokenizer.enter(tokenizer.tokenize_state.token_3.clone());
tokenizer.consume();
@@ -131,7 +131,7 @@ fn enclosed_before(tokenizer: &mut Tokenizer) -> State {
/// > | <aa>
/// ^
/// ```
-fn enclosed(tokenizer: &mut Tokenizer) -> State {
+pub fn enclosed(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n' | b'<') => State::Nok,
Some(b'>') => {
@@ -141,11 +141,11 @@ fn enclosed(tokenizer: &mut Tokenizer) -> State {
}
Some(b'\\') => {
tokenizer.consume();
- State::Fn(Box::new(enclosed_escape))
+ State::Fn(StateName::DestinationEnclosedEscape)
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(enclosed))
+ State::Fn(StateName::DestinationEnclosed)
}
}
}
@@ -156,11 +156,11 @@ fn enclosed(tokenizer: &mut Tokenizer) -> State {
/// > | <a\*a>
/// ^
/// ```
-fn enclosed_escape(tokenizer: &mut Tokenizer) -> State {
+pub fn enclosed_escape(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'<' | b'>' | b'\\') => {
tokenizer.consume();
- State::Fn(Box::new(enclosed))
+ State::Fn(StateName::DestinationEnclosed)
}
_ => enclosed(tokenizer),
}
@@ -172,7 +172,7 @@ fn enclosed_escape(tokenizer: &mut Tokenizer) -> State {
/// > | aa
/// ^
/// ```
-fn raw(tokenizer: &mut Tokenizer) -> State {
+pub fn raw(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\t' | b'\n' | b' ' | b')') if tokenizer.tokenize_state.size == 0 => {
tokenizer.exit(Token::Data);
@@ -185,7 +185,7 @@ fn raw(tokenizer: &mut Tokenizer) -> State {
Some(b'(') if tokenizer.tokenize_state.size < tokenizer.tokenize_state.size_other => {
tokenizer.consume();
tokenizer.tokenize_state.size += 1;
- State::Fn(Box::new(raw))
+ State::Fn(StateName::DestinationRaw)
}
// ASCII control (but *not* `\0`) and space and `(`.
None | Some(0x01..=0x1F | b' ' | b'(' | 0x7F) => {
@@ -195,15 +195,15 @@ fn raw(tokenizer: &mut Tokenizer) -> State {
Some(b')') => {
tokenizer.consume();
tokenizer.tokenize_state.size -= 1;
- State::Fn(Box::new(raw))
+ State::Fn(StateName::DestinationRaw)
}
Some(b'\\') => {
tokenizer.consume();
- State::Fn(Box::new(raw_escape))
+ State::Fn(StateName::DestinationRawEscape)
}
Some(_) => {
tokenizer.consume();
- State::Fn(Box::new(raw))
+ State::Fn(StateName::DestinationRaw)
}
}
}
@@ -214,11 +214,11 @@ fn raw(tokenizer: &mut Tokenizer) -> State {
/// > | a\*a
/// ^
/// ```
-fn raw_escape(tokenizer: &mut Tokenizer) -> State {
+pub fn raw_escape(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'(' | b')' | b'\\') => {
tokenizer.consume();
- State::Fn(Box::new(raw))
+ State::Fn(StateName::DestinationRaw)
}
_ => raw(tokenizer),
}
diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs
index 0e1c2ec..0c8366e 100644
--- a/src/construct/partial_label.rs
+++ b/src/construct/partial_label.rs
@@ -62,7 +62,7 @@ use super::partial_space_or_tab::{space_or_tab_eol_with_options, EolOptions};
use crate::constant::LINK_REFERENCE_SIZE_MAX;
use crate::subtokenize::link;
use crate::token::Token;
-use crate::tokenizer::{ContentType, State, Tokenizer};
+use crate::tokenizer::{ContentType, State, StateName, Tokenizer};
/// Before a label.
///
@@ -78,7 +78,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
tokenizer.consume();
tokenizer.exit(tokenizer.tokenize_state.token_2.clone());
tokenizer.enter(tokenizer.tokenize_state.token_3.clone());
- State::Fn(Box::new(at_break))
+ State::Fn(StateName::LabelAtBreak)
}
_ => State::Nok,
}
@@ -90,7 +90,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | [a]
/// ^
/// ```
-fn at_break(tokenizer: &mut Tokenizer) -> State {
+pub fn at_break(tokenizer: &mut Tokenizer) -> State {
if tokenizer.tokenize_state.size > LINK_REFERENCE_SIZE_MAX
|| matches!(tokenizer.current, None | Some(b'['))
|| (matches!(tokenizer.current, Some(b']')) && !tokenizer.tokenize_state.seen)
@@ -101,13 +101,22 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {
State::Nok
} else {
match tokenizer.current {
- Some(b'\n') => tokenizer.attempt(
- space_or_tab_eol_with_options(EolOptions {
- content_type: Some(ContentType::String),
- connect: tokenizer.tokenize_state.connect,
- }),
- |ok| Box::new(if ok { after_eol } else { at_blank_line }),
- )(tokenizer),
+ Some(b'\n') => {
+ let state_name = space_or_tab_eol_with_options(
+ tokenizer,
+ EolOptions {
+ content_type: Some(ContentType::String),
+ connect: tokenizer.tokenize_state.connect,
+ },
+ );
+ tokenizer.attempt(state_name, |ok| {
+ State::Fn(if ok {
+ StateName::LabelEolAfter
+ } else {
+ StateName::LabelAtBlankLine
+ })
+ })
+ }
Some(b']') => {
tokenizer.exit(tokenizer.tokenize_state.token_3.clone());
tokenizer.enter(tokenizer.tokenize_state.token_2.clone());
@@ -129,20 +138,20 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.connect = true;
}
- label(tokenizer)
+ inside(tokenizer)
}
}
}
}
/// To do.
-fn after_eol(tokenizer: &mut Tokenizer) -> State {
+pub fn eol_after(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.connect = true;
at_break(tokenizer)
}
/// To do.
-fn at_blank_line(tokenizer: &mut Tokenizer) -> State {
+pub fn at_blank_line(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.marker = 0;
tokenizer.tokenize_state.connect = false;
State::Nok
@@ -154,7 +163,7 @@ fn at_blank_line(tokenizer: &mut Tokenizer) -> State {
/// > | [a]
/// ^
/// ```
-fn label(tokenizer: &mut Tokenizer) -> State {
+pub fn inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n' | b'[' | b']') => {
tokenizer.exit(Token::Data);
@@ -165,13 +174,16 @@ fn label(tokenizer: &mut Tokenizer) -> State {
tokenizer.exit(Token::Data);
at_break(tokenizer)
} else {
- let func = if matches!(byte, b'\\') { escape } else { label };
tokenizer.consume();
tokenizer.tokenize_state.size += 1;
if !tokenizer.tokenize_state.seen && !matches!(byte, b'\t' | b' ') {
tokenizer.tokenize_state.seen = true;
}
- State::Fn(Box::new(func))
+ State::Fn(if matches!(byte, b'\\') {
+ StateName::LabelEscape
+ } else {
+ StateName::LabelInside
+ })
}
}
}
@@ -183,13 +195,13 @@ fn label(tokenizer: &mut Tokenizer) -> State {
/// > | [a\*a]
/// ^
/// ```
-fn escape(tokenizer: &mut Tokenizer) -> State {
+pub fn escape(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'[' | b'\\' | b']') => {
tokenizer.consume();
tokenizer.tokenize_state.size += 1;
- State::Fn(Box::new(label))
+ State::Fn(StateName::LabelInside)
}
- _ => label(tokenizer),
+ _ => inside(tokenizer),
}
}
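
The `partial_label.rs` changes above also show the reworked `attempt` signature: the attempted state is passed by name, and the continuation is a callback that receives whether the attempt succeeded and answers with the next `StateName`, as in `tokenizer.attempt(state_name, |ok| State::Fn(if ok { StateName::LabelEolAfter } else { StateName::LabelAtBlankLine }))`. Only that one callback is boxed per attempt. A hedged, self-contained sketch of the idea, with toy types and names and none of the real tokenizer's event backtracking:

```rust
#[derive(Clone, Copy)]
enum StateName {
    TryDigit,
    OnYes,
    OnNo,
}

enum State {
    Ok,
    Nok,
    Fn(StateName),
}

struct Tokenizer {
    current: Option<u8>,
    // One boxed callback per `attempt`, taken when the attempt settles.
    done: Option<Box<dyn FnOnce(bool) -> State>>,
}

impl Tokenizer {
    fn attempt(
        &mut self,
        name: StateName,
        done: impl FnOnce(bool) -> State + 'static,
    ) -> State {
        self.done = Some(Box::new(done));
        State::Fn(name)
    }
}

fn call(name: StateName, tokenizer: &mut Tokenizer) -> State {
    match name {
        StateName::TryDigit => match tokenizer.current {
            Some(b'0'..=b'9') => State::Ok,
            _ => State::Nok,
        },
        StateName::OnYes => State::Ok,
        StateName::OnNo => State::Nok,
    }
}

fn main() {
    let mut tokenizer = Tokenizer { current: Some(b'7'), done: None };
    let mut state = tokenizer.attempt(StateName::TryDigit, |ok| {
        State::Fn(if ok { StateName::OnYes } else { StateName::OnNo })
    });
    loop {
        match state {
            State::Fn(name) => state = call(name, &mut tokenizer),
            result => match tokenizer.done.take() {
                // The attempt settled: let the callback pick what's next.
                Some(done) => state = done(matches!(result, State::Ok)),
                None => {
                    assert!(matches!(result, State::Ok));
                    break;
                }
            },
        }
    }
}
```
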
diff --git a/src/construct/partial_non_lazy_continuation.rs b/src/construct/partial_non_lazy_continuation.rs
index 6005a6c..6d5cd7a 100644
--- a/src/construct/partial_non_lazy_continuation.rs
+++ b/src/construct/partial_non_lazy_continuation.rs
@@ -11,7 +11,7 @@
//! [html_flow]: crate::construct::html_flow
use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};
/// Start of continuation.
///
@@ -26,7 +26,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
- State::Fn(Box::new(after))
+ State::Fn(StateName::NonLazyContinuationAfter)
}
_ => State::Nok,
}
@@ -39,7 +39,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | b
/// ^
/// ```
-fn after(tokenizer: &mut Tokenizer) -> State {
+pub fn after(tokenizer: &mut Tokenizer) -> State {
if tokenizer.lazy {
State::Nok
} else {
diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs
index e3eac45..b0b35a6 100644
--- a/src/construct/partial_space_or_tab.rs
+++ b/src/construct/partial_space_or_tab.rs
@@ -6,7 +6,7 @@
use crate::subtokenize::link;
use crate::token::Token;
-use crate::tokenizer::{ContentType, State, StateFn, Tokenizer};
+use crate::tokenizer::{ContentType, State, StateName, Tokenizer};
/// Options to parse `space_or_tab`.
#[derive(Debug)]
@@ -37,8 +37,8 @@ pub struct EolOptions {
/// ```bnf
/// space_or_tab ::= 1*( ' ' '\t' )
/// ```
-pub fn space_or_tab() -> Box<StateFn> {
- space_or_tab_min_max(1, usize::MAX)
+pub fn space_or_tab(tokenizer: &mut Tokenizer) -> StateName {
+ space_or_tab_min_max(tokenizer, 1, usize::MAX)
}
/// Between `x` and `y` `space_or_tab`.
@@ -46,26 +46,27 @@ pub fn space_or_tab() -> Box<StateFn> {
/// ```bnf
/// space_or_tab_min_max ::= x*y( ' ' '\t' )
/// ```
-pub fn space_or_tab_min_max(min: usize, max: usize) -> Box<StateFn> {
- space_or_tab_with_options(Options {
- kind: Token::SpaceOrTab,
- min,
- max,
- content_type: None,
- connect: false,
- })
+pub fn space_or_tab_min_max(tokenizer: &mut Tokenizer, min: usize, max: usize) -> StateName {
+ space_or_tab_with_options(
+ tokenizer,
+ Options {
+ kind: Token::SpaceOrTab,
+ min,
+ max,
+ content_type: None,
+ connect: false,
+ },
+ )
}
/// `space_or_tab`, with the given options.
-pub fn space_or_tab_with_options(options: Options) -> Box<StateFn> {
- Box::new(|tokenizer| {
- tokenizer.tokenize_state.space_or_tab_connect = options.connect;
- tokenizer.tokenize_state.space_or_tab_content_type = options.content_type;
- tokenizer.tokenize_state.space_or_tab_min = options.min;
- tokenizer.tokenize_state.space_or_tab_max = options.max;
- tokenizer.tokenize_state.space_or_tab_token = options.kind;
- start(tokenizer)
- })
+pub fn space_or_tab_with_options(tokenizer: &mut Tokenizer, options: Options) -> StateName {
+ tokenizer.tokenize_state.space_or_tab_connect = options.connect;
+ tokenizer.tokenize_state.space_or_tab_content_type = options.content_type;
+ tokenizer.tokenize_state.space_or_tab_min = options.min;
+ tokenizer.tokenize_state.space_or_tab_max = options.max;
+ tokenizer.tokenize_state.space_or_tab_token = options.kind;
+ StateName::SpaceOrTabStart
}
/// `space_or_tab`, or optionally `space_or_tab`, one `eol`, and
@@ -74,41 +75,21 @@ pub fn space_or_tab_with_options(options: Options) -> Box<StateFn> {
/// ```bnf
/// space_or_tab_eol ::= 1*( ' ' '\t' ) | 0*( ' ' '\t' ) eol 0*( ' ' '\t' )
/// ```
-pub fn space_or_tab_eol() -> Box<StateFn> {
- space_or_tab_eol_with_options(EolOptions {
- content_type: None,
- connect: false,
- })
+pub fn space_or_tab_eol(tokenizer: &mut Tokenizer) -> StateName {
+ space_or_tab_eol_with_options(
+ tokenizer,
+ EolOptions {
+ content_type: None,
+ connect: false,
+ },
+ )
}
/// `space_or_tab_eol`, with the given options.
-pub fn space_or_tab_eol_with_options(options: EolOptions) -> Box<StateFn> {
- Box::new(move |tokenizer| {
- tokenizer.tokenize_state.space_or_tab_eol_content_type = options.content_type;
- tokenizer.tokenize_state.space_or_tab_eol_connect = options.connect;
-
- tokenizer.attempt(
- space_or_tab_with_options(Options {
- kind: Token::SpaceOrTab,
- min: 1,
- max: usize::MAX,
- content_type: tokenizer
- .tokenize_state
- .space_or_tab_eol_content_type
- .clone(),
- connect: tokenizer.tokenize_state.space_or_tab_eol_connect,
- }),
- move |ok| {
- Box::new(move |tokenizer| {
- if ok {
- tokenizer.tokenize_state.space_or_tab_eol_ok = ok;
- }
-
- after_space_or_tab(tokenizer)
- })
- },
- )(tokenizer)
- })
+pub fn space_or_tab_eol_with_options(tokenizer: &mut Tokenizer, options: EolOptions) -> StateName {
+ tokenizer.tokenize_state.space_or_tab_eol_content_type = options.content_type;
+ tokenizer.tokenize_state.space_or_tab_eol_connect = options.connect;
+ StateName::SpaceOrTabEolStart
}
/// Before `space_or_tab`.
@@ -117,7 +98,7 @@ pub fn space_or_tab_eol_with_options(options: EolOptions) -> Box<StateFn> {
/// > | a␠␠b
/// ^
/// ```
-fn start(tokenizer: &mut Tokenizer) -> State {
+pub fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\t' | b' ') if tokenizer.tokenize_state.space_or_tab_max > 0 => {
tokenizer.enter_with_content(
@@ -144,7 +125,7 @@ fn start(tokenizer: &mut Tokenizer) -> State {
/// > | a␠␠b
/// ^
/// ```
-fn inside(tokenizer: &mut Tokenizer) -> State {
+pub fn inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\t' | b' ')
if tokenizer.tokenize_state.space_or_tab_size
@@ -152,7 +133,7 @@ fn inside(tokenizer: &mut Tokenizer) -> State {
{
tokenizer.consume();
tokenizer.tokenize_state.space_or_tab_size += 1;
- State::Fn(Box::new(inside))
+ State::Fn(StateName::SpaceOrTabInside)
}
_ => {
tokenizer.exit(tokenizer.tokenize_state.space_or_tab_token.clone());
@@ -167,7 +148,7 @@ fn inside(tokenizer: &mut Tokenizer) -> State {
/// > | a␠␠b
/// ^
/// ```
-fn after(tokenizer: &mut Tokenizer) -> State {
+pub fn after(tokenizer: &mut Tokenizer) -> State {
let state = if tokenizer.tokenize_state.space_or_tab_size
>= tokenizer.tokenize_state.space_or_tab_min
{
@@ -184,6 +165,44 @@ fn after(tokenizer: &mut Tokenizer) -> State {
state
}
+pub fn eol_start(tokenizer: &mut Tokenizer) -> State {
+ let state_name = space_or_tab_with_options(
+ tokenizer,
+ Options {
+ kind: Token::SpaceOrTab,
+ min: 1,
+ max: usize::MAX,
+ content_type: tokenizer
+ .tokenize_state
+ .space_or_tab_eol_content_type
+ .clone(),
+ connect: tokenizer.tokenize_state.space_or_tab_eol_connect,
+ },
+ );
+
+ tokenizer.attempt(state_name, move |ok| {
+ State::Fn(if ok {
+ StateName::SpaceOrTabEolAfterFirst
+ } else {
+ StateName::SpaceOrTabEolAtEol
+ })
+ })
+}
+
+pub fn eol_after_first(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.tokenize_state.space_or_tab_eol_ok = true;
+
+ if tokenizer
+ .tokenize_state
+ .space_or_tab_eol_content_type
+ .is_some()
+ {
+ tokenizer.tokenize_state.space_or_tab_eol_connect = true;
+ }
+
+ eol_at_eol(tokenizer)
+}
+
/// `space_or_tab_eol`: after optionally first `space_or_tab`.
///
/// ```markdown
@@ -191,16 +210,7 @@ fn after(tokenizer: &mut Tokenizer) -> State {
/// ^
/// | b
/// ```
-fn after_space_or_tab(tokenizer: &mut Tokenizer) -> State {
- if tokenizer.tokenize_state.space_or_tab_eol_ok
- && tokenizer
- .tokenize_state
- .space_or_tab_eol_content_type
- .is_some()
- {
- tokenizer.tokenize_state.space_or_tab_eol_connect = true;
- }
-
+pub fn eol_at_eol(tokenizer: &mut Tokenizer) -> State {
if let Some(b'\n') = tokenizer.current {
tokenizer.enter_with_content(
Token::LineEnding,
@@ -223,17 +233,17 @@ fn after_space_or_tab(tokenizer: &mut Tokenizer) -> State {
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
- State::Fn(Box::new(after_eol))
+ State::Fn(StateName::SpaceOrTabEolAfterEol)
} else {
- let state = if tokenizer.tokenize_state.space_or_tab_eol_ok {
- State::Ok
- } else {
- State::Nok
- };
+ let ok = tokenizer.tokenize_state.space_or_tab_eol_ok;
tokenizer.tokenize_state.space_or_tab_eol_content_type = None;
tokenizer.tokenize_state.space_or_tab_eol_connect = false;
tokenizer.tokenize_state.space_or_tab_eol_ok = false;
- state
+ if ok {
+ State::Ok
+ } else {
+ State::Nok
+ }
}
}
@@ -245,9 +255,10 @@ fn after_space_or_tab(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
#[allow(clippy::needless_pass_by_value)]
-fn after_eol(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt_opt(
- space_or_tab_with_options(Options {
+pub fn eol_after_eol(tokenizer: &mut Tokenizer) -> State {
+ let state_name = space_or_tab_with_options(
+ tokenizer,
+ Options {
kind: Token::SpaceOrTab,
min: 1,
max: usize::MAX,
@@ -256,9 +267,9 @@ fn after_eol(tokenizer: &mut Tokenizer) -> State {
.space_or_tab_eol_content_type
.clone(),
connect: tokenizer.tokenize_state.space_or_tab_eol_connect,
- }),
- after_more_space_or_tab,
- )(tokenizer)
+ },
+ );
+ tokenizer.attempt_opt(state_name, StateName::SpaceOrTabEolAfterMore)
}
/// `space_or_tab_eol`: after more (optional) `space_or_tab`.
@@ -268,7 +279,7 @@ fn after_eol(tokenizer: &mut Tokenizer) -> State {
/// > | b
/// ^
/// ```
-fn after_more_space_or_tab(tokenizer: &mut Tokenizer) -> State {
+pub fn eol_after_more(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.space_or_tab_eol_content_type = None;
tokenizer.tokenize_state.space_or_tab_eol_connect = false;
tokenizer.tokenize_state.space_or_tab_eol_ok = false;
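
`partial_space_or_tab.rs` shows the other half of dropping closures: helpers such as `space_or_tab_with_options` used to capture their `Options` inside the returned `Box<StateFn>`; now they stash the options on `tokenize_state` and return only the entry `StateName`. A simplified, self-contained sketch of that pattern, with toy field and state names:

```rust
#[derive(Clone, Copy)]
enum StateName {
    SpaceOrTabStart,
    SpaceOrTabInside,
}

enum State {
    Ok,
    Nok,
    Fn(StateName),
}

// Shared scratch space: configuration lives here instead of in a
// captured closure environment.
#[derive(Default)]
struct TokenizeState {
    min: usize,
    max: usize,
    size: usize,
}

struct Tokenizer {
    bytes: Vec<u8>,
    index: usize,
    current: Option<u8>,
    tokenize_state: TokenizeState,
}

impl Tokenizer {
    fn consume(&mut self) {
        self.index += 1;
        self.current = self.bytes.get(self.index).copied();
    }
}

// Configure first, then hand back the entry state's *name*.
fn space_or_tab_min_max(tokenizer: &mut Tokenizer, min: usize, max: usize) -> StateName {
    tokenizer.tokenize_state.min = min;
    tokenizer.tokenize_state.max = max;
    tokenizer.tokenize_state.size = 0;
    StateName::SpaceOrTabStart
}

fn start(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        Some(b'\t' | b' ') if tokenizer.tokenize_state.max > 0 => inside(tokenizer),
        _ if tokenizer.tokenize_state.min == 0 => State::Ok,
        _ => State::Nok,
    }
}

fn inside(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        Some(b'\t' | b' ') if tokenizer.tokenize_state.size < tokenizer.tokenize_state.max => {
            tokenizer.consume();
            tokenizer.tokenize_state.size += 1;
            State::Fn(StateName::SpaceOrTabInside)
        }
        _ => {
            if tokenizer.tokenize_state.size >= tokenizer.tokenize_state.min {
                State::Ok
            } else {
                State::Nok
            }
        }
    }
}

fn call(name: StateName, tokenizer: &mut Tokenizer) -> State {
    match name {
        StateName::SpaceOrTabStart => start(tokenizer),
        StateName::SpaceOrTabInside => inside(tokenizer),
    }
}

fn main() {
    let bytes = b"  x".to_vec();
    let current = bytes.first().copied();
    let mut tokenizer = Tokenizer {
        bytes,
        index: 0,
        current,
        tokenize_state: TokenizeState::default(),
    };
    let mut state = State::Fn(space_or_tab_min_max(&mut tokenizer, 1, usize::MAX));
    while let State::Fn(name) = state {
        state = call(name, &mut tokenizer);
    }
    assert!(matches!(state, State::Ok) && tokenizer.index == 2);
}
```

The trade-off the commit accepts is that these fields act as globals scoped to one attempt, which is why the constructs above carefully reset them (`size = 0`, `marker = 0`, and so on) before returning `Ok` or `Nok`.
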
diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs
index 6bf9099..8b72608 100644
--- a/src/construct/partial_title.rs
+++ b/src/construct/partial_title.rs
@@ -30,10 +30,10 @@
//! [character_reference]: crate::construct::character_reference
//! [label_end]: crate::construct::label_end
-use super::partial_space_or_tab::{space_or_tab_eol_with_options, EolOptions};
+use crate::construct::partial_space_or_tab::{space_or_tab_eol_with_options, EolOptions};
use crate::subtokenize::link;
use crate::token::Token;
-use crate::tokenizer::{ContentType, State, Tokenizer};
+use crate::tokenizer::{ContentType, State, StateName, Tokenizer};
/// Before a title.
///
@@ -50,7 +50,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(tokenizer.tokenize_state.token_2.clone());
tokenizer.consume();
tokenizer.exit(tokenizer.tokenize_state.token_2.clone());
- State::Fn(Box::new(begin))
+ State::Fn(StateName::TitleBegin)
}
_ => State::Nok,
}
@@ -64,7 +64,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | "a"
/// ^
/// ```
-fn begin(tokenizer: &mut Tokenizer) -> State {
+pub fn begin(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'"' | b'\'' | b')')
if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker =>
@@ -90,20 +90,30 @@ fn begin(tokenizer: &mut Tokenizer) -> State {
/// > | "a"
/// ^
/// ```
-fn at_break(tokenizer: &mut Tokenizer) -> State {
+pub fn at_break(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => {
tokenizer.tokenize_state.marker = 0;
tokenizer.tokenize_state.connect = false;
State::Nok
}
- Some(b'\n') => tokenizer.attempt(
- space_or_tab_eol_with_options(EolOptions {
- content_type: Some(ContentType::String),
- connect: tokenizer.tokenize_state.connect,
- }),
- |ok| Box::new(if ok { after_eol } else { at_blank_line }),
- )(tokenizer),
+ Some(b'\n') => {
+ let state_name = space_or_tab_eol_with_options(
+ tokenizer,
+ EolOptions {
+ content_type: Some(ContentType::String),
+ connect: tokenizer.tokenize_state.connect,
+ },
+ );
+
+ tokenizer.attempt(state_name, |ok| {
+ State::Fn(if ok {
+ StateName::TitleAfterEol
+ } else {
+ StateName::TitleAtBlankLine
+ })
+ })
+ }
Some(b'"' | b'\'' | b')')
if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker =>
{
@@ -120,19 +130,19 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.connect = true;
}
- title(tokenizer)
+ inside(tokenizer)
}
}
}
/// To do.
-fn after_eol(tokenizer: &mut Tokenizer) -> State {
+pub fn after_eol(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.connect = true;
at_break(tokenizer)
}
/// To do.
-fn at_blank_line(tokenizer: &mut Tokenizer) -> State {
+pub fn at_blank_line(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.marker = 0;
tokenizer.tokenize_state.connect = false;
State::Nok
@@ -144,7 +154,7 @@ fn at_blank_line(tokenizer: &mut Tokenizer) -> State {
/// > | "a"
/// ^
/// ```
-fn title(tokenizer: &mut Tokenizer) -> State {
+pub fn inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.exit(Token::Data);
@@ -157,9 +167,12 @@ fn title(tokenizer: &mut Tokenizer) -> State {
at_break(tokenizer)
}
Some(byte) => {
- let func = if matches!(byte, b'\\') { escape } else { title };
tokenizer.consume();
- State::Fn(Box::new(func))
+ State::Fn(if matches!(byte, b'\\') {
+ StateName::TitleEscape
+ } else {
+ StateName::TitleInside
+ })
}
}
}
@@ -170,12 +183,12 @@ fn title(tokenizer: &mut Tokenizer) -> State {
/// > | "a\*b"
/// ^
/// ```
-fn escape(tokenizer: &mut Tokenizer) -> State {
+pub fn escape(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'"' | b'\'' | b')') => {
tokenizer.consume();
- State::Fn(Box::new(title))
+ State::Fn(StateName::TitleInside)
}
- _ => title(tokenizer),
+ _ => inside(tokenizer),
}
}
diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs
index 2ed2046..4ed25b6 100644
--- a/src/construct/thematic_break.rs
+++ b/src/construct/thematic_break.rs
@@ -51,7 +51,7 @@
use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
use crate::constant::{TAB_SIZE, THEMATIC_BREAK_MARKER_COUNT_MIN};
use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};
/// Start of a thematic break.
///
@@ -62,17 +62,17 @@ use crate::tokenizer::{State, Tokenizer};
pub fn start(tokenizer: &mut Tokenizer) -> State {
if tokenizer.parse_state.constructs.thematic_break {
tokenizer.enter(Token::ThematicBreak);
- tokenizer.go(
- space_or_tab_min_max(
- 0,
- if tokenizer.parse_state.constructs.code_indented {
- TAB_SIZE - 1
- } else {
- usize::MAX
- },
- ),
- before,
- )(tokenizer)
+ let state_name = space_or_tab_min_max(
+ tokenizer,
+ 0,
+ if tokenizer.parse_state.constructs.code_indented {
+ TAB_SIZE - 1
+ } else {
+ usize::MAX
+ },
+ );
+
+ tokenizer.go(state_name, StateName::ThematicBreakBefore)
} else {
State::Nok
}
@@ -84,7 +84,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | ***
/// ^
/// ```
-fn before(tokenizer: &mut Tokenizer) -> State {
+pub fn before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'*' | b'-' | b'_') => {
tokenizer.tokenize_state.marker = tokenizer.current.unwrap();
@@ -100,7 +100,7 @@ fn before(tokenizer: &mut Tokenizer) -> State {
/// > | ***
/// ^
/// ```
-fn at_break(tokenizer: &mut Tokenizer) -> State {
+pub fn at_break(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') if tokenizer.tokenize_state.size >= THEMATIC_BREAK_MARKER_COUNT_MIN => {
tokenizer.tokenize_state.marker = 0;
@@ -130,18 +130,19 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {
/// > | ***
/// ^
/// ```
-fn sequence(tokenizer: &mut Tokenizer) -> State {
+pub fn sequence(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'*' | b'-' | b'_')
if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker =>
{
tokenizer.consume();
tokenizer.tokenize_state.size += 1;
- State::Fn(Box::new(sequence))
+ State::Fn(StateName::ThematicBreakSequence)
}
_ => {
tokenizer.exit(Token::ThematicBreakSequence);
- tokenizer.attempt_opt(space_or_tab(), at_break)(tokenizer)
+ let state_name = space_or_tab(tokenizer);
+ tokenizer.attempt_opt(state_name, StateName::ThematicBreakAtBreak)
}
}
}
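
`thematic_break.rs` above uses the two remaining combinators in their new by-name form: `go(state_name, StateName::ThematicBreakBefore)` and `attempt_opt(state_name, StateName::ThematicBreakAtBreak)`. Reusing the toy `Tokenizer` from the `attempt` sketch earlier, they can be read roughly as the hypothetical helpers below (simplified: the real `go` and `attempt_opt` also decide whether to rewind or keep the attempted events):

```rust
// Hypothetical helpers over the toy `attempt` above; not the crate's
// actual implementations.
fn go(tokenizer: &mut Tokenizer, name: StateName, after: StateName) -> State {
    // Continue at `after` only if `name` succeeded; otherwise fail outward.
    tokenizer.attempt(name, move |ok| if ok { State::Fn(after) } else { State::Nok })
}

fn attempt_opt(tokenizer: &mut Tokenizer, name: StateName, after: StateName) -> State {
    // Optional attempt: continue at `after` whether or not `name` matched.
    tokenizer.attempt(name, move |_ok| State::Fn(after))
}
```
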
diff --git a/src/content/document.rs b/src/content/document.rs
index 33c8ff9..7a43d48 100644
--- a/src/content/document.rs
+++ b/src/content/document.rs
@@ -8,16 +8,13 @@
//! * [Block quote][crate::construct::block_quote]
//! * [List][crate::construct::list]
-use crate::construct::{
- block_quote::{cont as block_quote_cont, start as block_quote},
- list::{cont as list_item_const, start as list_item},
- partial_bom::start as bom,
-};
-use crate::content::flow::start as flow;
use crate::parser::ParseState;
use crate::subtokenize::subtokenize;
use crate::token::Token;
-use crate::tokenizer::{Container, ContainerState, Event, EventType, Point, State, Tokenizer};
+use crate::tokenizer::{
+ Container, ContainerState, ContentType, Event, EventType, Link, Point, State, StateName,
+ Tokenizer,
+};
use crate::util::{
normalize_identifier::normalize_identifier,
skip,
@@ -59,7 +56,7 @@ enum Phase {
pub fn document(parse_state: &mut ParseState, point: Point) -> Vec<Event> {
let mut tokenizer = Tokenizer::new(point, parse_state);
- let state = tokenizer.push(0, parse_state.bytes.len(), Box::new(start));
+ let state = tokenizer.push(0, parse_state.bytes.len(), StateName::DocumentStart);
tokenizer.flush(state, true);
let mut index = 0;
@@ -103,8 +100,13 @@ pub fn document(parse_state: &mut ParseState, point: Point) -> Vec<Event> {
/// > | a
/// ^
/// ```
-fn start(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt_opt(bom, line_start)(tokenizer)
+pub fn start(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.tokenize_state.child_tokenizer = Some(Box::new(Tokenizer::new(
+ tokenizer.point.clone(),
+ tokenizer.parse_state,
+ )));
+ tokenizer.tokenize_state.document_child_state = Some(State::Fn(StateName::FlowStart));
+ tokenizer.attempt_opt(StateName::BomStart, StateName::DocumentLineStart)
}
/// Start of a line.
@@ -115,13 +117,8 @@ fn start(tokenizer: &mut Tokenizer) -> State {
/// > | > b
/// ^
/// ```
-fn line_start(tokenizer: &mut Tokenizer) -> State {
+pub fn line_start(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.document_continued = 0;
- tokenizer.tokenize_state.document_index = tokenizer.events.len();
- tokenizer
- .tokenize_state
- .document_inject
- .push((vec![], vec![]));
// Containers would only be interrupting if we’ve continued.
tokenizer.interrupt = false;
container_existing_before(tokenizer)
@@ -134,7 +131,7 @@ fn line_start(tokenizer: &mut Tokenizer) -> State {
/// > | > b
/// ^
/// ```
-fn container_existing_before(tokenizer: &mut Tokenizer) -> State {
+pub fn container_existing_before(tokenizer: &mut Tokenizer) -> State {
// If there are more existing containers, check whether the next one continues.
if tokenizer.tokenize_state.document_continued
< tokenizer.tokenize_state.document_container_stack.len()
@@ -143,19 +140,19 @@ fn container_existing_before(tokenizer: &mut Tokenizer) -> State {
.tokenize_state
.document_container_stack
.remove(tokenizer.tokenize_state.document_continued);
- let cont = match container.kind {
- Container::BlockQuote => block_quote_cont,
- Container::ListItem => list_item_const,
+ let state_name = match container.kind {
+ Container::BlockQuote => StateName::BlockQuoteContStart,
+ Container::ListItem => StateName::ListContStart,
};
tokenizer.container = Some(container);
- tokenizer.attempt(cont, |ok| {
- Box::new(if ok {
- container_existing_after
+ tokenizer.attempt(state_name, |ok| {
+ State::Fn(if ok {
+ StateName::DocumentContainerExistingAfter
} else {
- container_existing_missing
+ StateName::DocumentContainerExistingMissing
})
- })(tokenizer)
+ })
}
// Otherwise, check new containers.
else {
@@ -170,7 +167,7 @@ fn container_existing_before(tokenizer: &mut Tokenizer) -> State {
/// > | > b
/// ^
/// ```
-fn container_existing_missing(tokenizer: &mut Tokenizer) -> State {
+pub fn container_existing_missing(tokenizer: &mut Tokenizer) -> State {
let container = tokenizer.container.take().unwrap();
tokenizer
.tokenize_state
@@ -186,7 +183,7 @@ fn container_existing_missing(tokenizer: &mut Tokenizer) -> State {
/// > | b
/// ^
/// ```
-fn container_existing_after(tokenizer: &mut Tokenizer) -> State {
+pub fn container_existing_after(tokenizer: &mut Tokenizer) -> State {
let container = tokenizer.container.take().unwrap();
tokenizer
.tokenize_state
@@ -204,17 +201,28 @@ fn container_existing_after(tokenizer: &mut Tokenizer) -> State {
/// > | > b
/// ^
/// ```
-fn container_new_before(tokenizer: &mut Tokenizer) -> State {
+pub fn container_new_before(tokenizer: &mut Tokenizer) -> State {
// If we have completely continued, restore the flow’s past `interrupt`
// status.
if tokenizer.tokenize_state.document_continued
== tokenizer.tokenize_state.document_container_stack.len()
{
- tokenizer.interrupt = tokenizer.tokenize_state.document_interrupt_before;
+ tokenizer.interrupt = tokenizer
+ .tokenize_state
+ .child_tokenizer
+ .as_ref()
+ .unwrap()
+ .interrupt;
// …and if we’re in a concrete construct, new containers can’t “pierce”
// into them.
- if tokenizer.concrete {
+ if tokenizer
+ .tokenize_state
+ .child_tokenizer
+ .as_ref()
+ .unwrap()
+ .concrete
+ {
return containers_after(tokenizer);
}
}
@@ -227,17 +235,17 @@ fn container_new_before(tokenizer: &mut Tokenizer) -> State {
size: 0,
});
- tokenizer.attempt(block_quote, |ok| {
- Box::new(if ok {
- container_new_after
+ tokenizer.attempt(StateName::BlockQuoteStart, |ok| {
+ State::Fn(if ok {
+ StateName::DocumentContainerNewAfter
} else {
- container_new_before_not_blockquote
+ StateName::DocumentContainerNewBeforeNotBlockQuote
})
- })(tokenizer)
+ })
}
/// To do.
-fn container_new_before_not_blockquote(tokenizer: &mut Tokenizer) -> State {
+pub fn container_new_before_not_block_quote(tokenizer: &mut Tokenizer) -> State {
// List item?
tokenizer.container = Some(ContainerState {
kind: Container::ListItem,
@@ -245,13 +253,13 @@ fn container_new_before_not_blockquote(tokenizer: &mut Tokenizer) -> State {
size: 0,
});
- tokenizer.attempt(list_item, |ok| {
- Box::new(if ok {
- container_new_after
+ tokenizer.attempt(StateName::ListStart, |ok| {
+ State::Fn(if ok {
+ StateName::DocumentContainerNewAfter
} else {
- containers_after
+ StateName::DocumentContainersAfter
})
- })(tokenizer)
+ })
}
/// After a new container.
@@ -262,31 +270,9 @@ fn container_new_before_not_blockquote(tokenizer: &mut Tokenizer) -> State {
/// > | > b
/// ^
/// ```
-fn container_new_after(tokenizer: &mut Tokenizer) -> State {
+pub fn container_new_after(tokenizer: &mut Tokenizer) -> State {
let container = tokenizer.container.take().unwrap();
- // Remove from the event stack.
- // We’ll properly add exits at different points manually.
- let token_type = match container.kind {
- Container::BlockQuote => Token::BlockQuote,
- Container::ListItem => Token::ListItem,
- };
-
- let mut stack_index = tokenizer.stack.len();
- let mut found = false;
-
- while stack_index > 0 {
- stack_index -= 1;
-
- if tokenizer.stack[stack_index] == token_type {
- tokenizer.stack.remove(stack_index);
- found = true;
- break;
- }
- }
-
- debug_assert!(found, "expected to find container token to exit");
-
// If we did not continue all existing containers, and there is a new one,
// close the flow and those containers.
if tokenizer.tokenize_state.document_continued
@@ -314,37 +300,55 @@ fn container_new_after(tokenizer: &mut Tokenizer) -> State {
/// > | > b
/// ^
/// ```
-fn containers_after(tokenizer: &mut Tokenizer) -> State {
- // Store the container events we parsed.
- tokenizer
- .tokenize_state
- .document_inject
- .last_mut()
- .unwrap()
- .0
- .append(
- &mut tokenizer
- .events
- .split_off(tokenizer.tokenize_state.document_index),
- );
+pub fn containers_after(tokenizer: &mut Tokenizer) -> State {
+ if let Some(ref mut child) = tokenizer.tokenize_state.child_tokenizer {
+ child.lazy = tokenizer.tokenize_state.document_continued
+ != tokenizer.tokenize_state.document_container_stack.len();
+ child.interrupt = tokenizer.tokenize_state.document_interrupt_before;
+ child.define_skip(tokenizer.point.clone());
+ }
- tokenizer.lazy = tokenizer.tokenize_state.document_continued
- != tokenizer.tokenize_state.document_container_stack.len();
- tokenizer.interrupt = tokenizer.tokenize_state.document_interrupt_before;
- tokenizer.define_skip_current();
+ match tokenizer.current {
+ // Note: EOL is part of data.
+ None => flow_end(tokenizer),
+ Some(_) => {
+ let current = tokenizer.events.len();
+ let previous = tokenizer.tokenize_state.document_data_index.take();
+ if let Some(previous) = previous {
+ tokenizer.events[previous].link.as_mut().unwrap().next = Some(current);
+ }
+ tokenizer.tokenize_state.document_data_index = Some(current);
+ tokenizer.enter_with_link(
+ Token::Data,
+ Some(Link {
+ previous,
+ next: None,
+ content_type: ContentType::Flow,
+ }),
+ );
+ flow_inside(tokenizer)
+ }
+ }
+}
- let state = tokenizer
- .tokenize_state
- .document_next
- .take()
- .unwrap_or_else(|| Box::new(flow));
-
- // Parse flow, pausing after eols.
- tokenizer.go_until(
- state,
- |code| matches!(code, Some(b'\n')),
- |state| Box::new(|t| flow_end(t, state)),
- )(tokenizer)
+/// In flow data; note that the EOL is consumed as part of the data.
+/// In flow data; note that the EOL is consumed as part of the data.
+pub fn flow_inside(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ None => {
+ tokenizer.exit(Token::Data);
+ flow_end(tokenizer)
+ }
+ // Note: EOL is part of data.
+ Some(b'\n') => {
+ tokenizer.consume();
+ tokenizer.exit(Token::Data);
+ State::Fn(StateName::DocumentFlowEnd)
+ }
+ Some(_) => {
+ tokenizer.consume();
+ State::Fn(StateName::DocumentFlowInside)
+ }
+ }
}
/// After flow (after eol or at eof).
@@ -354,42 +358,70 @@ fn containers_after(tokenizer: &mut Tokenizer) -> State {
/// > | > b
/// ^ ^
/// ```
-fn flow_end(tokenizer: &mut Tokenizer, result: State) -> State {
- let paragraph = !tokenizer.events.is_empty()
- && tokenizer.events[skip::opt_back(
- &tokenizer.events,
- tokenizer.events.len() - 1,
- &[Token::LineEnding],
- )]
- .token_type
- == Token::Paragraph;
-
- if tokenizer.lazy && paragraph && tokenizer.tokenize_state.document_paragraph_before {
- tokenizer.tokenize_state.document_continued =
- tokenizer.tokenize_state.document_container_stack.len();
- }
-
- if tokenizer.tokenize_state.document_continued
- != tokenizer.tokenize_state.document_container_stack.len()
+pub fn flow_end(tokenizer: &mut Tokenizer) -> State {
+ let mut paragraph = false;
+ let mut interrupt = false;
+
+ // We have new data.
+ // Note that everything except for a `null` is data.
+ if tokenizer.events.len() > 1
+ && tokenizer.events[tokenizer.events.len() - 1].token_type == Token::Data
{
- exit_containers(tokenizer, &Phase::After);
- }
+ let position = Position::from_exit_event(&tokenizer.events, tokenizer.events.len() - 1);
+
+ let state = tokenizer
+ .tokenize_state
+ .document_child_state
+ .take()
+ .unwrap_or(State::Fn(StateName::FlowStart));
+
+ let state_name = match state {
+ State::Fn(state_name) => state_name,
+ _ => unreachable!("expected state name"),
+ };
+
+ if let Some(ref mut child) = tokenizer.tokenize_state.child_tokenizer {
+ // To do: handle VS?
+ // if position.start.vs > 0 {
+ // }
+ let state = child.push(position.start.index, position.end.index, state_name);
+
+ interrupt = child.interrupt;
+ paragraph = matches!(state, State::Fn(StateName::ParagraphInside))
+ || (!child.events.is_empty()
+ && child.events[skip::opt_back(
+ &child.events,
+ child.events.len() - 1,
+ &[Token::LineEnding],
+ )]
+ .token_type
+ == Token::Paragraph);
+
+ tokenizer.tokenize_state.document_child_state = Some(state);
+
+ if child.lazy && paragraph && tokenizer.tokenize_state.document_paragraph_before {
+ tokenizer.tokenize_state.document_continued =
+ tokenizer.tokenize_state.document_container_stack.len();
+ }
- match result {
- State::Ok => {
- if !tokenizer.tokenize_state.document_container_stack.is_empty() {
- tokenizer.tokenize_state.document_continued = 0;
- exit_containers(tokenizer, &Phase::Eof);
+ if tokenizer.tokenize_state.document_continued
+ != tokenizer.tokenize_state.document_container_stack.len()
+ {
+ exit_containers(tokenizer, &Phase::After);
}
+ }
+ }
+ match tokenizer.current {
+ None => {
+ tokenizer.tokenize_state.document_continued = 0;
+ exit_containers(tokenizer, &Phase::Eof);
resolve(tokenizer);
State::Ok
}
- State::Nok => unreachable!("unexpected `nok` from flow"),
- State::Fn(func) => {
+ Some(_) => {
tokenizer.tokenize_state.document_paragraph_before = paragraph;
- tokenizer.tokenize_state.document_interrupt_before = tokenizer.interrupt;
- tokenizer.tokenize_state.document_next = Some(func);
+ tokenizer.tokenize_state.document_interrupt_before = interrupt;
line_start(tokenizer)
}
}
@@ -403,98 +435,248 @@ fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) {
.split_off(tokenizer.tokenize_state.document_continued);
// So, we’re at the end of a line, but we need to close the *previous* line.
- if *phase != Phase::Eof {
- tokenizer.define_skip_current();
- let mut current_events = tokenizer
- .events
- .split_off(tokenizer.tokenize_state.document_index);
- let state = tokenizer
- .tokenize_state
- .document_next
- .take()
- .unwrap_or_else(|| Box::new(flow));
- tokenizer.flush(State::Fn(state), false);
-
- if *phase == Phase::Prefix {
- tokenizer.tokenize_state.document_index = tokenizer.events.len();
+ if let Some(ref mut child) = tokenizer.tokenize_state.child_tokenizer {
+ if *phase != Phase::After {
+ let state = tokenizer
+ .tokenize_state
+ .document_child_state
+ .take()
+ .unwrap_or(State::Fn(StateName::FlowStart));
+
+ child.flush(state, false);
}
- tokenizer.events.append(&mut current_events);
- }
+ if !stack_close.is_empty() {
+ let mut inject_index = tokenizer.events.len();
- let mut exits = Vec::with_capacity(stack_close.len());
+ // Move past the current data to find the last container start if we’re
+ // closing due to a potential lazy flow that was not lazy.
+ if *phase == Phase::After {
+ inject_index -= 2;
+ }
- while !stack_close.is_empty() {
- let container = stack_close.pop().unwrap();
- let token_type = match container.kind {
- Container::BlockQuote => Token::BlockQuote,
- Container::ListItem => Token::ListItem,
- };
+ // Move past the container starts to find the last data if we’re
+ // closing due to a different container or lazy flow like above.
+ if *phase == Phase::After || *phase == Phase::Prefix {
+ while inject_index > 0 {
+ let event = &tokenizer.events[inject_index - 1];
+
+ if event.token_type == Token::Data {
+ break;
+ }
+
+ inject_index -= 1;
+ }
+ }
+
+ // Move past data starts that are just whitespace only without
+ // container starts.
+ while inject_index > 0 {
+ let event = &tokenizer.events[inject_index - 1];
+
+ if event.token_type == Token::Data {
+ if event.event_type == EventType::Exit {
+ let slice = Slice::from_position(
+ tokenizer.parse_state.bytes,
+ &Position::from_exit_event(&tokenizer.events, inject_index - 1),
+ );
+ let bytes = slice.bytes;
+ let mut whitespace = true;
+ let mut index = 0;
+ while index < bytes.len() {
+ match bytes[index] {
+ b'\t' | b'\n' | b'\r' | b' ' => index += 1,
+ _ => {
+ whitespace = false;
+ break;
+ }
+ }
+ }
+
+ if !whitespace {
+ break;
+ }
+ }
+ } else {
+ break;
+ }
+
+ inject_index -= 1;
+ }
+
+ let ref_point = if inject_index == tokenizer.events.len() {
+ tokenizer.point.clone()
+ } else {
+ tokenizer.events[inject_index].point.clone()
+ };
+
+ let mut exits = Vec::with_capacity(stack_close.len());
+
+ while !stack_close.is_empty() {
+ let container = stack_close.pop().unwrap();
+ let token_type = match container.kind {
+ Container::BlockQuote => Token::BlockQuote,
+ Container::ListItem => Token::ListItem,
+ };
+
+ exits.push(Event {
+ event_type: EventType::Exit,
+ token_type: token_type.clone(),
+ point: ref_point.clone(),
+ link: None,
+ });
+
+ let mut stack_index = tokenizer.stack.len();
+ let mut found = false;
+
+ while stack_index > 0 {
+ stack_index -= 1;
+
+ if tokenizer.stack[stack_index] == token_type {
+ tokenizer.stack.remove(stack_index);
+ found = true;
+ break;
+ }
+ }
+
+ debug_assert!(found, "expected to find container token to exit");
+ }
- exits.push(Event {
- event_type: EventType::Exit,
- token_type: token_type.clone(),
- // Note: positions are fixed later.
- point: tokenizer.point.clone(),
- link: None,
- });
+ tokenizer.map.add(inject_index, 0, exits);
+ }
}
- let index =
- tokenizer.tokenize_state.document_inject.len() - (if *phase == Phase::Eof { 1 } else { 2 });
- tokenizer.tokenize_state.document_inject[index]
- .1
- .append(&mut exits);
tokenizer.tokenize_state.document_interrupt_before = false;
}
// Inject the container events.
fn resolve(tokenizer: &mut Tokenizer) {
- let mut index = 0;
- let mut inject = tokenizer.tokenize_state.document_inject.split_off(0);
- inject.reverse();
- let mut first_line_ending_in_run = None;
-
- while let Some((before, mut after)) = inject.pop() {
- if !before.is_empty() {
- first_line_ending_in_run = None;
- tokenizer.map.add(index, 0, before);
- }
+ let mut child = tokenizer.tokenize_state.child_tokenizer.take().unwrap();
+ child.map.consume(&mut child.events);
+ // To do: see if we can do this less.
+ tokenizer.map.consume(&mut tokenizer.events);
- while index < tokenizer.events.len() {
- let event = &tokenizer.events[index];
+ let mut link_index = skip::to(&tokenizer.events, 0, &[Token::Data]);
+ // To do: share this code with `subtokenize`.
+ // Now, loop through all subevents to figure out which parts
+ // belong where and fix deep links.
+ let mut subindex = 0;
+ let mut slices = vec![];
+ let mut slice_start = 0;
+ let mut old_prev: Option<usize> = None;
+
+ while subindex < child.events.len() {
+ // Find the first event that starts after the end we’re looking
+ // for.
+ if child.events[subindex].event_type == EventType::Enter
+ && child.events[subindex].point.index >= tokenizer.events[link_index + 1].point.index
+ {
+ slices.push((link_index, slice_start));
+ slice_start = subindex;
+ link_index = tokenizer.events[link_index]
+ .link
+ .as_ref()
+ .unwrap()
+ .next
+ .unwrap();
+ }
- if event.token_type == Token::LineEnding || event.token_type == Token::BlankLineEnding {
- if event.event_type == EventType::Enter {
- first_line_ending_in_run = first_line_ending_in_run.or(Some(index));
+ // Fix sublinks.
+ if let Some(sublink_curr) = &child.events[subindex].link {
+ if sublink_curr.previous.is_some() {
+ let old_prev = old_prev.unwrap();
+ let prev_event = &mut child.events[old_prev];
+ // The `index` in `events` where the current link is,
+ // minus one to get the previous link,
+ // minus 2 events (the enter and exit) for each removed
+ // link.
+ let new_link = if slices.is_empty() {
+ old_prev + link_index + 2
} else {
- index += 1;
- break;
- }
- } else if event.token_type == Token::SpaceOrTab {
- // Empty to allow whitespace in blank lines.
- } else if first_line_ending_in_run.is_some() {
- first_line_ending_in_run = None;
+ old_prev + link_index - (slices.len() - 1) * 2
+ };
+ prev_event.link.as_mut().unwrap().next = Some(new_link);
}
+ }
- index += 1;
+ // If there is a `next` link in the subevents, we have to change
+ // its `previous` index to account for the shifted events.
+ // If it points to a next event, we also change the next event’s
+ // reference back to *this* event.
+ if let Some(sublink_curr) = &child.events[subindex].link {
+ if let Some(next) = sublink_curr.next {
+ let sublink_next = child.events[next].link.as_mut().unwrap();
+
+ old_prev = sublink_next.previous;
+
+ sublink_next.previous = sublink_next
+ .previous
+ // The `index` in `events` where the current link is,
+ // minus 2 events (the enter and exit) for each removed
+ // link.
+ .map(|previous| previous + link_index - (slices.len() * 2));
+ }
}
- let point_rel = if let Some(index) = first_line_ending_in_run {
- &tokenizer.events[index].point
- } else {
- &tokenizer.point
- };
+ subindex += 1;
+ }
- let close_index = first_line_ending_in_run.unwrap_or(index);
+ if !child.events.is_empty() {
+ slices.push((link_index, slice_start));
+ }
+
+ // Finally, inject the subevents.
+ let mut index = slices.len();
+
+ while index > 0 {
+ index -= 1;
+ let start = slices[index].0;
+ tokenizer.map.add(
+ start,
+ if start == tokenizer.events.len() {
+ 0
+ } else {
+ 2
+ },
+ child.events.split_off(slices[index].1),
+ );
+ }
+ // To do: share the above code with `subtokenize`.
- let mut subevent_index = 0;
- while subevent_index < after.len() {
- after[subevent_index].point = point_rel.clone();
- subevent_index += 1;
+ let mut resolvers = child.resolvers.split_off(0);
+ let mut resolver_ids = child.resolver_ids.split_off(0);
+ tokenizer.resolvers.append(&mut resolvers);
+ tokenizer.resolver_ids.append(&mut resolver_ids);
+
+ // To do: see if we can do this less.
+ tokenizer.map.consume(&mut tokenizer.events);
+
+ let mut index = 0;
+ let mut last_eol_enter: Option<usize> = None;
+ while index < tokenizer.events.len() {
+ let event = &tokenizer.events[index];
+
+ if event.event_type == EventType::Exit {
+ if event.token_type == Token::BlockQuote || event.token_type == Token::ListItem {
+ if let Some(inject) = last_eol_enter {
+ let point = tokenizer.events[inject].point.clone();
+ let mut clone = event.clone();
+ clone.point = point;
+ // Inject a fixed exit.
+ tokenizer.map.add(inject, 0, vec![clone]);
+ // Remove this exit.
+ tokenizer.map.add(index, 1, vec![]);
+ }
+ } else if event.token_type == Token::LineEnding
+ || event.token_type == Token::BlankLineEnding
+ {
+ last_eol_enter = Some(index - 1);
+ } else {
+ last_eol_enter = None;
+ }
}
- tokenizer.map.add(close_index, 0, after);
+ index += 1;
}
tokenizer.map.consume(&mut tokenizer.events);
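
The `document.rs` rewrite is the largest behavioral change in the commit: instead of `go_until` pausing the same tokenizer at every EOL, the document layer now keeps one persistent child tokenizer, wraps each line in a linked `Data` event with the new `ContentType::Flow`, pushes that slice into the child, and parks the child's paused `State` in `document_child_state` until the next line. A deliberately tiny sketch of that parent/child shape, with stand-in types (the real child is a full `Tokenizer`):

```rust
#[derive(Clone, Copy, Debug, PartialEq)]
enum State {
    Paused, // stand-in for `State::Fn(name)`
    Ok,
}

#[derive(Default)]
struct Child {
    bytes_seen: usize,
    lines_seen: usize,
}

impl Child {
    // Feed one slice; resume from (and return) the paused state.
    fn push(&mut self, slice: &[u8], _resume: State) -> State {
        self.bytes_seen += slice.len();
        self.lines_seen += 1;
        State::Paused
    }

    // At EOF the parent flushes whatever state the child parked.
    fn flush(&mut self, _state: State) -> State {
        State::Ok
    }
}

fn document(bytes: &[u8]) -> (usize, usize) {
    let mut child = Child::default();
    let mut state = State::Paused;
    // Each line (EOL included) becomes one linked `Data` slice.
    for line in bytes.split_inclusive(|byte| *byte == b'\n') {
        state = child.push(line, state);
    }
    let done = child.flush(state);
    assert_eq!(done, State::Ok);
    (child.lines_seen, child.bytes_seen)
}

fn main() {
    let (lines, bytes) = document(b"> a\n> b\n");
    assert_eq!((lines, bytes), (2, 8));
}
```

Keeping the child around between lines is what lets `flow_end` inspect `child.interrupt`, `child.lazy`, and whether the child paused inside a paragraph, before deciding to close containers.
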
diff --git a/src/content/flow.rs b/src/content/flow.rs
index bf4104c..6f62901 100644
--- a/src/content/flow.rs
+++ b/src/content/flow.rs
@@ -19,15 +19,8 @@
//! * [HTML (flow)][crate::construct::html_flow]
//! * [Thematic break][crate::construct::thematic_break]
-use crate::construct::{
- blank_line::start as blank_line, code_fenced::start as code_fenced,
- code_indented::start as code_indented, definition::start as definition,
- heading_atx::start as heading_atx, heading_setext::start as heading_setext,
- html_flow::start as html_flow, paragraph::start as paragraph,
- thematic_break::start as thematic_break,
-};
use crate::token::Token;
-use crate::tokenizer::{State, Tokenizer};
+use crate::tokenizer::{State, StateName, Tokenizer};
/// Before flow.
///
@@ -42,9 +35,13 @@ use crate::tokenizer::{State, Tokenizer};
pub fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => State::Ok,
- _ => tokenizer.attempt(blank_line, |ok| {
- Box::new(if ok { blank_line_after } else { initial_before })
- })(tokenizer),
+ _ => tokenizer.attempt(StateName::BlankLineStart, |ok| {
+ State::Fn(if ok {
+ StateName::FlowBlankLineAfter
+ } else {
+ StateName::FlowBefore
+ })
+ }),
}
}
@@ -60,21 +57,27 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// |~~~js
/// |<div>
/// ```
-fn initial_before(tokenizer: &mut Tokenizer) -> State {
+pub fn before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => State::Ok,
_ => tokenizer.attempt_n(
vec![
- Box::new(code_indented),
- Box::new(code_fenced),
- Box::new(html_flow),
- Box::new(heading_atx),
- Box::new(heading_setext),
- Box::new(thematic_break),
- Box::new(definition),
+ StateName::CodeIndentedStart,
+ StateName::CodeFencedStart,
+ StateName::HtmlFlowStart,
+ StateName::HeadingAtxStart,
+ StateName::HeadingSetextStart,
+ StateName::ThematicBreakStart,
+ StateName::DefinitionStart,
],
- |ok| Box::new(if ok { after } else { before_paragraph }),
- )(tokenizer),
+ |ok| {
+ State::Fn(if ok {
+ StateName::FlowAfter
+ } else {
+ StateName::FlowBeforeParagraph
+ })
+ },
+ ),
}
}
@@ -85,7 +88,7 @@ fn initial_before(tokenizer: &mut Tokenizer) -> State {
/// ```markdown
/// ␠␠|
/// ```
-fn blank_line_after(tokenizer: &mut Tokenizer) -> State {
+pub fn blank_line_after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => State::Ok,
Some(b'\n') => {
@@ -94,7 +97,7 @@ fn blank_line_after(tokenizer: &mut Tokenizer) -> State {
tokenizer.exit(Token::BlankLineEnding);
// Feel free to interrupt.
tokenizer.interrupt = false;
- State::Fn(Box::new(start))
+ State::Fn(StateName::FlowStart)
}
_ => unreachable!("expected eol/eof"),
}
@@ -109,14 +112,14 @@ fn blank_line_after(tokenizer: &mut Tokenizer) -> State {
/// asd
/// ~~~|
/// ```
-fn after(tokenizer: &mut Tokenizer) -> State {
+pub fn after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => State::Ok,
Some(b'\n') => {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
- State::Fn(Box::new(start))
+ State::Fn(StateName::FlowStart)
}
_ => unreachable!("expected eol/eof"),
}
@@ -127,6 +130,6 @@ fn after(tokenizer: &mut Tokenizer) -> State {
/// ```markdown
/// |asd
/// ```
-fn before_paragraph(tokenizer: &mut Tokenizer) -> State {
- tokenizer.go(paragraph, after)(tokenizer)
+pub fn before_paragraph(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.go(StateName::ParagraphStart, StateName::FlowAfter)
}
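
`flow.rs` here, and `string.rs`/`text.rs` below, switch `attempt_n` from a `Vec<Box<StateFn>>` to a `Vec<StateName>`, so the list of constructs to try is a flat array of enum values. A simplified, self-contained sketch of `attempt_n`'s contract (synchronous, and without the rewinding the real tokenizer performs between failed attempts):

```rust
#[derive(Clone, Copy)]
enum StateName {
    CodeIndentedStart,
    ThematicBreakStart,
}

#[derive(PartialEq)]
enum State {
    Ok,
    Nok,
}

struct Tokenizer<'a> {
    bytes: &'a [u8],
}

// Toy dispatch: each "construct" just checks a prefix.
fn call(name: StateName, tokenizer: &mut Tokenizer) -> State {
    match name {
        StateName::CodeIndentedStart => {
            if tokenizer.bytes.starts_with(b"    ") { State::Ok } else { State::Nok }
        }
        StateName::ThematicBreakStart => {
            if tokenizer.bytes.starts_with(b"***") { State::Ok } else { State::Nok }
        }
    }
}

fn attempt_n(
    tokenizer: &mut Tokenizer,
    names: Vec<StateName>,
    done: impl FnOnce(bool) -> State,
) -> State {
    for name in names {
        if call(name, tokenizer) == State::Ok {
            return done(true);
        }
        // On `Nok` the real tokenizer rewinds its position here.
    }
    done(false)
}

fn main() {
    let mut tokenizer = Tokenizer { bytes: b"***" };
    let state = attempt_n(
        &mut tokenizer,
        vec![StateName::CodeIndentedStart, StateName::ThematicBreakStart],
        |ok| if ok { State::Ok } else { State::Nok },
    );
    assert!(state == State::Ok);
}
```
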
diff --git a/src/content/string.rs b/src/content/string.rs
index 2e738fb..697ec2c 100644
--- a/src/content/string.rs
+++ b/src/content/string.rs
@@ -12,11 +12,8 @@
//!
//! [text]: crate::content::text
-use crate::construct::{
- character_escape::start as character_escape, character_reference::start as character_reference,
- partial_data::start as data, partial_whitespace::resolve_whitespace,
-};
-use crate::tokenizer::{State, Tokenizer};
+use crate::construct::partial_whitespace::resolve_whitespace;
+use crate::tokenizer::{State, StateName, Tokenizer};
const MARKERS: [u8; 2] = [b'&', b'\\'];
@@ -28,19 +25,28 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
}
/// Before string.
-fn before(tokenizer: &mut Tokenizer) -> State {
+pub fn before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => State::Ok,
_ => tokenizer.attempt_n(
- vec![Box::new(character_reference), Box::new(character_escape)],
- |ok| Box::new(if ok { before } else { before_data }),
- )(tokenizer),
+ vec![
+ StateName::CharacterReferenceStart,
+ StateName::CharacterEscapeStart,
+ ],
+ |ok| {
+ State::Fn(if ok {
+ StateName::StringBefore
+ } else {
+ StateName::StringBeforeData
+ })
+ },
+ ),
}
}
/// At data.
-fn before_data(tokenizer: &mut Tokenizer) -> State {
- tokenizer.go(data, before)(tokenizer)
+pub fn before_data(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.go(StateName::DataStart, StateName::StringBefore)
}
/// Resolve whitespace.
diff --git a/src/content/text.rs b/src/content/text.rs
index f4666d1..d8a2726 100644
--- a/src/content/text.rs
+++ b/src/content/text.rs
@@ -20,15 +20,8 @@
//! > 👉 **Note**: for performance reasons, hard break (trailing) is formed by
//! > [whitespace][crate::construct::partial_whitespace].
-use crate::construct::{
- attention::start as attention, autolink::start as autolink,
- character_escape::start as character_escape, character_reference::start as character_reference,
- code_text::start as code_text, hard_break_escape::start as hard_break_escape,
- html_text::start as html_text, label_end::start as label_end,
- label_start_image::start as label_start_image, label_start_link::start as label_start_link,
- partial_data::start as data, partial_whitespace::resolve_whitespace,
-};
-use crate::tokenizer::{State, Tokenizer};
+use crate::construct::partial_whitespace::resolve_whitespace;
+use crate::tokenizer::{State, StateName, Tokenizer};
const MARKERS: [u8; 9] = [
b'!', // `label_start_image`
@@ -55,19 +48,25 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
None => State::Ok,
_ => tokenizer.attempt_n(
vec![
- Box::new(attention),
- Box::new(autolink),
- Box::new(character_escape),
- Box::new(character_reference),
- Box::new(code_text),
- Box::new(hard_break_escape),
- Box::new(html_text),
- Box::new(label_end),
- Box::new(label_start_image),
- Box::new(label_start_link),
+ StateName::AttentionStart,
+ StateName::AutolinkStart,
+ StateName::CharacterEscapeStart,
+ StateName::CharacterReferenceStart,
+ StateName::CodeTextStart,
+ StateName::HardBreakEscapeStart,
+ StateName::HtmlTextStart,
+ StateName::LabelEndStart,
+ StateName::LabelStartImageStart,
+ StateName::LabelStartLinkStart,
],
- |ok| Box::new(if ok { before } else { before_data }),
- )(tokenizer),
+ |ok| {
+ State::Fn(if ok {
+ StateName::TextBefore
+ } else {
+ StateName::TextBeforeData
+ })
+ },
+ ),
}
}
@@ -76,8 +75,8 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
/// ```markdown
/// |qwe
/// ```
-fn before_data(tokenizer: &mut Tokenizer) -> State {
- tokenizer.go(data, before)(tokenizer)
+pub fn before_data(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.go(StateName::DataStart, StateName::TextBefore)
}
/// Resolve whitespace.
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index c641419..b080b46 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -21,9 +21,8 @@
//! thus the whole document needs to be parsed up to the level of definitions,
//! before any level that can include references can be parsed.
-use crate::content::{string::start as string, text::start as text};
use crate::parser::ParseState;
-use crate::tokenizer::{ContentType, Event, EventType, State, Tokenizer};
+use crate::tokenizer::{ContentType, Event, EventType, State, StateName, Tokenizer};
use crate::util::edit_map::EditMap;
/// Create a link between two [`Event`][]s.
@@ -79,11 +78,11 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool {
// Subtokenizer.
let mut tokenizer = Tokenizer::new(event.point.clone(), parse_state);
// Substate.
- let mut state = State::Fn(Box::new(if link.content_type == ContentType::String {
- string
+ let mut state = State::Fn(if link.content_type == ContentType::String {
+ StateName::StringStart
} else {
- text
- }));
+ StateName::TextStart
+ });
// Loop through links to pass them in order to the subtokenizer.
while let Some(index) = link_index {
@@ -92,7 +91,7 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool {
debug_assert_eq!(enter.event_type, EventType::Enter);
if link_curr.previous != None {
- tokenizer.define_skip(&enter.point);
+ tokenizer.define_skip(enter.point.clone());
}
state = tokenizer.push(
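
For context on the `define_skip(enter.point.clone())` call above: `subtokenize` walks chunks of one content type that are chained through `previous`/`next` indices on their events, the same linking the new `document` content type now relies on for its `Data` chunks. A toy illustration of walking such a chain (simplified `Event`/`Link`, not the crate's real structs):

```rust
struct Link {
    previous: Option<usize>,
    next: Option<usize>,
}

struct Event {
    text: &'static str,
    link: Option<Link>,
}

// Follow `next` indices from the first chunk, skipping unrelated events.
fn walk_chain(events: &[Event], first: usize) -> String {
    let mut out = String::new();
    let mut index = Some(first);
    while let Some(i) = index {
        out.push_str(events[i].text);
        index = events[i].link.as_ref().and_then(|link| link.next);
    }
    out
}

fn main() {
    // `a\n` and `b` form one content chain; the event between them does not.
    let events = vec![
        Event { text: "a\n", link: Some(Link { previous: None, next: Some(2) }) },
        Event { text: "(container exit)", link: None },
        Event { text: "b", link: Some(Link { previous: Some(0), next: None }) },
    ];
    assert_eq!(walk_chain(&events, 0), "a\nb");
}
```
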
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 3068ddf..7d28b77 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -12,6 +12,8 @@
//! [`check`]: Tokenizer::check
use crate::constant::TAB_SIZE;
+use crate::construct;
+use crate::content;
use crate::parser::ParseState;
use crate::token::{Token, VOID_TOKENS};
use crate::util::edit_map::EditMap;
@@ -19,10 +21,12 @@ use crate::util::edit_map::EditMap;
/// Embedded content type.
#[derive(Debug, Clone, PartialEq)]
pub enum ContentType {
- /// Represents [text content][crate::content::text].
- Text,
+ /// Represents [flow content][crate::content::flow].
+ Flow,
/// Represents [string content][crate::content::string].
String,
+ /// Represents [text content][crate::content::text].
+ Text,
}
#[derive(Debug, PartialEq)]
@@ -79,10 +83,9 @@ pub struct Event {
pub link: Option<Link>,
}
-/// The essence of the state machine are functions: `StateFn`.
-/// It’s responsible for dealing with the current byte.
-/// It yields a [`State`][].
-pub type StateFn = dyn FnOnce(&mut Tokenizer) -> State;
+pub struct Attempt {
+ done: Box<dyn FnOnce(&mut Tokenizer, State) -> State + 'static>,
+}
/// Callback that can be registered and is called when the tokenizer is done.
///
@@ -91,10 +94,619 @@ pub type StateFn = dyn FnOnce(&mut Tokenizer) -> State;
/// the compiler and other users.
pub type Resolver = dyn FnOnce(&mut Tokenizer);
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub enum StateName {
+ AttentionStart,
+ AttentionInside,
+
+ AutolinkStart,
+ AutolinkOpen,
+ AutolinkSchemeOrEmailAtext,
+ AutolinkSchemeInsideOrEmailAtext,
+ AutolinkUrlInside,
+ AutolinkEmailAtSignOrDot,
+ AutolinkEmailAtext,
+ AutolinkEmailValue,
+ AutolinkEmailLabel,
+
+ BlankLineStart,
+ BlankLineAfter,
+
+ BlockQuoteStart,
+ BlockQuoteBefore,
+ BlockQuoteContStart,
+ BlockQuoteContBefore,
+ BlockQuoteContAfter,
+
+ BomStart,
+ BomInside,
+
+ CharacterEscapeStart,
+ CharacterEscapeInside,
+
+ CharacterReferenceStart,
+ CharacterReferenceOpen,
+ CharacterReferenceNumeric,
+ CharacterReferenceValue,
+
+ CodeFencedStart,
+ CodeFencedBeforeSequenceOpen,
+ CodeFencedSequenceOpen,
+ CodeFencedInfoBefore,
+ CodeFencedInfo,
+ CodeFencedMetaBefore,
+ CodeFencedMeta,
+ CodeFencedAtNonLazyBreak,
+ CodeFencedCloseBefore,
+ CodeFencedCloseStart,
+ CodeFencedBeforeSequenceClose,
+ CodeFencedSequenceClose,
+ CodeFencedAfterSequenceClose,
+ CodeFencedContentBefore,
+ CodeFencedContentStart,
+ CodeFencedBeforeContentChunk,
+ CodeFencedContentChunk,
+ CodeFencedAfter,
+
+ CodeIndentedStart,
+ CodeIndentedAtBreak,
+ CodeIndentedAfter,
+ CodeIndentedFurtherStart,
+ CodeIndentedInside,
+ CodeIndentedFurtherEnd,
+ CodeIndentedFurtherBegin,
+ CodeIndentedFurtherAfter,
+
+ CodeTextStart,
+ CodeTextSequenceOpen,
+ CodeTextBetween,
+ CodeTextData,
+ CodeTextSequenceClose,
+
+ DataStart,
+ DataInside,
+ DataAtBreak,
+
+ DefinitionStart,
+ DefinitionBefore,
+ DefinitionLabelAfter,
+ DefinitionMarkerAfter,
+ DefinitionDestinationBefore,
+ DefinitionDestinationAfter,
+ DefinitionDestinationMissing,
+ DefinitionTitleBefore,
+ DefinitionAfter,
+ DefinitionAfterWhitespace,
+ DefinitionTitleBeforeMarker,
+ DefinitionTitleAfter,
+ DefinitionTitleAfterOptionalWhitespace,
+
+ DestinationStart,
+ DestinationEnclosedBefore,
+ DestinationEnclosed,
+ DestinationEnclosedEscape,
+ DestinationRaw,
+ DestinationRawEscape,
+
+ DocumentStart,
+ DocumentLineStart,
+ // DocumentContainerExistingBefore,
+ DocumentContainerExistingAfter,
+ DocumentContainerExistingMissing,
+ // DocumentContainerNewBefore,
+ DocumentContainerNewBeforeNotBlockQuote,
+ DocumentContainerNewAfter,
+ DocumentContainersAfter,
+ DocumentFlowInside,
+ DocumentFlowEnd,
+
+ FlowStart,
+ FlowBefore,
+ FlowAfter,
+ FlowBlankLineAfter,
+ FlowBeforeParagraph,
+
+ HardBreakEscapeStart,
+ HardBreakEscapeAfter,
+
+ HeadingAtxStart,
+ HeadingAtxBefore,
+ HeadingAtxSequenceOpen,
+ HeadingAtxAtBreak,
+ HeadingAtxSequenceFurther,
+ HeadingAtxData,
+
+ HeadingSetextStart,
+ HeadingSetextBefore,
+ HeadingSetextInside,
+ HeadingSetextAfter,
+
+ HtmlFlowStart,
+ HtmlFlowBefore,
+ HtmlFlowOpen,
+ HtmlFlowDeclarationOpen,
+ HtmlFlowCommentOpenInside,
+ HtmlFlowCdataOpenInside,
+ HtmlFlowTagCloseStart,
+ HtmlFlowTagName,
+ HtmlFlowBasicSelfClosing,
+ HtmlFlowCompleteClosingTagAfter,
+ HtmlFlowCompleteEnd,
+ HtmlFlowCompleteAttributeNameBefore,
+ HtmlFlowCompleteAttributeName,
+ HtmlFlowCompleteAttributeNameAfter,
+ HtmlFlowCompleteAttributeValueBefore,
+ HtmlFlowCompleteAttributeValueQuoted,
+ HtmlFlowCompleteAttributeValueQuotedAfter,
+ HtmlFlowCompleteAttributeValueUnquoted,
+ HtmlFlowCompleteAfter,
+ HtmlFlowBlankLineBefore,
+ HtmlFlowContinuation,
+ HtmlFlowContinuationDeclarationInside,
+ HtmlFlowContinuationAfter,
+ HtmlFlowContinuationStart,
+ HtmlFlowContinuationBefore,
+ HtmlFlowContinuationCommentInside,
+ HtmlFlowContinuationRawTagOpen,
+ HtmlFlowContinuationRawEndTag,
+ HtmlFlowContinuationClose,
+ HtmlFlowContinuationCdataInside,
+ HtmlFlowContinuationStartNonLazy,
+
+ HtmlTextStart,
+ HtmlTextOpen,
+ HtmlTextDeclarationOpen,
+ HtmlTextTagCloseStart,
+ HtmlTextTagClose,
+ HtmlTextTagCloseBetween,
+ HtmlTextTagOpen,
+ HtmlTextTagOpenBetween,
+ HtmlTextTagOpenAttributeName,
+ HtmlTextTagOpenAttributeNameAfter,
+ HtmlTextTagOpenAttributeValueBefore,
+ HtmlTextTagOpenAttributeValueQuoted,
+ HtmlTextTagOpenAttributeValueQuotedAfter,
+ HtmlTextTagOpenAttributeValueUnquoted,
+ HtmlTextCdata,
+ HtmlTextCdataOpenInside,
+ HtmlTextCdataClose,
+ HtmlTextCdataEnd,
+ HtmlTextCommentOpenInside,
+ HtmlTextCommentStart,
+ HtmlTextCommentStartDash,
+ HtmlTextComment,
+ HtmlTextCommentClose,
+ HtmlTextDeclaration,
+ HtmlTextEnd,
+ HtmlTextInstruction,
+ HtmlTextInstructionClose,
+ HtmlTextLineEndingAfter,
+ HtmlTextLineEndingAfterPrefix,
+
+ LabelStart,
+ LabelAtBreak,
+ LabelEolAfter,
+ LabelAtBlankLine,
+ LabelEscape,
+ LabelInside,
+
+ LabelEndStart,
+ LabelEndAfter,
+ LabelEndResourceStart,
+ LabelEndResourceBefore,
+ LabelEndResourceOpen,
+ LabelEndResourceDestinationAfter,
+ LabelEndResourceDestinationMissing,
+ LabelEndResourceBetween,
+ LabelEndResourceTitleAfter,
+ LabelEndResourceEnd,
+ LabelEndOk,
+ LabelEndNok,
+ LabelEndReferenceFull,
+ LabelEndReferenceFullAfter,
+ LabelEndReferenceNotFull,
+ LabelEndReferenceCollapsed,
+ LabelEndReferenceCollapsedOpen,
+
+ LabelStartImageStart,
+ LabelStartImageOpen,
+
+ LabelStartLinkStart,
+
+ ListStart,
+ ListBefore,
+ ListNok,
+ ListBeforeUnordered,
+ ListValue,
+ ListMarkerAfter,
+ ListAfter,
+ ListMarkerAfterFilled,
+ ListWhitespace,
+ ListPrefixOther,
+ ListWhitespaceAfter,
+ ListContStart,
+ ListContBlank,
+ ListContFilled,
+ ListOk,
+
+ NonLazyContinuationStart,
+ NonLazyContinuationAfter,
+
+ ParagraphStart,
+ ParagraphInside,
+
+ SpaceOrTabStart,
+ SpaceOrTabInside,
+
+ SpaceOrTabEolStart,
+ SpaceOrTabEolAfterFirst,
+ SpaceOrTabEolAfterEol,
+ SpaceOrTabEolAtEol,
+ SpaceOrTabEolAfterMore,
+
+ StringStart,
+ StringBefore,
+ StringBeforeData,
+
+ TextStart,
+ TextBefore,
+ TextBeforeData,
+
+ ThematicBreakStart,
+ ThematicBreakBefore,
+ ThematicBreakSequence,
+ ThematicBreakAtBreak,
+
+ TitleStart,
+ TitleBegin,
+ TitleAfterEol,
+ TitleAtBlankLine,
+ TitleEscape,
+ TitleInside,
+}
+
+impl StateName {
+ /// Turn a state name into the corresponding state function.
+ #[allow(clippy::too_many_lines)]
+ pub fn to_func(self) -> Box<dyn FnOnce(&mut Tokenizer) -> State + 'static> {
+ let func = match self {
+ StateName::AttentionStart => construct::attention::start,
+ StateName::AttentionInside => construct::attention::inside,
+
+ StateName::AutolinkStart => construct::autolink::start,
+ StateName::AutolinkOpen => construct::autolink::open,
+ StateName::AutolinkSchemeOrEmailAtext => construct::autolink::scheme_or_email_atext,
+ StateName::AutolinkSchemeInsideOrEmailAtext => {
+ construct::autolink::scheme_inside_or_email_atext
+ }
+ StateName::AutolinkUrlInside => construct::autolink::url_inside,
+ StateName::AutolinkEmailAtSignOrDot => construct::autolink::email_at_sign_or_dot,
+ StateName::AutolinkEmailAtext => construct::autolink::email_atext,
+ StateName::AutolinkEmailValue => construct::autolink::email_value,
+ StateName::AutolinkEmailLabel => construct::autolink::email_label,
+
+ StateName::BlankLineStart => construct::blank_line::start,
+ StateName::BlankLineAfter => construct::blank_line::after,
+
+ StateName::BlockQuoteStart => construct::block_quote::start,
+ StateName::BlockQuoteBefore => construct::block_quote::before,
+ StateName::BlockQuoteContStart => construct::block_quote::cont_start,
+ StateName::BlockQuoteContBefore => construct::block_quote::cont_before,
+ StateName::BlockQuoteContAfter => construct::block_quote::cont_after,
+
+ StateName::BomStart => construct::partial_bom::start,
+ StateName::BomInside => construct::partial_bom::inside,
+
+ StateName::CharacterEscapeStart => construct::character_escape::start,
+ StateName::CharacterEscapeInside => construct::character_escape::inside,
+
+ StateName::CharacterReferenceStart => construct::character_reference::start,
+ StateName::CharacterReferenceOpen => construct::character_reference::open,
+ StateName::CharacterReferenceNumeric => construct::character_reference::numeric,
+ StateName::CharacterReferenceValue => construct::character_reference::value,
+
+ StateName::CodeFencedStart => construct::code_fenced::start,
+ StateName::CodeFencedBeforeSequenceOpen => construct::code_fenced::before_sequence_open,
+ StateName::CodeFencedSequenceOpen => construct::code_fenced::sequence_open,
+ StateName::CodeFencedInfoBefore => construct::code_fenced::info_before,
+ StateName::CodeFencedInfo => construct::code_fenced::info,
+ StateName::CodeFencedMetaBefore => construct::code_fenced::meta_before,
+ StateName::CodeFencedMeta => construct::code_fenced::meta,
+ StateName::CodeFencedAtNonLazyBreak => construct::code_fenced::at_non_lazy_break,
+ StateName::CodeFencedCloseBefore => construct::code_fenced::close_before,
+ StateName::CodeFencedCloseStart => construct::code_fenced::close_start,
+ StateName::CodeFencedBeforeSequenceClose => {
+ construct::code_fenced::before_sequence_close
+ }
+ StateName::CodeFencedSequenceClose => construct::code_fenced::sequence_close,
+ StateName::CodeFencedAfterSequenceClose => construct::code_fenced::sequence_close_after,
+ StateName::CodeFencedContentBefore => construct::code_fenced::content_before,
+ StateName::CodeFencedContentStart => construct::code_fenced::content_start,
+ StateName::CodeFencedBeforeContentChunk => construct::code_fenced::before_content_chunk,
+ StateName::CodeFencedContentChunk => construct::code_fenced::content_chunk,
+ StateName::CodeFencedAfter => construct::code_fenced::after,
+
+ StateName::CodeIndentedStart => construct::code_indented::start,
+ StateName::CodeIndentedAtBreak => construct::code_indented::at_break,
+ StateName::CodeIndentedAfter => construct::code_indented::after,
+ StateName::CodeIndentedFurtherStart => construct::code_indented::further_start,
+ StateName::CodeIndentedInside => construct::code_indented::inside,
+ StateName::CodeIndentedFurtherEnd => construct::code_indented::further_end,
+ StateName::CodeIndentedFurtherBegin => construct::code_indented::further_begin,
+ StateName::CodeIndentedFurtherAfter => construct::code_indented::further_after,
+
+ StateName::CodeTextStart => construct::code_text::start,
+ StateName::CodeTextSequenceOpen => construct::code_text::sequence_open,
+ StateName::CodeTextBetween => construct::code_text::between,
+ StateName::CodeTextData => construct::code_text::data,
+ StateName::CodeTextSequenceClose => construct::code_text::sequence_close,
+
+ StateName::DataStart => construct::partial_data::start,
+ StateName::DataInside => construct::partial_data::inside,
+ StateName::DataAtBreak => construct::partial_data::at_break,
+
+ StateName::DefinitionStart => construct::definition::start,
+ StateName::DefinitionBefore => construct::definition::before,
+ StateName::DefinitionLabelAfter => construct::definition::label_after,
+ StateName::DefinitionMarkerAfter => construct::definition::marker_after,
+ StateName::DefinitionDestinationBefore => construct::definition::destination_before,
+ StateName::DefinitionDestinationAfter => construct::definition::destination_after,
+ StateName::DefinitionDestinationMissing => construct::definition::destination_missing,
+ StateName::DefinitionTitleBefore => construct::definition::title_before,
+ StateName::DefinitionAfter => construct::definition::after,
+ StateName::DefinitionAfterWhitespace => construct::definition::after_whitespace,
+ StateName::DefinitionTitleBeforeMarker => construct::definition::title_before_marker,
+ StateName::DefinitionTitleAfter => construct::definition::title_after,
+ StateName::DefinitionTitleAfterOptionalWhitespace => {
+ construct::definition::title_after_optional_whitespace
+ }
+
+ StateName::DestinationStart => construct::partial_destination::start,
+ StateName::DestinationEnclosedBefore => construct::partial_destination::enclosed_before,
+ StateName::DestinationEnclosed => construct::partial_destination::enclosed,
+ StateName::DestinationEnclosedEscape => construct::partial_destination::enclosed_escape,
+ StateName::DestinationRaw => construct::partial_destination::raw,
+ StateName::DestinationRawEscape => construct::partial_destination::raw_escape,
+
+ StateName::DocumentStart => content::document::start,
+ StateName::DocumentLineStart => content::document::line_start,
+ // StateName::DocumentContainerExistingBefore => content::document::container_existing_before,
+ StateName::DocumentContainerExistingAfter => {
+ content::document::container_existing_after
+ }
+ StateName::DocumentContainerExistingMissing => {
+ content::document::container_existing_missing
+ }
+ // StateName::DocumentContainerNewBefore => content::document::container_new_before,
+ StateName::DocumentContainerNewBeforeNotBlockQuote => {
+ content::document::container_new_before_not_block_quote
+ }
+ StateName::DocumentContainerNewAfter => content::document::container_new_after,
+ StateName::DocumentContainersAfter => content::document::containers_after,
+ StateName::DocumentFlowEnd => content::document::flow_end,
+ StateName::DocumentFlowInside => content::document::flow_inside,
+
+ StateName::FlowStart => content::flow::start,
+ StateName::FlowBefore => content::flow::before,
+ StateName::FlowAfter => content::flow::after,
+ StateName::FlowBlankLineAfter => content::flow::blank_line_after,
+ StateName::FlowBeforeParagraph => content::flow::before_paragraph,
+
+ StateName::HardBreakEscapeStart => construct::hard_break_escape::start,
+ StateName::HardBreakEscapeAfter => construct::hard_break_escape::after,
+
+ StateName::HeadingAtxStart => construct::heading_atx::start,
+ StateName::HeadingAtxBefore => construct::heading_atx::before,
+ StateName::HeadingAtxSequenceOpen => construct::heading_atx::sequence_open,
+ StateName::HeadingAtxAtBreak => construct::heading_atx::at_break,
+ StateName::HeadingAtxSequenceFurther => construct::heading_atx::sequence_further,
+ StateName::HeadingAtxData => construct::heading_atx::data,
+
+ StateName::HeadingSetextStart => construct::heading_setext::start,
+ StateName::HeadingSetextBefore => construct::heading_setext::before,
+ StateName::HeadingSetextInside => construct::heading_setext::inside,
+ StateName::HeadingSetextAfter => construct::heading_setext::after,
+
+ StateName::HtmlFlowStart => construct::html_flow::start,
+ StateName::HtmlFlowBefore => construct::html_flow::before,
+ StateName::HtmlFlowOpen => construct::html_flow::open,
+ StateName::HtmlFlowDeclarationOpen => construct::html_flow::declaration_open,
+ StateName::HtmlFlowCommentOpenInside => construct::html_flow::comment_open_inside,
+ StateName::HtmlFlowCdataOpenInside => construct::html_flow::cdata_open_inside,
+ StateName::HtmlFlowTagCloseStart => construct::html_flow::tag_close_start,
+ StateName::HtmlFlowTagName => construct::html_flow::tag_name,
+ StateName::HtmlFlowBasicSelfClosing => construct::html_flow::basic_self_closing,
+ StateName::HtmlFlowCompleteClosingTagAfter => {
+ construct::html_flow::complete_closing_tag_after
+ }
+ StateName::HtmlFlowCompleteEnd => construct::html_flow::complete_end,
+ StateName::HtmlFlowCompleteAttributeNameBefore => {
+ construct::html_flow::complete_attribute_name_before
+ }
+ StateName::HtmlFlowCompleteAttributeName => {
+ construct::html_flow::complete_attribute_name
+ }
+ StateName::HtmlFlowCompleteAttributeNameAfter => {
+ construct::html_flow::complete_attribute_name_after
+ }
+ StateName::HtmlFlowCompleteAttributeValueBefore => {
+ construct::html_flow::complete_attribute_value_before
+ }
+ StateName::HtmlFlowCompleteAttributeValueQuoted => {
+ construct::html_flow::complete_attribute_value_quoted
+ }
+ StateName::HtmlFlowCompleteAttributeValueQuotedAfter => {
+ construct::html_flow::complete_attribute_value_quoted_after
+ }
+ StateName::HtmlFlowCompleteAttributeValueUnquoted => {
+ construct::html_flow::complete_attribute_value_unquoted
+ }
+ StateName::HtmlFlowCompleteAfter => construct::html_flow::complete_after,
+ StateName::HtmlFlowBlankLineBefore => construct::html_flow::blank_line_before,
+ StateName::HtmlFlowContinuation => construct::html_flow::continuation,
+ StateName::HtmlFlowContinuationDeclarationInside => {
+ construct::html_flow::continuation_declaration_inside
+ }
+ StateName::HtmlFlowContinuationAfter => construct::html_flow::continuation_after,
+ StateName::HtmlFlowContinuationStart => construct::html_flow::continuation_start,
+ StateName::HtmlFlowContinuationBefore => construct::html_flow::continuation_before,
+ StateName::HtmlFlowContinuationCommentInside => {
+ construct::html_flow::continuation_comment_inside
+ }
+ StateName::HtmlFlowContinuationRawTagOpen => {
+ construct::html_flow::continuation_raw_tag_open
+ }
+ StateName::HtmlFlowContinuationRawEndTag => {
+ construct::html_flow::continuation_raw_end_tag
+ }
+ StateName::HtmlFlowContinuationClose => construct::html_flow::continuation_close,
+ StateName::HtmlFlowContinuationCdataInside => {
+ construct::html_flow::continuation_cdata_inside
+ }
+ StateName::HtmlFlowContinuationStartNonLazy => {
+ construct::html_flow::continuation_start_non_lazy
+ }
+
+ StateName::HtmlTextStart => construct::html_text::start,
+ StateName::HtmlTextOpen => construct::html_text::open,
+ StateName::HtmlTextDeclarationOpen => construct::html_text::declaration_open,
+ StateName::HtmlTextTagCloseStart => construct::html_text::tag_close_start,
+ StateName::HtmlTextTagClose => construct::html_text::tag_close,
+ StateName::HtmlTextTagCloseBetween => construct::html_text::tag_close_between,
+ StateName::HtmlTextTagOpen => construct::html_text::tag_open,
+ StateName::HtmlTextTagOpenBetween => construct::html_text::tag_open_between,
+ StateName::HtmlTextTagOpenAttributeName => {
+ construct::html_text::tag_open_attribute_name
+ }
+ StateName::HtmlTextTagOpenAttributeNameAfter => {
+ construct::html_text::tag_open_attribute_name_after
+ }
+ StateName::HtmlTextTagOpenAttributeValueBefore => {
+ construct::html_text::tag_open_attribute_value_before
+ }
+ StateName::HtmlTextTagOpenAttributeValueQuoted => {
+ construct::html_text::tag_open_attribute_value_quoted
+ }
+ StateName::HtmlTextTagOpenAttributeValueQuotedAfter => {
+ construct::html_text::tag_open_attribute_value_quoted_after
+ }
+ StateName::HtmlTextTagOpenAttributeValueUnquoted => {
+ construct::html_text::tag_open_attribute_value_unquoted
+ }
+ StateName::HtmlTextCdata => construct::html_text::cdata,
+ StateName::HtmlTextCdataOpenInside => construct::html_text::cdata_open_inside,
+ StateName::HtmlTextCdataClose => construct::html_text::cdata_close,
+ StateName::HtmlTextCdataEnd => construct::html_text::cdata_end,
+ StateName::HtmlTextCommentOpenInside => construct::html_text::comment_open_inside,
+ StateName::HtmlTextCommentStart => construct::html_text::comment_start,
+ StateName::HtmlTextCommentStartDash => construct::html_text::comment_start_dash,
+ StateName::HtmlTextComment => construct::html_text::comment,
+ StateName::HtmlTextCommentClose => construct::html_text::comment_close,
+ StateName::HtmlTextDeclaration => construct::html_text::declaration,
+ StateName::HtmlTextEnd => construct::html_text::end,
+ StateName::HtmlTextInstruction => construct::html_text::instruction,
+ StateName::HtmlTextInstructionClose => construct::html_text::instruction_close,
+ StateName::HtmlTextLineEndingAfter => construct::html_text::line_ending_after,
+ StateName::HtmlTextLineEndingAfterPrefix => {
+ construct::html_text::line_ending_after_prefix
+ }
+
+ StateName::LabelStart => construct::partial_label::start,
+ StateName::LabelAtBreak => construct::partial_label::at_break,
+ StateName::LabelEolAfter => construct::partial_label::eol_after,
+ StateName::LabelAtBlankLine => construct::partial_label::at_blank_line,
+ StateName::LabelEscape => construct::partial_label::escape,
+ StateName::LabelInside => construct::partial_label::inside,
+
+ StateName::LabelEndStart => construct::label_end::start,
+ StateName::LabelEndAfter => construct::label_end::after,
+ StateName::LabelEndResourceStart => construct::label_end::resource_start,
+ StateName::LabelEndResourceBefore => construct::label_end::resource_before,
+ StateName::LabelEndResourceOpen => construct::label_end::resource_open,
+ StateName::LabelEndResourceDestinationAfter => {
+ construct::label_end::resource_destination_after
+ }
+ StateName::LabelEndResourceDestinationMissing => {
+ construct::label_end::resource_destination_missing
+ }
+ StateName::LabelEndResourceBetween => construct::label_end::resource_between,
+ StateName::LabelEndResourceTitleAfter => construct::label_end::resource_title_after,
+ StateName::LabelEndResourceEnd => construct::label_end::resource_end,
+ StateName::LabelEndOk => construct::label_end::ok,
+ StateName::LabelEndNok => construct::label_end::nok,
+ StateName::LabelEndReferenceFull => construct::label_end::reference_full,
+ StateName::LabelEndReferenceFullAfter => construct::label_end::reference_full_after,
+ StateName::LabelEndReferenceNotFull => construct::label_end::reference_not_full,
+ StateName::LabelEndReferenceCollapsed => construct::label_end::reference_collapsed,
+ StateName::LabelEndReferenceCollapsedOpen => {
+ construct::label_end::reference_collapsed_open
+ }
+
+ StateName::LabelStartImageStart => construct::label_start_image::start,
+ StateName::LabelStartImageOpen => construct::label_start_image::open,
+ StateName::LabelStartLinkStart => construct::label_start_link::start,
+
+ StateName::ListStart => construct::list::start,
+ StateName::ListBefore => construct::list::before,
+ StateName::ListNok => construct::list::nok,
+ StateName::ListBeforeUnordered => construct::list::before_unordered,
+ StateName::ListValue => construct::list::value,
+ StateName::ListMarkerAfter => construct::list::marker_after,
+ StateName::ListAfter => construct::list::after,
+ StateName::ListMarkerAfterFilled => construct::list::marker_after_filled,
+ StateName::ListWhitespace => construct::list::whitespace,
+ StateName::ListWhitespaceAfter => construct::list::whitespace_after,
+ StateName::ListPrefixOther => construct::list::prefix_other,
+ StateName::ListContStart => construct::list::cont_start,
+ StateName::ListContBlank => construct::list::cont_blank,
+ StateName::ListContFilled => construct::list::cont_filled,
+ StateName::ListOk => construct::list::ok,
+
+ StateName::NonLazyContinuationStart => construct::partial_non_lazy_continuation::start,
+ StateName::NonLazyContinuationAfter => construct::partial_non_lazy_continuation::after,
+
+ StateName::ParagraphStart => construct::paragraph::start,
+ StateName::ParagraphInside => construct::paragraph::inside,
+
+ StateName::SpaceOrTabStart => construct::partial_space_or_tab::start,
+ StateName::SpaceOrTabInside => construct::partial_space_or_tab::inside,
+
+ StateName::SpaceOrTabEolStart => construct::partial_space_or_tab::eol_start,
+ StateName::SpaceOrTabEolAfterFirst => construct::partial_space_or_tab::eol_after_first,
+ StateName::SpaceOrTabEolAfterEol => construct::partial_space_or_tab::eol_after_eol,
+ StateName::SpaceOrTabEolAtEol => construct::partial_space_or_tab::eol_at_eol,
+ StateName::SpaceOrTabEolAfterMore => construct::partial_space_or_tab::eol_after_more,
+
+ StateName::StringStart => content::string::start,
+ StateName::StringBefore => content::string::before,
+ StateName::StringBeforeData => content::string::before_data,
+
+ StateName::TextStart => content::text::start,
+ StateName::TextBefore => content::text::before,
+ StateName::TextBeforeData => content::text::before_data,
+
+ StateName::ThematicBreakStart => construct::thematic_break::start,
+ StateName::ThematicBreakBefore => construct::thematic_break::before,
+ StateName::ThematicBreakSequence => construct::thematic_break::sequence,
+ StateName::ThematicBreakAtBreak => construct::thematic_break::at_break,
+
+ StateName::TitleStart => construct::partial_title::start,
+ StateName::TitleBegin => construct::partial_title::begin,
+ StateName::TitleAfterEol => construct::partial_title::after_eol,
+ StateName::TitleAtBlankLine => construct::partial_title::at_blank_line,
+ StateName::TitleEscape => construct::partial_title::escape,
+ StateName::TitleInside => construct::partial_title::inside,
+ };
+
+ Box::new(func)
+ }
+}
+
/// The result of a state.
+#[derive(Debug, PartialEq)]
pub enum State {
- /// There is a future state: a boxed [`StateFn`][] to pass the next code to.
- Fn(Box<StateFn>),
+ /// There is a future state: a [`StateName`][] to pass the next code to.
+ Fn(StateName),
/// The state is successful.
Ok,
/// The state is not successful.
@@ -163,7 +775,7 @@ struct InternalState {
/// To do
#[allow(clippy::struct_excessive_bools)]
-pub struct TokenizeState {
+pub struct TokenizeState<'a> {
/// To do.
pub connect: bool,
/// To do.
@@ -171,15 +783,15 @@ pub struct TokenizeState {
/// To do.
pub document_continued: usize,
/// To do.
- pub document_index: usize,
- /// To do.
- pub document_inject: Vec<(Vec<Event>, Vec<Event>)>,
- /// To do.
pub document_interrupt_before: bool,
/// To do.
pub document_paragraph_before: bool,
/// To do.
- pub document_next: Option<Box<StateFn>>,
+ pub document_data_index: Option<usize>,
+ /// Current state of the child tokenizer of the `document` content type.
+ pub document_child_state: Option<State>,
+ /// Child tokenizer, used by the `document` content type.
+ pub child_tokenizer: Option<Box<Tokenizer<'a>>>,
/// To do.
pub marker: u8,
/// To do.
@@ -187,7 +799,7 @@ pub struct TokenizeState {
/// To do.
pub prefix: usize,
/// To do.
- pub return_state: Option<Box<StateFn>>,
+ pub return_state: Option<StateName>,
/// To do.
pub seen: bool,
/// To do.
@@ -234,7 +846,7 @@ pub struct Tokenizer<'a> {
/// Track whether this tokenizer is done.
resolved: bool,
/// To do.
- attempt_balance: usize,
+ attempts: Vec<Attempt>,
/// Current byte.
pub current: Option<u8>,
/// Previous byte.
@@ -251,13 +863,13 @@ pub struct Tokenizer<'a> {
pub map: EditMap,
/// List of attached resolvers, which will be called when done feeding,
/// to clean events.
- resolvers: Vec<Box<Resolver>>,
+ pub resolvers: Vec<Box<Resolver>>,
/// List of names associated with attached resolvers.
- resolver_ids: Vec<String>,
+ pub resolver_ids: Vec<String>,
/// Shared parsing state across tokenizers.
pub parse_state: &'a ParseState<'a>,
/// To do.
- pub tokenize_state: TokenizeState,
+ pub tokenize_state: TokenizeState<'a>,
/// Stack of label (start) that could form images and links.
///
/// Used when tokenizing [text content][crate::content::text].
@@ -299,7 +911,7 @@ impl<'a> Tokenizer<'a> {
line_start: point.clone(),
consumed: true,
resolved: false,
- attempt_balance: 0,
+ attempts: vec![],
point,
stack: vec![],
events: vec![],
@@ -308,11 +920,11 @@ impl<'a> Tokenizer<'a> {
connect: false,
document_container_stack: vec![],
document_continued: 0,
- document_index: 0,
- document_inject: vec![],
document_interrupt_before: false,
document_paragraph_before: false,
- document_next: None,
+ document_data_index: None,
+ document_child_state: None,
+ child_tokenizer: None,
marker: 0,
marker_other: 0,
prefix: 0,
@@ -369,13 +981,22 @@ impl<'a> Tokenizer<'a> {
}
/// Define a jump between two places.
- pub fn define_skip(&mut self, point: &Point) {
- define_skip_impl(self, point.line, (point.index, point.vs));
- }
+ ///
+ /// This defines the future index to move to after a line ending.
+ pub fn define_skip(&mut self, mut point: Point) {
+ move_point_back(self, &mut point);
+
+ let info = (point.index, point.vs);
+ log::debug!("position: define skip: {:?} -> ({:?})", point.line, info);
+ let at = point.line - self.first_line;
+
+ if at >= self.column_start.len() {
+ self.column_start.push(info);
+ } else {
+ self.column_start[at] = info;
+ }
- /// Define the current place as a jump between two places.
- pub fn define_skip_current(&mut self) {
- define_skip_impl(self, self.point.line, (self.point.index, self.point.vs));
+ self.account_for_potential_skip();
}
/// Increment the current positional info if we’re right after a line
@@ -396,8 +1017,8 @@ impl<'a> Tokenizer<'a> {
}
/// Consume the current byte.
- /// Each [`StateFn`][] is expected to call this to signal that this code is
- /// used, or call a next `StateFn`.
+ /// Each state function is expected to call this to signal that this byte is
+ /// used, or to return another state name that will receive it.
pub fn consume(&mut self) {
log::debug!("consume: `{:?}` ({:?})", self.current, self.point);
debug_assert!(!self.consumed, "expected code to not have been consumed: this might be because `x(code)` instead of `x` was returned");
@@ -473,16 +1094,7 @@ impl<'a> Tokenizer<'a> {
pub fn enter_with_link(&mut self, token_type: Token, link: Option<Link>) {
let mut point = self.point.clone();
-
- // Move back past ignored bytes.
- while point.index > 0 {
- point.index -= 1;
- let action = byte_action(self.parse_state.bytes, &point);
- if !matches!(action, ByteAction::Ignore) {
- point.index += 1;
- break;
- }
- }
+ move_point_back(self, &mut point);
log::debug!("enter: `{:?}` ({:?})", token_type, point);
self.events.push(Event {
@@ -527,15 +1139,7 @@ impl<'a> Tokenizer<'a> {
if matches!(self.previous, Some(b'\n')) {
point = self.line_start.clone();
} else {
- // Move back past ignored bytes.
- while point.index > 0 {
- point.index -= 1;
- let action = byte_action(self.parse_state.bytes, &point);
- if !matches!(action, ByteAction::Ignore) {
- point.index += 1;
- break;
- }
- }
+ move_point_back(self, &mut point);
}
log::debug!("exit: `{:?}` ({:?})", token_type, point);
@@ -575,29 +1179,20 @@ impl<'a> Tokenizer<'a> {
self.stack.truncate(previous.stack_len);
}
- /// Parse with `state_fn` and its future states, switching to `ok` when
+ /// Parse with `state_name` and its future states, switching to `after` when
/// successful, and passing [`State::Nok`][] back up if it occurs.
///
/// This function does not capture the current state, in case of
/// `State::Nok`, as it is assumed that this `go` is itself wrapped in
/// another `attempt`.
#[allow(clippy::unused_self)]
- pub fn go(
- &mut self,
- state_fn: impl FnOnce(&mut Tokenizer) -> State + 'static,
- after: impl FnOnce(&mut Tokenizer) -> State + 'static,
- ) -> Box<StateFn> {
- self.attempt_balance += 1;
+ pub fn go(&mut self, state_name: StateName, after: StateName) -> State {
attempt_impl(
- state_fn,
- None,
- self.point.index,
- |tokenizer: &mut Tokenizer, state| {
- tokenizer.attempt_balance -= 1;
-
+ self,
+ state_name,
+ Box::new(move |_tokenizer: &mut Tokenizer, state| {
if matches!(state, State::Ok) {
- tokenizer.consumed = true;
- State::Fn(Box::new(after))
+ State::Fn(after)
} else {
// Must be `Nok`.
// We don’t capture/free state because it is assumed that
@@ -605,132 +1200,122 @@ impl<'a> Tokenizer<'a> {
// if it can occur.
state
}
- },
- )
- }
-
- /// Like `go`, but this lets you *hijack* back to some other state after a
- /// certain code.
- #[allow(clippy::unused_self)]
- pub fn go_until(
- &mut self,
- state_fn: impl FnOnce(&mut Tokenizer) -> State + 'static,
- until: impl Fn(Option<u8>) -> bool + 'static,
- done: impl FnOnce(State) -> Box<StateFn> + 'static,
- ) -> Box<StateFn> {
- self.attempt_balance += 1;
- attempt_impl(
- state_fn,
- Some(Box::new(until)),
- self.point.index,
- |tokenizer: &mut Tokenizer, state| {
- tokenizer.attempt_balance -= 1;
- tokenizer.consumed = true;
- // We don’t capture/free state because it is assumed that
- // `go_until` itself is wrapped in another attempt that does
- // that if it can occur.
- State::Fn(done(state))
- },
+ }),
)
}
- /// Parse with `state_fn` and its future states, to check if it result in
+ /// Parse with `state_name` and its future states, to check if it results in
/// [`State::Ok`][] or [`State::Nok`][], revert on both cases, and then
/// call `done` with whether it was successful or not.
///
/// This captures the current state of the tokenizer, returns a wrapped
- /// state that captures all codes and feeds them to `state_fn` and its
+ /// state that captures all codes and feeds them to `state_name` and its
/// future states until it yields `State::Ok` or `State::Nok`.
/// It then applies the captured state, calls `done`, and feeds all
/// captured codes to its future states.
pub fn check(
&mut self,
- state_fn: impl FnOnce(&mut Tokenizer) -> State + 'static,
- done: impl FnOnce(bool) -> Box<StateFn> + 'static,
- ) -> Box<StateFn> {
- self.attempt_balance += 1;
+ state_name: StateName,
+ done: impl FnOnce(bool) -> State + 'static,
+ ) -> State {
let previous = self.capture();
attempt_impl(
- state_fn,
- None,
- self.point.index,
- |tokenizer: &mut Tokenizer, state| {
- tokenizer.attempt_balance -= 1;
+ self,
+ state_name,
+ Box::new(|tokenizer: &mut Tokenizer, state| {
tokenizer.free(previous);
tokenizer.consumed = true;
- State::Fn(done(matches!(state, State::Ok)))
- },
+ done(matches!(state, State::Ok))
+ }),
)
}
- /// Parse with `state_fn` and its future states, to check if it results in
+ /// Parse with `state_name` and its future states, to check if it results in
/// [`State::Ok`][] or [`State::Nok`][], revert on the case of
/// `State::Nok`, and then call `done` with whether it was successful or
/// not.
///
/// This captures the current state of the tokenizer, returns a wrapped
- /// state that captures all codes and feeds them to `state_fn` and its
+ /// state that captures all codes and feeds them to `state_name` and its
/// future states until it yields `State::Ok`, at which point it calls
/// `done` and yields its result.
/// If instead `State::Nok` was yielded, the captured state is applied,
/// `done` is called, and all captured codes are fed to its future states.
pub fn attempt(
&mut self,
- state_fn: impl FnOnce(&mut Tokenizer) -> State + 'static,
- done: impl FnOnce(bool) -> Box<StateFn> + 'static,
- ) -> Box<StateFn> {
- self.attempt_balance += 1;
+ state_name: StateName,
+ done: impl FnOnce(bool) -> State + 'static,
+ ) -> State {
let previous = self.capture();
+ log::debug!("attempting: {:?}", state_name);
attempt_impl(
- state_fn,
- None,
- self.point.index,
- |tokenizer: &mut Tokenizer, state| {
- tokenizer.attempt_balance -= 1;
+ self,
+ state_name,
+ Box::new(move |tokenizer: &mut Tokenizer, state| {
let ok = matches!(state, State::Ok);
if !ok {
tokenizer.free(previous);
+ tokenizer.consumed = true;
}
- log::debug!("attempt: {:?}, at {:?}", ok, tokenizer.point);
+ log::debug!(
+ "attempted {:?}: {:?}, at {:?}",
+ state_name,
+ ok,
+ tokenizer.point
+ );
- tokenizer.consumed = true;
- State::Fn(done(ok))
- },
+ done(ok)
+ }),
)
}
/// Just like [`attempt`][Tokenizer::attempt], but many.
pub fn attempt_n(
&mut self,
- mut state_fns: Vec<Box<StateFn>>,
- done: impl FnOnce(bool) -> Box<StateFn> + 'static,
- ) -> Box<StateFn> {
- if state_fns.is_empty() {
+ mut state_names: Vec<StateName>,
+ done: impl FnOnce(bool) -> State + 'static,
+ ) -> State {
+ if state_names.is_empty() {
done(false)
} else {
- let state_fn = state_fns.remove(0);
- self.attempt(state_fn, move |ok| {
- if ok {
- done(ok)
- } else {
- Box::new(|t| t.attempt_n(state_fns, done)(t))
- }
- })
+ let previous = self.capture();
+ let state_name = state_names.remove(0);
+ self.consumed = false;
+ log::debug!("attempting (n): {:?}", state_name);
+ attempt_impl(
+ self,
+ state_name,
+ Box::new(move |tokenizer: &mut Tokenizer, state| {
+ let ok = matches!(state, State::Ok);
+
+ log::debug!(
+ "attempted (n) {:?}: {:?}, at {:?}",
+ state_name,
+ ok,
+ tokenizer.point
+ );
+
+ if ok {
+ done(true)
+ } else {
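+ // This candidate failed: revert to the captured state and try
+ // the next name.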
+ tokenizer.free(previous);
+ tokenizer.consumed = true;
+ tokenizer.attempt_n(state_names, done)
+ }
+ }),
+ )
}
}
/// Just like [`attempt`][Tokenizer::attempt], but for when you don’t care
/// about `ok`.
- pub fn attempt_opt(
- &mut self,
- state_fn: impl FnOnce(&mut Tokenizer) -> State + 'static,
- after: impl FnOnce(&mut Tokenizer) -> State + 'static,
- ) -> Box<StateFn> {
- self.attempt(state_fn, |_ok| Box::new(after))
+ pub fn attempt_opt(&mut self, state_name: StateName, after: StateName) -> State {
+ self.attempt(state_name, move |_ok| State::Fn(after))
}
/// Feed a list of `codes` into `start`.
@@ -738,30 +1323,40 @@ impl<'a> Tokenizer<'a> {
/// This is set up to support repeatedly calling `feed`, and thus streaming
/// markdown into the state machine, and normally pauses after feeding.
// Note: if needed: accept `vs`?
- pub fn push(
- &mut self,
- min: usize,
- max: usize,
- start: impl FnOnce(&mut Tokenizer) -> State + 'static,
- ) -> State {
+ pub fn push(&mut self, min: usize, max: usize, state_name: StateName) -> State {
debug_assert!(!self.resolved, "cannot feed after drain");
- debug_assert!(min >= self.point.index, "cannot move backwards");
- self.move_to((min, 0));
+ // debug_assert!(min >= self.point.index, "cannot move backwards");
+ if min > self.point.index {
+ self.move_to((min, 0));
+ }
- let mut state = State::Fn(Box::new(start));
+ let mut state = State::Fn(state_name);
while self.point.index < max {
match state {
- State::Ok | State::Nok => break,
- State::Fn(func) => match byte_action(self.parse_state.bytes, &self.point) {
+ State::Ok | State::Nok => {
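+ // A state machine concluded: resume the enclosing attempt, if any.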
+ if let Some(attempt) = self.attempts.pop() {
+ let done = attempt.done;
+ self.consumed = true;
+ state = done(self, state);
+ } else {
+ break;
+ }
+ }
+ State::Fn(state_name) => match byte_action(self.parse_state.bytes, &self.point) {
ByteAction::Ignore => {
- state = State::Fn(Box::new(func));
+ state = State::Fn(state_name);
self.move_one();
}
ByteAction::Insert(byte) | ByteAction::Normal(byte) => {
- log::debug!("main: passing: `{:?}` ({:?})", byte, self.point);
+ log::debug!(
+ "main: passing: `{:?}` ({:?}) to {:?}",
+ byte,
+ self.point,
+ state_name
+ );
self.expect(Some(byte));
- state = func(self);
+ state = call_impl(self, state_name);
}
},
}
@@ -778,8 +1373,16 @@ impl<'a> Tokenizer<'a> {
loop {
match state {
- State::Ok | State::Nok => break,
- State::Fn(func) => {
+ State::Ok | State::Nok => {
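+ // As when pushing: hand the result to the enclosing attempt, if any.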
+ if let Some(attempt) = self.attempts.pop() {
+ let done = attempt.done;
+ self.consumed = true;
+ state = done(self, state);
+ } else {
+ break;
+ }
+ }
+ State::Fn(state_name) => {
// We sometimes move back when flushing, so then we use those codes.
let action = if self.point.index == max {
None
@@ -788,7 +1391,7 @@ impl<'a> Tokenizer<'a> {
};
if let Some(ByteAction::Ignore) = action {
- state = State::Fn(Box::new(func));
+ state = State::Fn(state_name);
self.move_one();
} else {
let byte =
@@ -800,14 +1403,20 @@ impl<'a> Tokenizer<'a> {
None
};
- log::debug!("main: flushing: `{:?}` ({:?})", byte, self.point);
+ log::debug!(
+ "main: flushing: `{:?}` ({:?}) to {:?}",
+ byte,
+ self.point,
+ state_name
+ );
self.expect(byte);
- state = func(self);
+ state = call_impl(self, state_name);
}
}
}
}
+ self.consumed = true;
debug_assert!(matches!(state, State::Ok), "must be ok");
if resolve {
@@ -869,80 +1478,29 @@ fn byte_action(bytes: &[u8], point: &Point) -> ByteAction {
/// Recurses into itself.
/// Used in [`Tokenizer::attempt`][Tokenizer::attempt] and [`Tokenizer::check`][Tokenizer::check].
fn attempt_impl(
- state: impl FnOnce(&mut Tokenizer) -> State + 'static,
- pause: Option<Box<dyn Fn(Option<u8>) -> bool + 'static>>,
- start: usize,
- done: impl FnOnce(&mut Tokenizer, State) -> State + 'static,
-) -> Box<StateFn> {
- Box::new(move |tokenizer| {
- if let Some(ref func) = pause {
- if tokenizer.point.index > start && func(tokenizer.previous) {
- return done(tokenizer, State::Fn(Box::new(state)));
- }
- }
+ tokenizer: &mut Tokenizer,
+ state_name: StateName,
+ done: Box<impl FnOnce(&mut Tokenizer, State) -> State + 'static>,
+) -> State {
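+ // Rather than wrapping state functions in closures, park `done` on a
+ // stack; the feed loops pop and call it once the attempted state
+ // machine yields `State::Ok` or `State::Nok`.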
+ tokenizer.attempts.push(Attempt { done });
+ call_impl(tokenizer, state_name)
+}
- let state = state(tokenizer);
-
- match state {
- State::Ok | State::Nok => {
- if tokenizer.attempt_balance == 0 {
- debug_assert!(!tokenizer.tokenize_state.connect);
- debug_assert_eq!(tokenizer.tokenize_state.document_continued, 0);
- debug_assert_eq!(tokenizer.tokenize_state.document_index, 0);
- debug_assert!(!tokenizer.tokenize_state.document_interrupt_before);
- debug_assert!(!tokenizer.tokenize_state.document_paragraph_before);
- debug_assert_eq!(tokenizer.tokenize_state.marker, 0);
- debug_assert_eq!(tokenizer.tokenize_state.marker_other, 0);
- debug_assert_eq!(tokenizer.tokenize_state.prefix, 0);
- debug_assert!(!tokenizer.tokenize_state.seen);
- debug_assert_eq!(tokenizer.tokenize_state.size, 0);
- debug_assert_eq!(tokenizer.tokenize_state.size_other, 0);
- debug_assert_eq!(tokenizer.tokenize_state.stop.len(), 0);
- debug_assert_eq!(tokenizer.tokenize_state.start, 0);
- debug_assert_eq!(tokenizer.tokenize_state.end, 0);
- debug_assert!(tokenizer.tokenize_state.return_state.is_none());
- debug_assert!(!tokenizer.tokenize_state.space_or_tab_eol_connect);
- debug_assert!(!tokenizer.tokenize_state.space_or_tab_eol_ok);
- debug_assert!(tokenizer
- .tokenize_state
- .space_or_tab_eol_content_type
- .is_none());
- debug_assert!(!tokenizer.tokenize_state.space_or_tab_connect);
- debug_assert!(tokenizer.tokenize_state.space_or_tab_content_type.is_none());
- debug_assert_eq!(tokenizer.tokenize_state.space_or_tab_min, 0);
- debug_assert_eq!(tokenizer.tokenize_state.space_or_tab_max, 0);
- debug_assert_eq!(tokenizer.tokenize_state.space_or_tab_size, 0);
- debug_assert_eq!(
- tokenizer.tokenize_state.space_or_tab_token,
- Token::SpaceOrTab
- );
- debug_assert_eq!(tokenizer.tokenize_state.token_1, Token::Data);
- debug_assert_eq!(tokenizer.tokenize_state.token_2, Token::Data);
- debug_assert_eq!(tokenizer.tokenize_state.token_3, Token::Data);
- debug_assert_eq!(tokenizer.tokenize_state.token_4, Token::Data);
- debug_assert_eq!(tokenizer.tokenize_state.token_5, Token::Data);
- }
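+/// Call the state function that `state_name` maps to.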
+#[allow(clippy::too_many_lines)]
+fn call_impl(tokenizer: &mut Tokenizer, state_name: StateName) -> State {
+ let func = state_name.to_func();
- done(tokenizer, state)
- }
- State::Fn(func) => State::Fn(attempt_impl(func, pause, start, done)),
- }
- })
+ func(tokenizer)
}
-/// Flush `start`: pass `eof`s to it until done.
-/// Define a jump between two places.
-///
-/// This defines to which future index we move after a line ending.
-fn define_skip_impl(tokenizer: &mut Tokenizer, line: usize, info: (usize, usize)) {
- log::debug!("position: define skip: {:?} -> ({:?})", line, info);
- let at = line - tokenizer.first_line;
-
- if at >= tokenizer.column_start.len() {
- tokenizer.column_start.push(info);
- } else {
- tokenizer.column_start[at] = info;
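+/// Move a point back past bytes that the tokenizer ignores.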
+fn move_point_back(tokenizer: &mut Tokenizer, point: &mut Point) {
+ // Move back past ignored bytes.
+ while point.index > 0 {
+ point.index -= 1;
+ let action = byte_action(tokenizer.parse_state.bytes, point);
+ if !matches!(action, ByteAction::Ignore) {
+ point.index += 1;
+ break;
+ }
}
-
- tokenizer.account_for_potential_skip();
}