Diffstat
-rw-r--r--  src/construct/attention.rs            |  10
-rw-r--r--  src/construct/autolink.rs             |  45
-rw-r--r--  src/construct/character_reference.rs  | 124
-rw-r--r--  src/construct/code_fenced.rs          | 170
-rw-r--r--  src/construct/code_text.rs            |  40
-rw-r--r--  src/construct/definition.rs           |  95
-rw-r--r--  src/construct/heading_atx.rs          |  22
-rw-r--r--  src/construct/heading_setext.rs       |  10
-rw-r--r--  src/construct/html_flow.rs            | 419
-rw-r--r--  src/construct/html_text.rs            |  95
-rw-r--r--  src/construct/label_end.rs            | 222
-rw-r--r--  src/construct/list.rs                 |  42
-rw-r--r--  src/construct/paragraph.rs            |   4
-rw-r--r--  src/construct/partial_bom.rs          |  41
-rw-r--r--  src/construct/partial_data.rs         |  24
-rw-r--r--  src/construct/partial_destination.rs  | 119
-rw-r--r--  src/construct/partial_label.rs        | 116
-rw-r--r--  src/construct/partial_space_or_tab.rs | 191
-rw-r--r--  src/construct/partial_title.rs        | 120
-rw-r--r--  src/construct/partial_whitespace.rs   |  10
-rw-r--r--  src/construct/thematic_break.rs       |  50
-rw-r--r--  src/content/document.rs               | 279
-rw-r--r--  src/content/string.rs                 |  15
-rw-r--r--  src/content/text.rs                   |  22
-rw-r--r--  src/tokenizer.rs                      | 148
25 files changed, 1285 insertions, 1148 deletions
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
index 583fde2..fc2acfb 100644
--- a/src/construct/attention.rs
+++ b/src/construct/attention.rs
@@ -118,8 +118,9 @@ struct Sequence {
pub fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'*' | b'_') if tokenizer.parse_state.constructs.attention => {
+ tokenizer.tokenize_state.marker = tokenizer.current.unwrap();
tokenizer.enter(Token::AttentionSequence);
- inside(tokenizer, tokenizer.current.unwrap())
+ inside(tokenizer)
}
_ => State::Nok,
}
@@ -131,15 +132,16 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | **
/// ^^
/// ```
-fn inside(tokenizer: &mut Tokenizer, marker: u8) -> State {
+fn inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some(b'*' | b'_') if tokenizer.current.unwrap() == marker => {
+ Some(b'*' | b'_') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
tokenizer.consume();
- State::Fn(Box::new(move |t| inside(t, marker)))
+ State::Fn(Box::new(inside))
}
_ => {
tokenizer.exit(Token::AttentionSequence);
tokenizer.register_resolver("attention".to_string(), Box::new(resolve_attention));
+ tokenizer.tokenize_state.marker = b'\0';
State::Ok
}
}
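
The attention hunks above are the template for the whole commit: state that used to ride along as closure captures (here, the sequence `marker`) moves into shared `tokenize_state` fields, so each state can be a plain function handed to `Box::new` instead of a fresh `move` closure, and each construct resets the fields it used on every exit. A minimal, self-contained sketch of that mechanism, assuming a toy tokenizer (the real `State::Fn` boxes a `StateFn`; a bare fn pointer keeps the sketch short):

```rust
/// Shared scratch space; the real `TokenizeState` also carries `prefix`,
/// `start`, `seen`, `size_other`, and `token_1`..`token_5` slots.
#[derive(Default)]
struct TokenizeState {
    marker: u8,
}

struct Tokenizer {
    bytes: &'static [u8],
    index: usize,
    tokenize_state: TokenizeState,
}

impl Tokenizer {
    fn current(&self) -> Option<u8> {
        self.bytes.get(self.index).copied()
    }
    fn consume(&mut self) {
        self.index += 1;
    }
}

enum State {
    Ok,
    Nok,
    /// A plain `fn` pointer is enough once nothing is captured.
    Fn(fn(&mut Tokenizer) -> State),
}

fn start(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current() {
        Some(byte @ (b'*' | b'_')) => {
            // Was: a `marker` argument captured by a `move` closure.
            tokenizer.tokenize_state.marker = byte;
            tokenizer.consume();
            State::Fn(inside)
        }
        _ => State::Nok,
    }
}

fn inside(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current() {
        Some(byte) if byte == tokenizer.tokenize_state.marker => {
            tokenizer.consume();
            State::Fn(inside)
        }
        _ => {
            // The construct is done with the shared field: reset it.
            tokenizer.tokenize_state.marker = 0;
            State::Ok
        }
    }
}

fn main() {
    let mut tokenizer = Tokenizer {
        bytes: b"**rest",
        index: 0,
        tokenize_state: TokenizeState::default(),
    };
    let mut state = start(&mut tokenizer);
    while let State::Fn(next) = state {
        state = next(&mut tokenizer);
    }
    assert!(matches!(state, State::Ok));
    assert_eq!(tokenizer.index, 2); // consumed the `**` sequence
}
```

The payoff repeats in every file below: `State::Fn(Box::new(move |t| inside(t, marker)))` becomes `State::Fn(Box::new(inside))`.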
diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs
index bac291e..1444c61 100644
--- a/src/construct/autolink.rs
+++ b/src/construct/autolink.rs
@@ -158,7 +158,9 @@ fn scheme_or_email_atext(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// ASCII alphanumeric and `+`, `-`, and `.`.
Some(b'+' | b'-' | b'.' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => {
- scheme_inside_or_email_atext(tokenizer, 1)
+ // Count the previous alphabetical from `open` too.
+ tokenizer.tokenize_state.size = 1;
+ scheme_inside_or_email_atext(tokenizer)
}
_ => email_atext(tokenizer),
}
@@ -172,20 +174,25 @@ fn scheme_or_email_atext(tokenizer: &mut Tokenizer) -> State {
/// > | a<user@example.com>b
/// ^
/// ```
-fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer, size: usize) -> State {
+fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b':') => {
tokenizer.consume();
+ tokenizer.tokenize_state.size = 0;
State::Fn(Box::new(url_inside))
}
// ASCII alphanumeric and `+`, `-`, and `.`.
Some(b'+' | b'-' | b'.' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z')
- if size < AUTOLINK_SCHEME_SIZE_MAX =>
+ if tokenizer.tokenize_state.size < AUTOLINK_SCHEME_SIZE_MAX =>
{
+ tokenizer.tokenize_state.size += 1;
tokenizer.consume();
- State::Fn(Box::new(move |t| scheme_inside_or_email_atext(t, size + 1)))
+ State::Fn(Box::new(scheme_inside_or_email_atext))
+ }
+ _ => {
+ tokenizer.tokenize_state.size = 0;
+ email_atext(tokenizer)
}
- _ => email_atext(tokenizer),
}
}
@@ -220,7 +227,7 @@ fn email_atext(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'@') => {
tokenizer.consume();
- State::Fn(Box::new(|t| email_at_sign_or_dot(t, 0)))
+ State::Fn(Box::new(email_at_sign_or_dot))
}
// ASCII atext.
//
@@ -255,10 +262,10 @@ fn email_atext(tokenizer: &mut Tokenizer) -> State {
/// > | a<user.name@example.com>b
/// ^ ^
/// ```
-fn email_at_sign_or_dot(tokenizer: &mut Tokenizer, size: usize) -> State {
+fn email_at_sign_or_dot(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// ASCII alphanumeric.
- Some(b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => email_value(tokenizer, size),
+ Some(b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => email_value(tokenizer),
_ => State::Nok,
}
}
@@ -269,13 +276,15 @@ fn email_at_sign_or_dot(tokenizer: &mut Tokenizer, size: usize) -> State {
/// > | a<user.name@example.com>b
/// ^
/// ```
-fn email_label(tokenizer: &mut Tokenizer, size: usize) -> State {
+fn email_label(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'.') => {
+ tokenizer.tokenize_state.size = 0;
tokenizer.consume();
- State::Fn(Box::new(|t| email_at_sign_or_dot(t, 0)))
+ State::Fn(Box::new(email_at_sign_or_dot))
}
Some(b'>') => {
+ tokenizer.tokenize_state.size = 0;
let index = tokenizer.events.len();
tokenizer.exit(Token::AutolinkProtocol);
// Change the token type.
@@ -283,7 +292,7 @@ fn email_label(tokenizer: &mut Tokenizer, size: usize) -> State {
tokenizer.events[index].token_type = Token::AutolinkEmail;
end(tokenizer)
}
- _ => email_value(tokenizer, size),
+ _ => email_value(tokenizer),
}
}
@@ -295,19 +304,25 @@ fn email_label(tokenizer: &mut Tokenizer, size: usize) -> State {
/// > | a<user.name@ex-ample.com>b
/// ^
/// ```
-fn email_value(tokenizer: &mut Tokenizer, size: usize) -> State {
+fn email_value(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// ASCII alphanumeric or `-`.
- Some(b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') if size < AUTOLINK_DOMAIN_SIZE_MAX => {
+ Some(b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z')
+ if tokenizer.tokenize_state.size < AUTOLINK_DOMAIN_SIZE_MAX =>
+ {
let func = if matches!(tokenizer.current, Some(b'-')) {
email_value
} else {
email_label
};
+ tokenizer.tokenize_state.size += 1;
tokenizer.consume();
- State::Fn(Box::new(move |t| func(t, size + 1)))
+ State::Fn(Box::new(func))
+ }
+ _ => {
+ tokenizer.tokenize_state.size = 0;
+ State::Nok
}
- _ => State::Nok,
}
}
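
One discipline the autolink changes make visible: because `tokenize_state.size` is shared by every construct, each exit arm now resets it, including the new `_ => { ... State::Nok }` arms that exist only for that cleanup. A re-statement of the scheme-counting logic outside the state machine (assuming the 32-byte scheme cap from the autolink spec; the loop stands in for the chained states):

```rust
const AUTOLINK_SCHEME_SIZE_MAX: usize = 32;

#[derive(Default)]
struct TokenizeState {
    size: usize,
}

/// Count scheme bytes until `:`; `true` means a plausible scheme was seen.
fn scheme(state: &mut TokenizeState, bytes: &[u8]) -> bool {
    for &byte in bytes {
        match byte {
            b':' if state.size > 0 => {
                state.size = 0; // reset on the success path
                return true;
            }
            b'+' | b'-' | b'.' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z'
                if state.size < AUTOLINK_SCHEME_SIZE_MAX =>
            {
                state.size += 1;
            }
            _ => break,
        }
    }
    state.size = 0; // reset on the failure path as well
    false
}

fn main() {
    let mut state = TokenizeState::default();
    assert!(scheme(&mut state, b"https:"));
    assert!(!scheme(&mut state, b"@not-a-scheme"));
    // Either way, nothing leaks into whatever construct runs next.
    assert_eq!(state.size, 0);
}
```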
diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs
index 9393691..7cc74ba 100644
--- a/src/construct/character_reference.rs
+++ b/src/construct/character_reference.rs
@@ -69,17 +69,6 @@ use crate::token::Token;
use crate::tokenizer::{State, Tokenizer};
use crate::util::slice::Slice;
-/// State needed to parse character references.
-#[derive(Debug, Clone)]
-struct Info {
- /// Index of where value starts.
- start: usize,
- /// Marker of character reference.
- marker: u8,
- /// Maximum number of characters in the value for this kind.
- max: usize,
-}
-
/// Start of a character reference.
///
/// ```markdown
@@ -121,15 +110,9 @@ fn open(tokenizer: &mut Tokenizer) -> State {
tokenizer.exit(Token::CharacterReferenceMarkerNumeric);
State::Fn(Box::new(numeric))
} else {
+ tokenizer.tokenize_state.marker = b'&';
tokenizer.enter(Token::CharacterReferenceValue);
- value(
- tokenizer,
- Info {
- start: tokenizer.point.index,
- marker: b'&',
- max: CHARACTER_REFERENCE_NAMED_SIZE_MAX,
- },
- )
+ value(tokenizer)
}
}
@@ -148,20 +131,12 @@ fn numeric(tokenizer: &mut Tokenizer) -> State {
tokenizer.consume();
tokenizer.exit(Token::CharacterReferenceMarkerHexadecimal);
tokenizer.enter(Token::CharacterReferenceValue);
- let info = Info {
- start: tokenizer.point.index,
- marker: b'x',
- max: CHARACTER_REFERENCE_HEXADECIMAL_SIZE_MAX,
- };
- State::Fn(Box::new(|t| value(t, info)))
+ tokenizer.tokenize_state.marker = b'x';
+ State::Fn(Box::new(value))
} else {
tokenizer.enter(Token::CharacterReferenceValue);
- let info = Info {
- start: tokenizer.point.index,
- marker: b'#',
- max: CHARACTER_REFERENCE_DECIMAL_SIZE_MAX,
- };
- value(tokenizer, info)
+ tokenizer.tokenize_state.marker = b'#';
+ value(tokenizer)
}
}
@@ -179,50 +154,57 @@ fn numeric(tokenizer: &mut Tokenizer) -> State {
/// > | a&#x9;b
/// ^
/// ```
-fn value(tokenizer: &mut Tokenizer, info: Info) -> State {
- let size = tokenizer.point.index - info.start;
+fn value(tokenizer: &mut Tokenizer) -> State {
+ if matches!(tokenizer.current, Some(b';')) && tokenizer.tokenize_state.size > 0 {
+ // Named.
+ if tokenizer.tokenize_state.marker == b'&' {
+ // Guaranteed to be valid ASCII bytes.
+ let slice = Slice::from_indices(
+ tokenizer.parse_state.bytes,
+ tokenizer.point.index - tokenizer.tokenize_state.size,
+ tokenizer.point.index,
+ );
+ let name = slice.as_str();
- match tokenizer.current {
- Some(b';') if size > 0 => {
- // Named.
- if info.marker == b'&' {
- // Guaranteed to be valid ASCII bytes.
- let slice = Slice::from_indices(
- tokenizer.parse_state.bytes,
- info.start,
- tokenizer.point.index,
- );
- let name = slice.as_str();
-
- if !CHARACTER_REFERENCES.iter().any(|d| d.0 == name) {
- return State::Nok;
- }
+ if !CHARACTER_REFERENCES.iter().any(|d| d.0 == name) {
+ tokenizer.tokenize_state.marker = 0;
+ tokenizer.tokenize_state.size = 0;
+ return State::Nok;
}
-
- tokenizer.exit(Token::CharacterReferenceValue);
- tokenizer.enter(Token::CharacterReferenceMarkerSemi);
- tokenizer.consume();
- tokenizer.exit(Token::CharacterReferenceMarkerSemi);
- tokenizer.exit(Token::CharacterReference);
- State::Ok
- }
- // ASCII digit, for named, decimal, and hexadecimal references.
- Some(b'0'..=b'9') if size < info.max => {
- tokenizer.consume();
- State::Fn(Box::new(|t| value(t, info)))
}
- // ASCII hex letters, for named and hexadecimal references.
- Some(b'A'..=b'F' | b'a'..=b'f')
- if matches!(info.marker, b'&' | b'x') && size < info.max =>
- {
- tokenizer.consume();
- State::Fn(Box::new(|t| value(t, info)))
- }
- // Non-hex ASCII alphabeticals, for named references.
- Some(b'G'..=b'Z' | b'g'..=b'z') if info.marker == b'&' && size < info.max => {
+
+ tokenizer.exit(Token::CharacterReferenceValue);
+ tokenizer.enter(Token::CharacterReferenceMarkerSemi);
+ tokenizer.consume();
+ tokenizer.exit(Token::CharacterReferenceMarkerSemi);
+ tokenizer.exit(Token::CharacterReference);
+ tokenizer.tokenize_state.marker = 0;
+ tokenizer.tokenize_state.size = 0;
+ return State::Ok;
+ }
+
+ let max = match tokenizer.tokenize_state.marker {
+ b'&' => CHARACTER_REFERENCE_NAMED_SIZE_MAX,
+ b'x' => CHARACTER_REFERENCE_HEXADECIMAL_SIZE_MAX,
+ b'#' => CHARACTER_REFERENCE_DECIMAL_SIZE_MAX,
+ _ => unreachable!("Unexpected marker `{}`", tokenizer.tokenize_state.marker),
+ };
+ let test = match tokenizer.tokenize_state.marker {
+ b'&' => u8::is_ascii_alphanumeric,
+ b'x' => u8::is_ascii_hexdigit,
+ b'#' => u8::is_ascii_digit,
+ _ => unreachable!("Unexpected marker `{}`", tokenizer.tokenize_state.marker),
+ };
+
+ if let Some(byte) = tokenizer.current {
+ if tokenizer.tokenize_state.size < max && test(&byte) {
+ tokenizer.tokenize_state.size += 1;
tokenizer.consume();
- State::Fn(Box::new(|t| value(t, info)))
+ return State::Fn(Box::new(value));
}
- _ => State::Nok,
}
+
+ tokenizer.tokenize_state.marker = 0;
+ tokenizer.tokenize_state.size = 0;
+ State::Nok
}
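
In the rewritten `value` state, the length cap and the byte test are both derived from the stored marker rather than carried in `Info`; `u8::is_ascii_alphanumeric` and friends coerce to `fn(&u8) -> bool`, which is what makes the table-style dispatch work. A standalone sketch of that dispatch (the limits shown, 31/6/7, match the named/hexadecimal/decimal constants in the diff, but treat the exact numbers here as illustrative):

```rust
const NAMED_SIZE_MAX: usize = 31; // longest named reference
const HEXADECIMAL_SIZE_MAX: usize = 6;
const DECIMAL_SIZE_MAX: usize = 7;

fn limit_and_test(marker: u8) -> (usize, fn(&u8) -> bool) {
    match marker {
        b'&' => (NAMED_SIZE_MAX, u8::is_ascii_alphanumeric),
        b'x' => (HEXADECIMAL_SIZE_MAX, u8::is_ascii_hexdigit),
        b'#' => (DECIMAL_SIZE_MAX, u8::is_ascii_digit),
        _ => unreachable!("unexpected marker `{}`", marker),
    }
}

/// Would `value` be accepted for a reference of kind `marker`?
fn fits(marker: u8, value: &[u8]) -> bool {
    let (max, test) = limit_and_test(marker);
    !value.is_empty() && value.len() <= max && value.iter().all(test)
}

fn main() {
    assert!(fits(b'&', b"amp")); // &amp;
    assert!(fits(b'x', b"1F600")); // &#x1F600;
    assert!(!fits(b'#', b"1F600")); // not decimal digits
}
```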
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index c4c3e86..a22a0f9 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -110,18 +110,6 @@ use crate::token::Token;
use crate::tokenizer::{ContentType, State, Tokenizer};
use crate::util::slice::{Position, Slice};
-/// State needed to parse code (fenced).
-#[derive(Debug, Clone)]
-struct Info {
- /// Number of markers on the opening fence sequence.
- size: usize,
- /// Number of tabs or spaces of indentation before the opening fence
- /// sequence.
- prefix: usize,
- /// Marker of fences (`u8`).
- marker: u8,
-}
-
/// Start of fenced code.
///
/// ```markdown
@@ -173,15 +161,10 @@ fn before_sequence_open(tokenizer: &mut Tokenizer) -> State {
}
if let Some(b'`' | b'~') = tokenizer.current {
+ tokenizer.tokenize_state.marker = tokenizer.current.unwrap();
+ tokenizer.tokenize_state.prefix = prefix;
tokenizer.enter(Token::CodeFencedFenceSequence);
- sequence_open(
- tokenizer,
- Info {
- prefix,
- size: 0,
- marker: tokenizer.current.unwrap(),
- },
- )
+ sequence_open(tokenizer)
} else {
State::Nok
}
@@ -195,20 +178,23 @@ fn before_sequence_open(tokenizer: &mut Tokenizer) -> State {
/// | console.log(1)
/// | ~~~
/// ```
-fn sequence_open(tokenizer: &mut Tokenizer, mut info: Info) -> State {
+fn sequence_open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some(b'`' | b'~') if tokenizer.current.unwrap() == info.marker => {
+ Some(b'`' | b'~') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
+ tokenizer.tokenize_state.size += 1;
tokenizer.consume();
- State::Fn(Box::new(|t| {
- info.size += 1;
- sequence_open(t, info)
- }))
+ State::Fn(Box::new(sequence_open))
}
- _ if info.size >= CODE_FENCED_SEQUENCE_SIZE_MIN => {
+ _ if tokenizer.tokenize_state.size >= CODE_FENCED_SEQUENCE_SIZE_MIN => {
tokenizer.exit(Token::CodeFencedFenceSequence);
- tokenizer.attempt_opt(space_or_tab(), |t| info_before(t, info))(tokenizer)
+ tokenizer.attempt_opt(space_or_tab(), info_before)(tokenizer)
+ }
+ _ => {
+ tokenizer.tokenize_state.marker = 0;
+ tokenizer.tokenize_state.prefix = 0;
+ tokenizer.tokenize_state.size = 0;
+ State::Nok
}
- _ => State::Nok,
}
}
@@ -220,18 +206,18 @@ fn sequence_open(tokenizer: &mut Tokenizer, mut info: Info) -> State {
/// | console.log(1)
/// | ~~~
/// ```
-fn info_before(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn info_before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.exit(Token::CodeFencedFence);
// Do not form containers.
tokenizer.concrete = true;
- at_break(tokenizer, info)
+ at_break(tokenizer)
}
_ => {
tokenizer.enter(Token::CodeFencedFenceInfo);
tokenizer.enter_with_content(Token::Data, Some(ContentType::String));
- info_inside(tokenizer, info)
+ info_inside(tokenizer)
}
}
}
@@ -244,7 +230,7 @@ fn info_before(tokenizer: &mut Tokenizer, info: Info) -> State {
/// | console.log(1)
/// | ~~~
/// ```
-fn info_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn info_inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.exit(Token::Data);
@@ -252,17 +238,23 @@ fn info_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
tokenizer.exit(Token::CodeFencedFence);
// Do not form containers.
tokenizer.concrete = true;
- at_break(tokenizer, info)
+ at_break(tokenizer)
}
Some(b'\t' | b' ') => {
tokenizer.exit(Token::Data);
tokenizer.exit(Token::CodeFencedFenceInfo);
- tokenizer.attempt_opt(space_or_tab(), |t| meta_before(t, info))(tokenizer)
+ tokenizer.attempt_opt(space_or_tab(), meta_before)(tokenizer)
+ }
+ Some(b'`') if tokenizer.tokenize_state.marker == b'`' => {
+ tokenizer.concrete = false;
+ tokenizer.tokenize_state.marker = 0;
+ tokenizer.tokenize_state.prefix = 0;
+ tokenizer.tokenize_state.size = 0;
+ State::Nok
}
- Some(b'`') if info.marker == b'`' => State::Nok,
Some(_) => {
tokenizer.consume();
- State::Fn(Box::new(|t| info_inside(t, info)))
+ State::Fn(Box::new(info_inside))
}
}
}
@@ -275,18 +267,18 @@ fn info_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
/// | console.log(1)
/// | ~~~
/// ```
-fn meta_before(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn meta_before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.exit(Token::CodeFencedFence);
// Do not form containers.
tokenizer.concrete = true;
- at_break(tokenizer, info)
+ at_break(tokenizer)
}
_ => {
tokenizer.enter(Token::CodeFencedFenceMeta);
tokenizer.enter_with_content(Token::Data, Some(ContentType::String));
- meta(tokenizer, info)
+ meta(tokenizer)
}
}
}
@@ -299,7 +291,7 @@ fn meta_before(tokenizer: &mut Tokenizer, info: Info) -> State {
/// | console.log(1)
/// | ~~~
/// ```
-fn meta(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn meta(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.exit(Token::Data);
@@ -307,12 +299,18 @@ fn meta(tokenizer: &mut Tokenizer, info: Info) -> State {
tokenizer.exit(Token::CodeFencedFence);
// Do not form containers.
tokenizer.concrete = true;
- at_break(tokenizer, info)
+ at_break(tokenizer)
+ }
+ Some(b'`') if tokenizer.tokenize_state.marker == b'`' => {
+ tokenizer.concrete = false;
+ tokenizer.tokenize_state.marker = 0;
+ tokenizer.tokenize_state.prefix = 0;
+ tokenizer.tokenize_state.size = 0;
+ State::Nok
}
- Some(b'`') if info.marker == b'`' => State::Nok,
_ => {
tokenizer.consume();
- State::Fn(Box::new(|t| meta(t, info)))
+ State::Fn(Box::new(meta))
}
}
}
@@ -326,13 +324,9 @@ fn meta(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ^
/// | ~~~
/// ```
-fn at_break(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn at_break(tokenizer: &mut Tokenizer) -> State {
tokenizer.check(partial_non_lazy_continuation, |ok| {
- if ok {
- Box::new(move |t| at_non_lazy_break(t, info))
- } else {
- Box::new(after)
- }
+ Box::new(if ok { at_non_lazy_break } else { after })
})(tokenizer)
}
@@ -345,19 +339,10 @@ fn at_break(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ^
/// | ~~~
/// ```
-fn at_non_lazy_break(tokenizer: &mut Tokenizer, info: Info) -> State {
- let clone = info.clone();
-
- tokenizer.attempt(
- |t| close_begin(t, info),
- |ok| {
- if ok {
- Box::new(after)
- } else {
- Box::new(|t| content_before(t, clone))
- }
- },
- )(tokenizer)
+fn at_non_lazy_break(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.attempt(close_begin, |ok| {
+ Box::new(if ok { after } else { content_before })
+ })(tokenizer)
}
/// Before a closing fence, at the line ending.
@@ -368,13 +353,13 @@ fn at_non_lazy_break(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ^
/// | ~~~
/// ```
-fn close_begin(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn close_begin(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\n') => {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
- State::Fn(Box::new(|t| close_start(t, info)))
+ State::Fn(Box::new(close_start))
}
_ => unreachable!("expected eol"),
}
@@ -388,7 +373,7 @@ fn close_begin(tokenizer: &mut Tokenizer, info: Info) -> State {
/// > | ~~~
/// ^
/// ```
-fn close_start(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn close_start(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Token::CodeFencedFence);
tokenizer.go(
space_or_tab_min_max(
@@ -399,7 +384,7 @@ fn close_start(tokenizer: &mut Tokenizer, info: Info) -> State {
usize::MAX
},
),
- |t| close_before(t, info),
+ close_before,
)(tokenizer)
}
@@ -411,11 +396,11 @@ fn close_start(tokenizer: &mut Tokenizer, info: Info) -> State {
/// > | ~~~
/// ^
/// ```
-fn close_before(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn close_before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some(b'`' | b'~') if tokenizer.current.unwrap() == info.marker => {
+ Some(b'`' | b'~') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
tokenizer.enter(Token::CodeFencedFenceSequence);
- close_sequence(tokenizer, info, 0)
+ close_sequence(tokenizer)
}
_ => State::Nok,
}
@@ -429,17 +414,24 @@ fn close_before(tokenizer: &mut Tokenizer, info: Info) -> State {
/// > | ~~~
/// ^
/// ```
-fn close_sequence(tokenizer: &mut Tokenizer, info: Info, size: usize) -> State {
+fn close_sequence(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some(b'`' | b'~') if tokenizer.current.unwrap() == info.marker => {
+ Some(b'`' | b'~') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
+ tokenizer.tokenize_state.size_other += 1;
tokenizer.consume();
- State::Fn(Box::new(move |t| close_sequence(t, info, size + 1)))
+ State::Fn(Box::new(close_sequence))
}
- _ if size >= CODE_FENCED_SEQUENCE_SIZE_MIN && size >= info.size => {
+ _ if tokenizer.tokenize_state.size_other >= CODE_FENCED_SEQUENCE_SIZE_MIN
+ && tokenizer.tokenize_state.size_other >= tokenizer.tokenize_state.size =>
+ {
+ tokenizer.tokenize_state.size_other = 0;
tokenizer.exit(Token::CodeFencedFenceSequence);
tokenizer.attempt_opt(space_or_tab(), close_sequence_after)(tokenizer)
}
- _ => State::Nok,
+ _ => {
+ tokenizer.tokenize_state.size_other = 0;
+ State::Nok
+ }
}
}
@@ -469,11 +461,11 @@ fn close_sequence_after(tokenizer: &mut Tokenizer) -> State {
/// ^
/// | ~~~
/// ```
-fn content_before(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn content_before(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
- State::Fn(Box::new(|t| content_start(t, info)))
+ State::Fn(Box::new(content_start))
}
/// Before code content, definitely not before a closing fence.
///
@@ -483,10 +475,11 @@ fn content_before(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ^
/// | ~~~
/// ```
-fn content_start(tokenizer: &mut Tokenizer, info: Info) -> State {
- tokenizer.go(space_or_tab_min_max(0, info.prefix), |t| {
- content_begin(t, info)
- })(tokenizer)
+fn content_start(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.go(
+ space_or_tab_min_max(0, tokenizer.tokenize_state.prefix),
+ content_begin,
+ )(tokenizer)
}
/// Before code content, after a prefix.
@@ -497,12 +490,12 @@ fn content_start(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ^
/// | ~~~
/// ```
-fn content_begin(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn content_begin(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- None | Some(b'\n') => at_break(tokenizer, info),
+ None | Some(b'\n') => at_break(tokenizer),
_ => {
tokenizer.enter(Token::CodeFlowChunk);
- content_continue(tokenizer, info)
+ content_continue(tokenizer)
}
}
}
@@ -515,15 +508,15 @@ fn content_begin(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ^^^^^^^^^^^^^^
/// | ~~~
/// ```
-fn content_continue(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn content_continue(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.exit(Token::CodeFlowChunk);
- at_break(tokenizer, info)
+ at_break(tokenizer)
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(|t| content_continue(t, info)))
+ State::Fn(Box::new(content_continue))
}
}
}
@@ -538,6 +531,9 @@ fn content_continue(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn after(tokenizer: &mut Tokenizer) -> State {
tokenizer.exit(Token::CodeFenced);
+ tokenizer.tokenize_state.marker = 0;
+ tokenizer.tokenize_state.prefix = 0;
+ tokenizer.tokenize_state.size = 0;
// Feel free to interrupt.
tokenizer.interrupt = false;
// No longer concrete.
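
`code_fenced` needs two lengths at once, so the opening run lands in `size` and each candidate closing run in `size_other`, reset whether or not the close matched (a too-short close falls back to being content). Note too that `info_inside` and `meta` now flip `tokenizer.concrete` back off before returning `Nok` on a stray backtick. The closing check, restated outside the state machine (3 is the CommonMark fence minimum):

```rust
const CODE_FENCED_SEQUENCE_SIZE_MIN: usize = 3;

fn run(bytes: &[u8], marker: u8) -> usize {
    bytes.iter().take_while(|&&byte| byte == marker).count()
}

/// Does `close` terminate a block opened with `open`?
fn closes(open: &[u8], close: &[u8], marker: u8) -> bool {
    let size = run(open, marker); // lives in `tokenize_state.size`
    let size_other = run(close, marker); // `tokenize_state.size_other`
    size_other >= CODE_FENCED_SEQUENCE_SIZE_MIN && size_other >= size
}

fn main() {
    assert!(closes(b"```", b"````", b'`')); // longer close is fine
    assert!(!closes(b"````", b"```", b'`')); // shorter close is not
    assert!(!closes(b"~~~", b"~~", b'~')); // below the minimum of three
}
```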
diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs
index 3f9e5e5..31777f4 100644
--- a/src/construct/code_text.rs
+++ b/src/construct/code_text.rs
@@ -105,7 +105,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
{
tokenizer.enter(Token::CodeText);
tokenizer.enter(Token::CodeTextSequence);
- sequence_open(tokenizer, 0)
+ sequence_open(tokenizer)
}
_ => State::Nok,
}
@@ -117,13 +117,14 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | `a`
/// ^
/// ```
-fn sequence_open(tokenizer: &mut Tokenizer, size: usize) -> State {
+fn sequence_open(tokenizer: &mut Tokenizer) -> State {
if let Some(b'`') = tokenizer.current {
+ tokenizer.tokenize_state.size += 1;
tokenizer.consume();
- State::Fn(Box::new(move |t| sequence_open(t, size + 1)))
+ State::Fn(Box::new(sequence_open))
} else {
tokenizer.exit(Token::CodeTextSequence);
- between(tokenizer, size)
+ between(tokenizer)
}
}
@@ -133,22 +134,25 @@ fn sequence_open(tokenizer: &mut Tokenizer, size: usize) -> State {
/// > | `a`
/// ^^
/// ```
-fn between(tokenizer: &mut Tokenizer, size_open: usize) -> State {
+fn between(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- None => State::Nok,
+ None => {
+ tokenizer.tokenize_state.size = 0;
+ State::Nok
+ }
Some(b'\n') => {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
- State::Fn(Box::new(move |t| between(t, size_open)))
+ State::Fn(Box::new(between))
}
Some(b'`') => {
tokenizer.enter(Token::CodeTextSequence);
- sequence_close(tokenizer, size_open, 0)
+ sequence_close(tokenizer)
}
_ => {
tokenizer.enter(Token::CodeTextData);
- data(tokenizer, size_open)
+ data(tokenizer)
}
}
}
@@ -159,15 +163,15 @@ fn between(tokenizer: &mut Tokenizer, size_open: usize) -> State {
/// > | `a`
/// ^
/// ```
-fn data(tokenizer: &mut Tokenizer, size_open: usize) -> State {
+fn data(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n' | b'`') => {
tokenizer.exit(Token::CodeTextData);
- between(tokenizer, size_open)
+ between(tokenizer)
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(move |t| data(t, size_open)))
+ State::Fn(Box::new(data))
}
}
}
@@ -178,16 +182,19 @@ fn data(tokenizer: &mut Tokenizer, size_open: usize) -> State {
/// > | `a`
/// ^
/// ```
-fn sequence_close(tokenizer: &mut Tokenizer, size_open: usize, size: usize) -> State {
+fn sequence_close(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'`') => {
+ tokenizer.tokenize_state.size_other += 1;
tokenizer.consume();
- State::Fn(Box::new(move |t| sequence_close(t, size_open, size + 1)))
+ State::Fn(Box::new(sequence_close))
}
_ => {
- if size_open == size {
+ if tokenizer.tokenize_state.size == tokenizer.tokenize_state.size_other {
tokenizer.exit(Token::CodeTextSequence);
tokenizer.exit(Token::CodeText);
+ tokenizer.tokenize_state.size = 0;
+ tokenizer.tokenize_state.size_other = 0;
State::Ok
} else {
let index = tokenizer.events.len();
@@ -195,7 +202,8 @@ fn sequence_close(tokenizer: &mut Tokenizer, size_open: usize, size: usize) -> S
// More or less accents: mark as data.
tokenizer.events[index - 1].token_type = Token::CodeTextData;
tokenizer.events[index].token_type = Token::CodeTextData;
- between(tokenizer, size_open)
+ tokenizer.tokenize_state.size_other = 0;
+ between(tokenizer)
}
}
}
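
A wrinkle specific to `code_text`: an unmatched closing run is not a failure. The construct retags the just-recorded sequence events as `CodeTextData`, resets only `size_other`, and keeps scanning with `size` (the opening length) intact. A sketch of the retagging against a bare event list (stand-in `Event` type; real events also carry positions and link info):

```rust
#[derive(Clone, Copy, Debug, PartialEq)]
enum Token {
    CodeTextData,
    CodeTextSequence,
}

#[derive(Debug)]
struct Event {
    enter: bool,
    token_type: Token,
}

/// Turn the enter/exit pair at the end of `events` into plain data.
fn demote_unmatched_sequence(events: &mut [Event]) {
    let index = events.len();
    events[index - 2].token_type = Token::CodeTextData;
    events[index - 1].token_type = Token::CodeTextData;
}

fn main() {
    let mut events = vec![
        Event { enter: true, token_type: Token::CodeTextSequence },
        Event { enter: false, token_type: Token::CodeTextSequence },
    ];
    demote_unmatched_sequence(&mut events);
    assert!(events.iter().all(|e| e.token_type == Token::CodeTextData));
    assert!(events[0].enter && !events[1].enter);
}
```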
diff --git a/src/construct/definition.rs b/src/construct/definition.rs
index bd7df82..a56dab4 100644
--- a/src/construct/definition.rs
+++ b/src/construct/definition.rs
@@ -94,10 +94,10 @@
//! [html-img]: https://html.spec.whatwg.org/multipage/embedded-content.html#the-img-element
use crate::construct::{
- partial_destination::{start as destination, Options as DestinationOptions},
- partial_label::{start as label, Options as LabelOptions},
+ partial_destination::start as destination,
+ partial_label::start as label,
partial_space_or_tab::{space_or_tab, space_or_tab_eol},
- partial_title::{start as title, Options as TitleOptions},
+ partial_title::start as title,
};
use crate::token::Token;
use crate::tokenizer::{State, Tokenizer};
@@ -138,19 +138,12 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ```
fn before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some(b'[') => tokenizer.go(
- |t| {
- label(
- t,
- LabelOptions {
- label: Token::DefinitionLabel,
- marker: Token::DefinitionLabelMarker,
- string: Token::DefinitionLabelString,
- },
- )
- },
- label_after,
- )(tokenizer),
+ Some(b'[') => {
+ tokenizer.tokenize_state.token_1 = Token::DefinitionLabel;
+ tokenizer.tokenize_state.token_2 = Token::DefinitionLabelMarker;
+ tokenizer.tokenize_state.token_3 = Token::DefinitionLabelString;
+ tokenizer.go(label, label_after)(tokenizer)
+ }
_ => State::Nok,
}
}
@@ -162,6 +155,10 @@ fn before(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
fn label_after(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.tokenize_state.token_1 = Token::Data;
+ tokenizer.tokenize_state.token_2 = Token::Data;
+ tokenizer.tokenize_state.token_3 = Token::Data;
+
match tokenizer.current {
Some(b':') => {
tokenizer.enter(Token::DefinitionMarker);
@@ -182,22 +179,19 @@ fn label_after(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
fn destination_before(tokenizer: &mut Tokenizer) -> State {
- tokenizer.go(
- |t| {
- destination(
- t,
- DestinationOptions {
- limit: usize::MAX,
- destination: Token::DefinitionDestination,
- literal: Token::DefinitionDestinationLiteral,
- marker: Token::DefinitionDestinationLiteralMarker,
- raw: Token::DefinitionDestinationRaw,
- string: Token::DefinitionDestinationString,
- },
- )
- },
- destination_after,
- )(tokenizer)
+ tokenizer.tokenize_state.token_1 = Token::DefinitionDestination;
+ tokenizer.tokenize_state.token_2 = Token::DefinitionDestinationLiteral;
+ tokenizer.tokenize_state.token_3 = Token::DefinitionDestinationLiteralMarker;
+ tokenizer.tokenize_state.token_4 = Token::DefinitionDestinationRaw;
+ tokenizer.tokenize_state.token_5 = Token::DefinitionDestinationString;
+ tokenizer.tokenize_state.size_other = usize::MAX;
+ tokenizer.attempt(destination, |ok| {
+ Box::new(if ok {
+ destination_after
+ } else {
+ destination_missing
+ })
+ })(tokenizer)
}
/// After a destination.
@@ -207,9 +201,26 @@ fn destination_before(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
fn destination_after(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.tokenize_state.token_1 = Token::Data;
+ tokenizer.tokenize_state.token_2 = Token::Data;
+ tokenizer.tokenize_state.token_3 = Token::Data;
+ tokenizer.tokenize_state.token_4 = Token::Data;
+ tokenizer.tokenize_state.token_5 = Token::Data;
+ tokenizer.tokenize_state.size_other = 0;
tokenizer.attempt_opt(title_before, after)(tokenizer)
}
+/// Without destination.
+fn destination_missing(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.tokenize_state.token_1 = Token::Data;
+ tokenizer.tokenize_state.token_2 = Token::Data;
+ tokenizer.tokenize_state.token_3 = Token::Data;
+ tokenizer.tokenize_state.token_4 = Token::Data;
+ tokenizer.tokenize_state.token_5 = Token::Data;
+ tokenizer.tokenize_state.size_other = 0;
+ State::Nok
+}
+
/// After a definition.
///
/// ```markdown
@@ -262,19 +273,10 @@ fn title_before(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
fn title_before_marker(tokenizer: &mut Tokenizer) -> State {
- tokenizer.go(
- |t| {
- title(
- t,
- TitleOptions {
- title: Token::DefinitionTitle,
- marker: Token::DefinitionTitleMarker,
- string: Token::DefinitionTitleString,
- },
- )
- },
- title_after,
- )(tokenizer)
+ tokenizer.tokenize_state.token_1 = Token::DefinitionTitle;
+ tokenizer.tokenize_state.token_2 = Token::DefinitionTitleMarker;
+ tokenizer.tokenize_state.token_3 = Token::DefinitionTitleString;
+ tokenizer.go(title, title_after)(tokenizer)
}
/// After a title.
@@ -284,6 +286,9 @@ fn title_before_marker(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
fn title_after(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.tokenize_state.token_1 = Token::Data;
+ tokenizer.tokenize_state.token_2 = Token::Data;
+ tokenizer.tokenize_state.token_3 = Token::Data;
tokenizer.attempt_opt(space_or_tab(), title_after_after_optional_whitespace)(tokenizer)
}
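
`definition` shows the second half of the pattern: the `Options` structs previously threaded into `label`, `destination`, and `title` become shared `token_1`..`token_5` slots, filled before `go`/`attempt` and cleared on every way out; the new `destination_missing` state exists purely to do that cleanup on failure. A sketch of the calling convention, assuming a stand-in token enum and a boolean in place of real parsing:

```rust
#[derive(Clone, Copy, Debug, PartialEq)]
enum Token {
    Data, // the neutral default the slots return to
    DefinitionLabel,
    DefinitionLabelMarker,
    DefinitionLabelString,
}

struct TokenizeState {
    token_1: Token,
    token_2: Token,
    token_3: Token,
}

impl TokenizeState {
    fn clear_tokens(&mut self) {
        self.token_1 = Token::Data;
        self.token_2 = Token::Data;
        self.token_3 = Token::Data;
    }
}

/// Stand-in for `partial_label::start`: reads its token names from the
/// shared state instead of an `Options` argument.
fn label(state: &TokenizeState, input_is_valid: bool) -> Option<Token> {
    if input_is_valid {
        Some(state.token_1)
    } else {
        None
    }
}

fn main() {
    let mut state = TokenizeState {
        token_1: Token::DefinitionLabel,
        token_2: Token::DefinitionLabelMarker,
        token_3: Token::DefinitionLabelString,
    };
    let outcome = label(&state, true);
    state.clear_tokens(); // success or failure, same cleanup
    assert_eq!(outcome, Some(Token::DefinitionLabel));
    assert_eq!(
        (state.token_1, state.token_2, state.token_3),
        (Token::Data, Token::Data, Token::Data)
    );
}
```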
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index d432b6c..6751567 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -93,7 +93,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
fn before(tokenizer: &mut Tokenizer) -> State {
if Some(b'#') == tokenizer.current {
tokenizer.enter(Token::HeadingAtxSequence);
- sequence_open(tokenizer, 0)
+ sequence_open(tokenizer)
} else {
State::Nok
}
@@ -105,23 +105,27 @@ fn before(tokenizer: &mut Tokenizer) -> State {
/// > | ## aa
/// ^
/// ```
-fn sequence_open(tokenizer: &mut Tokenizer, size: usize) -> State {
+fn sequence_open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- None | Some(b'\n') if size > 0 => {
+ None | Some(b'\n') if tokenizer.tokenize_state.size > 0 => {
+ tokenizer.tokenize_state.size = 0;
tokenizer.exit(Token::HeadingAtxSequence);
at_break(tokenizer)
}
- Some(b'#') if size < HEADING_ATX_OPENING_FENCE_SIZE_MAX => {
+ Some(b'#') if tokenizer.tokenize_state.size < HEADING_ATX_OPENING_FENCE_SIZE_MAX => {
+ tokenizer.tokenize_state.size += 1;
tokenizer.consume();
- State::Fn(Box::new(move |tokenizer| {
- sequence_open(tokenizer, size + 1)
- }))
+ State::Fn(Box::new(sequence_open))
}
- _ if size > 0 => {
+ _ if tokenizer.tokenize_state.size > 0 => {
+ tokenizer.tokenize_state.size = 0;
tokenizer.exit(Token::HeadingAtxSequence);
tokenizer.go(space_or_tab(), at_break)(tokenizer)
}
- _ => State::Nok,
+ _ => {
+ tokenizer.tokenize_state.size = 0;
+ State::Nok
+ }
}
}
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index 98d7843..675b2ac 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -109,8 +109,9 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
fn before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'-' | b'=') => {
+ tokenizer.tokenize_state.marker = tokenizer.current.unwrap();
tokenizer.enter(Token::HeadingSetextUnderline);
- inside(tokenizer, tokenizer.current.unwrap())
+ inside(tokenizer)
}
_ => State::Nok,
}
@@ -123,13 +124,14 @@ fn before(tokenizer: &mut Tokenizer) -> State {
/// > | ==
/// ^
/// ```
-fn inside(tokenizer: &mut Tokenizer, marker: u8) -> State {
+fn inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some(b'-' | b'=') if tokenizer.current.unwrap() == marker => {
+ Some(b'-' | b'=') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
tokenizer.consume();
- State::Fn(Box::new(move |t| inside(t, marker)))
+ State::Fn(Box::new(inside))
}
_ => {
+ tokenizer.tokenize_state.marker = 0;
tokenizer.exit(Token::HeadingSetextUnderline);
tokenizer.attempt_opt(space_or_tab(), after)(tokenizer)
}
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index 064da35..aaa803d 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -1,4 +1,4 @@
-//! HTML (flow) is a construct that occurs in the [flow][] content type.
+//! HTML (flow) is a construct that occurs in the [flow][] content type.
//!
//! It forms with the following BNF:
//!
@@ -110,37 +110,20 @@ use crate::token::Token;
use crate::tokenizer::{State, Tokenizer};
use crate::util::slice::Slice;
-/// Kind of HTML (flow).
-#[derive(Debug, PartialEq)]
-enum Kind {
- /// Symbol for `<script>` (condition 1).
- Raw,
- /// Symbol for `<!---->` (condition 2).
- Comment,
- /// Symbol for `<?php?>` (condition 3).
- Instruction,
- /// Symbol for `<!doctype>` (condition 4).
- Declaration,
- /// Symbol for `<![CDATA[]]>` (condition 5).
- Cdata,
- /// Symbol for `<div` (condition 6).
- Basic,
- /// Symbol for `<x>` (condition 7).
- Complete,
-}
-
-/// State needed to parse HTML (flow).
-#[derive(Debug)]
-struct Info {
- /// Kind of HTML (flow).
- kind: Kind,
- /// Whether this is a start tag (`<` not followed by `/`).
- start_tag: bool,
- /// Start index of a tag name or cdata prefix.
- start: usize,
- /// Current quote, when in a double or single quoted attribute value.
- quote: u8,
-}
+/// Symbol for `<script>` (condition 1).
+const RAW: u8 = 1;
+/// Symbol for `<!---->` (condition 2).
+const COMMENT: u8 = 2;
+/// Symbol for `<?php?>` (condition 3).
+const INSTRUCTION: u8 = 3;
+/// Symbol for `<!doctype>` (condition 4).
+const DECLARATION: u8 = 4;
+/// Symbol for `<![CDATA[]]>` (condition 5).
+const CDATA: u8 = 5;
+/// Symbol for `<div` (condition 6).
+const BASIC: u8 = 6;
+/// Symbol for `<x>` (condition 7).
+const COMPLETE: u8 = 7;
/// Start of HTML (flow), before optional whitespace.
///
@@ -197,39 +180,30 @@ fn before(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
fn open(tokenizer: &mut Tokenizer) -> State {
- let mut info = Info {
- // Assume basic.
- kind: Kind::Basic,
- // Assume closing tag (or no tag).
- start_tag: false,
- start: 0,
- quote: 0,
- };
-
match tokenizer.current {
Some(b'!') => {
tokenizer.consume();
- State::Fn(Box::new(|t| declaration_open(t, info)))
+ State::Fn(Box::new(declaration_open))
}
Some(b'/') => {
tokenizer.consume();
- info.start = tokenizer.point.index;
- State::Fn(Box::new(|t| tag_close_start(t, info)))
+ tokenizer.tokenize_state.seen = true;
+ tokenizer.tokenize_state.start = tokenizer.point.index;
+ State::Fn(Box::new(tag_close_start))
}
Some(b'?') => {
- info.kind = Kind::Instruction;
+ tokenizer.tokenize_state.marker = INSTRUCTION;
tokenizer.consume();
// Do not form containers.
tokenizer.concrete = true;
// While we’re in an instruction instead of a declaration, we’re on a `?`
// right now, so we do need to search for `>`, similar to declarations.
- State::Fn(Box::new(|t| continuation_declaration_inside(t, info)))
+ State::Fn(Box::new(continuation_declaration_inside))
}
// ASCII alphabetical.
Some(b'A'..=b'Z' | b'a'..=b'z') => {
- info.start_tag = true;
- info.start = tokenizer.point.index;
- tag_name(tokenizer, info)
+ tokenizer.tokenize_state.start = tokenizer.point.index;
+ tag_name(tokenizer)
}
_ => State::Nok,
}
@@ -245,25 +219,24 @@ fn open(tokenizer: &mut Tokenizer) -> State {
/// > | <![CDATA[>&<]]>
/// ^
/// ```
-fn declaration_open(tokenizer: &mut Tokenizer, mut info: Info) -> State {
+fn declaration_open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'-') => {
tokenizer.consume();
- info.kind = Kind::Comment;
- State::Fn(Box::new(|t| comment_open_inside(t, info)))
+ tokenizer.tokenize_state.marker = COMMENT;
+ State::Fn(Box::new(comment_open_inside))
}
Some(b'A'..=b'Z' | b'a'..=b'z') => {
tokenizer.consume();
- info.kind = Kind::Declaration;
+ tokenizer.tokenize_state.marker = DECLARATION;
// Do not form containers.
tokenizer.concrete = true;
- State::Fn(Box::new(|t| continuation_declaration_inside(t, info)))
+ State::Fn(Box::new(continuation_declaration_inside))
}
Some(b'[') => {
tokenizer.consume();
- info.kind = Kind::Cdata;
- info.start = tokenizer.point.index;
- State::Fn(Box::new(|t| cdata_open_inside(t, info)))
+ tokenizer.tokenize_state.marker = CDATA;
+ State::Fn(Box::new(cdata_open_inside))
}
_ => State::Nok,
}
@@ -275,15 +248,15 @@ fn declaration_open(tokenizer: &mut Tokenizer, mut info: Info) -> State {
/// > | <!--xxx-->
/// ^
/// ```
-fn comment_open_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
- match tokenizer.current {
- Some(b'-') => {
- tokenizer.consume();
- // Do not form containers.
- tokenizer.concrete = true;
- State::Fn(Box::new(|t| continuation_declaration_inside(t, info)))
- }
- _ => State::Nok,
+fn comment_open_inside(tokenizer: &mut Tokenizer) -> State {
+ if let Some(b'-') = tokenizer.current {
+ tokenizer.consume();
+ // Do not form containers.
+ tokenizer.concrete = true;
+ State::Fn(Box::new(continuation_declaration_inside))
+ } else {
+ tokenizer.tokenize_state.marker = 0;
+ State::Nok
}
}
@@ -293,21 +266,23 @@ fn comment_open_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
/// > | <![CDATA[>&<]]>
/// ^^^^^^
/// ```
-fn cdata_open_inside(tokenizer: &mut Tokenizer, mut info: Info) -> State {
- match tokenizer.current {
- Some(byte) if byte == HTML_CDATA_PREFIX[tokenizer.point.index - info.start] => {
- tokenizer.consume();
+fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State {
+ if tokenizer.current == Some(HTML_CDATA_PREFIX[tokenizer.tokenize_state.size]) {
+ tokenizer.tokenize_state.size += 1;
+ tokenizer.consume();
- if tokenizer.point.index - info.start == HTML_CDATA_PREFIX.len() {
- info.start = 0;
- // Do not form containers.
- tokenizer.concrete = true;
- State::Fn(Box::new(|t| continuation(t, info)))
- } else {
- State::Fn(Box::new(|t| cdata_open_inside(t, info)))
- }
+ if tokenizer.tokenize_state.size == HTML_CDATA_PREFIX.len() {
+ tokenizer.tokenize_state.size = 0;
+ // Do not form containers.
+ tokenizer.concrete = true;
+ State::Fn(Box::new(continuation))
+ } else {
+ State::Fn(Box::new(cdata_open_inside))
}
- _ => State::Nok,
+ } else {
+ tokenizer.tokenize_state.marker = 0;
+ tokenizer.tokenize_state.size = 0;
+ State::Nok
}
}
@@ -317,14 +292,14 @@ fn cdata_open_inside(tokenizer: &mut Tokenizer, mut info: Info) -> State {
/// > | </x>
/// ^
/// ```
-fn tag_close_start(tokenizer: &mut Tokenizer, info: Info) -> State {
- match tokenizer.current {
- // ASCII alphabetical.
- Some(b'A'..=b'Z' | b'a'..=b'z') => {
- tokenizer.consume();
- State::Fn(Box::new(|t| tag_name(t, info)))
- }
- _ => State::Nok,
+fn tag_close_start(tokenizer: &mut Tokenizer) -> State {
+ if let Some(b'A'..=b'Z' | b'a'..=b'z') = tokenizer.current {
+ tokenizer.consume();
+ State::Fn(Box::new(tag_name))
+ } else {
+ tokenizer.tokenize_state.seen = false;
+ tokenizer.tokenize_state.start = 0;
+ State::Nok
}
}
@@ -336,14 +311,15 @@ fn tag_close_start(tokenizer: &mut Tokenizer, info: Info) -> State {
/// > | </ab>
/// ^^
/// ```
-fn tag_name(tokenizer: &mut Tokenizer, mut info: Info) -> State {
+fn tag_name(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\t' | b'\n' | b' ' | b'/' | b'>') => {
+ let closing_tag = tokenizer.tokenize_state.seen;
let slash = matches!(tokenizer.current, Some(b'/'));
// Guaranteed to be valid ASCII bytes.
let slice = Slice::from_indices(
tokenizer.parse_state.bytes,
- info.start,
+ tokenizer.tokenize_state.start,
tokenizer.point.index,
);
let name = slice
@@ -351,42 +327,48 @@ fn tag_name(tokenizer: &mut Tokenizer, mut info: Info) -> State {
// The line ending case might result in a `\r` that is already accounted for.
.trim()
.to_ascii_lowercase();
- info.start = 0;
+ tokenizer.tokenize_state.seen = false;
+ tokenizer.tokenize_state.start = 0;
- if !slash && info.start_tag && HTML_RAW_NAMES.contains(&name.as_str()) {
- info.kind = Kind::Raw;
+ if !slash && !closing_tag && HTML_RAW_NAMES.contains(&name.as_str()) {
+ tokenizer.tokenize_state.marker = RAW;
// Do not form containers.
tokenizer.concrete = true;
- continuation(tokenizer, info)
+ continuation(tokenizer)
} else if HTML_BLOCK_NAMES.contains(&name.as_str()) {
- // Basic is assumed, no need to set `kind`.
+ tokenizer.tokenize_state.marker = BASIC;
+
if slash {
tokenizer.consume();
- State::Fn(Box::new(|t| basic_self_closing(t, info)))
+ State::Fn(Box::new(basic_self_closing))
} else {
// Do not form containers.
tokenizer.concrete = true;
- continuation(tokenizer, info)
+ continuation(tokenizer)
}
} else {
- info.kind = Kind::Complete;
+ tokenizer.tokenize_state.marker = COMPLETE;
// Do not support complete HTML when interrupting.
if tokenizer.interrupt && !tokenizer.lazy {
+ tokenizer.tokenize_state.marker = 0;
State::Nok
- } else if info.start_tag {
- complete_attribute_name_before(tokenizer, info)
+ } else if closing_tag {
+ complete_closing_tag_after(tokenizer)
} else {
- complete_closing_tag_after(tokenizer, info)
+ complete_attribute_name_before(tokenizer)
}
}
}
// ASCII alphanumerical and `-`.
Some(b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => {
tokenizer.consume();
- State::Fn(Box::new(|t| tag_name(t, info)))
+ State::Fn(Box::new(tag_name))
+ }
+ Some(_) => {
+ tokenizer.tokenize_state.seen = false;
+ State::Nok
}
- Some(_) => State::Nok,
}
}
@@ -396,15 +378,15 @@ fn tag_name(tokenizer: &mut Tokenizer, mut info: Info) -> State {
/// > | <div/>
/// ^
/// ```
-fn basic_self_closing(tokenizer: &mut Tokenizer, info: Info) -> State {
- match tokenizer.current {
- Some(b'>') => {
- tokenizer.consume();
- // Do not form containers.
- tokenizer.concrete = true;
- State::Fn(Box::new(|t| continuation(t, info)))
- }
- _ => State::Nok,
+fn basic_self_closing(tokenizer: &mut Tokenizer) -> State {
+ if let Some(b'>') = tokenizer.current {
+ tokenizer.consume();
+ // Do not form containers.
+ tokenizer.concrete = true;
+ State::Fn(Box::new(continuation))
+ } else {
+ tokenizer.tokenize_state.marker = 0;
+ State::Nok
}
}
@@ -414,13 +396,13 @@ fn basic_self_closing(tokenizer: &mut Tokenizer, info: Info) -> State {
/// > | <x/>
/// ^
/// ```
-fn complete_closing_tag_after(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn complete_closing_tag_after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\t' | b' ') => {
tokenizer.consume();
- State::Fn(Box::new(|t| complete_closing_tag_after(t, info)))
+ State::Fn(Box::new(complete_closing_tag_after))
}
- _ => complete_end(tokenizer, info),
+ _ => complete_end(tokenizer),
}
}
@@ -443,22 +425,22 @@ fn complete_closing_tag_after(tokenizer: &mut Tokenizer, info: Info) -> State {
/// > | <a >
/// ^
/// ```
-fn complete_attribute_name_before(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn complete_attribute_name_before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\t' | b' ') => {
tokenizer.consume();
- State::Fn(Box::new(|t| complete_attribute_name_before(t, info)))
+ State::Fn(Box::new(complete_attribute_name_before))
}
Some(b'/') => {
tokenizer.consume();
- State::Fn(Box::new(|t| complete_end(t, info)))
+ State::Fn(Box::new(complete_end))
}
// ASCII alphanumerical and `:` and `_`.
Some(b'0'..=b'9' | b':' | b'A'..=b'Z' | b'_' | b'a'..=b'z') => {
tokenizer.consume();
- State::Fn(Box::new(|t| complete_attribute_name(t, info)))
+ State::Fn(Box::new(complete_attribute_name))
}
- _ => complete_end(tokenizer, info),
+ _ => complete_end(tokenizer),
}
}
@@ -472,14 +454,14 @@ fn complete_attribute_name_before(tokenizer: &mut Tokenizer, info: Info) -> Stat
/// > | <a b>
/// ^
/// ```
-fn complete_attribute_name(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn complete_attribute_name(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// ASCII alphanumerical and `-`, `.`, `:`, and `_`.
Some(b'-' | b'.' | b'0'..=b'9' | b':' | b'A'..=b'Z' | b'_' | b'a'..=b'z') => {
tokenizer.consume();
- State::Fn(Box::new(|t| complete_attribute_name(t, info)))
+ State::Fn(Box::new(complete_attribute_name))
}
- _ => complete_attribute_name_after(tokenizer, info),
+ _ => complete_attribute_name_after(tokenizer),
}
}
@@ -492,17 +474,17 @@ fn complete_attribute_name(tokenizer: &mut Tokenizer, info: Info) -> State {
/// > | <a b=c>
/// ^
/// ```
-fn complete_attribute_name_after(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn complete_attribute_name_after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\t' | b' ') => {
tokenizer.consume();
- State::Fn(Box::new(|t| complete_attribute_name_after(t, info)))
+ State::Fn(Box::new(complete_attribute_name_after))
}
Some(b'=') => {
tokenizer.consume();
- State::Fn(Box::new(|t| complete_attribute_value_before(t, info)))
+ State::Fn(Box::new(complete_attribute_value_before))
}
- _ => complete_attribute_name_before(tokenizer, info),
+ _ => complete_attribute_name_before(tokenizer),
}
}
@@ -515,19 +497,22 @@ fn complete_attribute_name_after(tokenizer: &mut Tokenizer, info: Info) -> State
/// > | <a b="c">
/// ^
/// ```
-fn complete_attribute_value_before(tokenizer: &mut Tokenizer, mut info: Info) -> State {
+fn complete_attribute_value_before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- None | Some(b'<' | b'=' | b'>' | b'`') => State::Nok,
+ None | Some(b'<' | b'=' | b'>' | b'`') => {
+ tokenizer.tokenize_state.marker = 0;
+ State::Nok
+ }
Some(b'\t' | b' ') => {
tokenizer.consume();
- State::Fn(Box::new(|t| complete_attribute_value_before(t, info)))
+ State::Fn(Box::new(complete_attribute_value_before))
}
Some(b'"' | b'\'') => {
- info.quote = tokenizer.current.unwrap();
+ tokenizer.tokenize_state.marker_other = tokenizer.current.unwrap();
tokenizer.consume();
- State::Fn(Box::new(|t| complete_attribute_value_quoted(t, info)))
+ State::Fn(Box::new(complete_attribute_value_quoted))
}
- _ => complete_attribute_value_unquoted(tokenizer, info),
+ _ => complete_attribute_value_unquoted(tokenizer),
}
}
@@ -539,16 +524,23 @@ fn complete_attribute_value_before(tokenizer: &mut Tokenizer, mut info: Info) ->
/// > | <a b='c'>
/// ^
/// ```
-fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- None | Some(b'\n') => State::Nok,
- Some(b'"' | b'\'') if tokenizer.current.unwrap() == info.quote => {
+ None | Some(b'\n') => {
+ tokenizer.tokenize_state.marker = 0;
+ tokenizer.tokenize_state.marker_other = 0;
+ State::Nok
+ }
+ Some(b'"' | b'\'')
+ if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker_other =>
+ {
+ tokenizer.tokenize_state.marker_other = 0;
tokenizer.consume();
- State::Fn(Box::new(|t| complete_attribute_value_quoted_after(t, info)))
+ State::Fn(Box::new(complete_attribute_value_quoted_after))
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(|t| complete_attribute_value_quoted(t, info)))
+ State::Fn(Box::new(complete_attribute_value_quoted))
}
}
}
@@ -559,14 +551,14 @@ fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer, info: Info) -> Sta
/// > | <a b=c>
/// ^
/// ```
-fn complete_attribute_value_unquoted(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn complete_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\t' | b'\n' | b' ' | b'"' | b'\'' | b'/' | b'<' | b'=' | b'>' | b'`') => {
- complete_attribute_name_after(tokenizer, info)
+ complete_attribute_name_after(tokenizer)
}
Some(_) => {
tokenizer.consume();
- State::Fn(Box::new(|t| complete_attribute_value_unquoted(t, info)))
+ State::Fn(Box::new(complete_attribute_value_unquoted))
}
}
}
@@ -578,10 +570,12 @@ fn complete_attribute_value_unquoted(tokenizer: &mut Tokenizer, info: Info) -> S
/// > | <a b="c">
/// ^
/// ```
-fn complete_attribute_value_quoted_after(tokenizer: &mut Tokenizer, info: Info) -> State {
- match tokenizer.current {
- Some(b'\t' | b' ' | b'/' | b'>') => complete_attribute_name_before(tokenizer, info),
- _ => State::Nok,
+fn complete_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State {
+ if let Some(b'\t' | b' ' | b'/' | b'>') = tokenizer.current {
+ complete_attribute_name_before(tokenizer)
+ } else {
+ tokenizer.tokenize_state.marker = 0;
+ State::Nok
}
}
@@ -591,13 +585,13 @@ fn complete_attribute_value_quoted_after(tokenizer: &mut Tokenizer, info: Info)
/// > | <a b="c">
/// ^
/// ```
-fn complete_end(tokenizer: &mut Tokenizer, info: Info) -> State {
- match tokenizer.current {
- Some(b'>') => {
- tokenizer.consume();
- State::Fn(Box::new(|t| complete_after(t, info)))
- }
- _ => State::Nok,
+fn complete_end(tokenizer: &mut Tokenizer) -> State {
+ if let Some(b'>') = tokenizer.current {
+ tokenizer.consume();
+ State::Fn(Box::new(complete_after))
+ } else {
+ tokenizer.tokenize_state.marker = 0;
+ State::Nok
}
}
@@ -607,18 +601,21 @@ fn complete_end(tokenizer: &mut Tokenizer, info: Info) -> State {
/// > | <x>
/// ^
/// ```
-fn complete_after(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn complete_after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
// Do not form containers.
tokenizer.concrete = true;
- continuation(tokenizer, info)
+ continuation(tokenizer)
}
Some(b'\t' | b' ') => {
tokenizer.consume();
- State::Fn(Box::new(|t| complete_after(t, info)))
+ State::Fn(Box::new(complete_after))
+ }
+ Some(_) => {
+ tokenizer.tokenize_state.marker = 0;
+ State::Nok
}
- Some(_) => State::Nok,
}
}
@@ -628,46 +625,49 @@ fn complete_after(tokenizer: &mut Tokenizer, info: Info) -> State {
/// > | <!--xxx-->
/// ^
/// ```
-fn continuation(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn continuation(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some(b'\n') if info.kind == Kind::Basic || info.kind == Kind::Complete => {
+ Some(b'\n')
+ if tokenizer.tokenize_state.marker == BASIC
+ || tokenizer.tokenize_state.marker == COMPLETE =>
+ {
tokenizer.exit(Token::HtmlFlowData);
tokenizer.check(blank_line_before, |ok| {
- if ok {
- Box::new(continuation_after)
+ Box::new(if ok {
+ continuation_after
} else {
- Box::new(move |t| continuation_start(t, info))
- }
+ continuation_start
+ })
})(tokenizer)
}
// Note: important that this is after the basic/complete case.
None | Some(b'\n') => {
tokenizer.exit(Token::HtmlFlowData);
- continuation_start(tokenizer, info)
+ continuation_start(tokenizer)
}
- Some(b'-') if info.kind == Kind::Comment => {
+ Some(b'-') if tokenizer.tokenize_state.marker == COMMENT => {
tokenizer.consume();
- State::Fn(Box::new(|t| continuation_comment_inside(t, info)))
+ State::Fn(Box::new(continuation_comment_inside))
}
- Some(b'<') if info.kind == Kind::Raw => {
+ Some(b'<') if tokenizer.tokenize_state.marker == RAW => {
tokenizer.consume();
- State::Fn(Box::new(|t| continuation_raw_tag_open(t, info)))
+ State::Fn(Box::new(continuation_raw_tag_open))
}
- Some(b'>') if info.kind == Kind::Declaration => {
+ Some(b'>') if tokenizer.tokenize_state.marker == DECLARATION => {
tokenizer.consume();
- State::Fn(Box::new(|t| continuation_close(t, info)))
+ State::Fn(Box::new(continuation_close))
}
- Some(b'?') if info.kind == Kind::Instruction => {
+ Some(b'?') if tokenizer.tokenize_state.marker == INSTRUCTION => {
tokenizer.consume();
- State::Fn(Box::new(|t| continuation_declaration_inside(t, info)))
+ State::Fn(Box::new(continuation_declaration_inside))
}
- Some(b']') if info.kind == Kind::Cdata => {
+ Some(b']') if tokenizer.tokenize_state.marker == CDATA => {
tokenizer.consume();
- State::Fn(Box::new(|t| continuation_character_data_inside(t, info)))
+ State::Fn(Box::new(continuation_character_data_inside))
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(|t| continuation(t, info)))
+ State::Fn(Box::new(continuation))
}
}
}
@@ -679,13 +679,13 @@ fn continuation(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ^
/// | asd
/// ```
-fn continuation_start(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn continuation_start(tokenizer: &mut Tokenizer) -> State {
tokenizer.check(partial_non_lazy_continuation, |ok| {
- if ok {
- Box::new(move |t| continuation_start_non_lazy(t, info))
+ Box::new(if ok {
+ continuation_start_non_lazy
} else {
- Box::new(continuation_after)
- }
+ continuation_after
+ })
})(tokenizer)
}
@@ -696,13 +696,13 @@ fn continuation_start(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ^
/// | asd
/// ```
-fn continuation_start_non_lazy(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn continuation_start_non_lazy(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\n') => {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
- State::Fn(Box::new(|t| continuation_before(t, info)))
+ State::Fn(Box::new(continuation_before))
}
_ => unreachable!("expected eol"),
}
@@ -715,12 +715,12 @@ fn continuation_start_non_lazy(tokenizer: &mut Tokenizer, info: Info) -> State {
/// > | asd
/// ^
/// ```
-fn continuation_before(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn continuation_before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- None | Some(b'\n') => continuation_start(tokenizer, info),
+ None | Some(b'\n') => continuation_start(tokenizer),
_ => {
tokenizer.enter(Token::HtmlFlowData);
- continuation(tokenizer, info)
+ continuation(tokenizer)
}
}
}
@@ -731,13 +731,13 @@ fn continuation_before(tokenizer: &mut Tokenizer, info: Info) -> State {
/// > | <!--xxx-->
/// ^
/// ```
-fn continuation_comment_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn continuation_comment_inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'-') => {
tokenizer.consume();
- State::Fn(Box::new(|t| continuation_declaration_inside(t, info)))
+ State::Fn(Box::new(continuation_declaration_inside))
}
- _ => continuation(tokenizer, info),
+ _ => continuation(tokenizer),
}
}
@@ -747,14 +747,14 @@ fn continuation_comment_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
/// > | <script>console.log(1)</script>
/// ^
/// ```
-fn continuation_raw_tag_open(tokenizer: &mut Tokenizer, mut info: Info) -> State {
+fn continuation_raw_tag_open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'/') => {
tokenizer.consume();
- info.start = tokenizer.point.index;
- State::Fn(Box::new(|t| continuation_raw_end_tag(t, info)))
+ tokenizer.tokenize_state.start = tokenizer.point.index;
+ State::Fn(Box::new(continuation_raw_end_tag))
}
- _ => continuation(tokenizer, info),
+ _ => continuation(tokenizer),
}
}
@@ -764,35 +764,35 @@ fn continuation_raw_tag_open(tokenizer: &mut Tokenizer, mut info: Info) -> State
/// > | <script>console.log(1)</script>
/// ^^^^^^
/// ```
-fn continuation_raw_end_tag(tokenizer: &mut Tokenizer, mut info: Info) -> State {
+fn continuation_raw_end_tag(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'>') => {
// Guaranteed to be valid ASCII bytes.
let slice = Slice::from_indices(
tokenizer.parse_state.bytes,
- info.start,
+ tokenizer.tokenize_state.start,
tokenizer.point.index,
);
let name = slice.as_str().to_ascii_lowercase();
- info.start = 0;
+ tokenizer.tokenize_state.start = 0;
if HTML_RAW_NAMES.contains(&name.as_str()) {
tokenizer.consume();
- State::Fn(Box::new(|t| continuation_close(t, info)))
+ State::Fn(Box::new(continuation_close))
} else {
- continuation(tokenizer, info)
+ continuation(tokenizer)
}
}
Some(b'A'..=b'Z' | b'a'..=b'z')
- if tokenizer.point.index - info.start < HTML_RAW_SIZE_MAX =>
+ if tokenizer.point.index - tokenizer.tokenize_state.start < HTML_RAW_SIZE_MAX =>
{
tokenizer.consume();
- State::Fn(Box::new(|t| continuation_raw_end_tag(t, info)))
+ State::Fn(Box::new(continuation_raw_end_tag))
}
_ => {
- info.start = 0;
- continuation(tokenizer, info)
+ tokenizer.tokenize_state.start = 0;
+ continuation(tokenizer)
}
}
}
@@ -803,13 +803,13 @@ fn continuation_raw_end_tag(tokenizer: &mut Tokenizer, mut info: Info) -> State
/// > | <![CDATA[>&<]]>
/// ^
/// ```
-fn continuation_character_data_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn continuation_character_data_inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b']') => {
tokenizer.consume();
- State::Fn(Box::new(|t| continuation_declaration_inside(t, info)))
+ State::Fn(Box::new(continuation_declaration_inside))
}
- _ => continuation(tokenizer, info),
+ _ => continuation(tokenizer),
}
}
@@ -827,17 +827,17 @@ fn continuation_character_data_inside(tokenizer: &mut Tokenizer, info: Info) ->
/// > | <![CDATA[>&<]]>
/// ^
/// ```
-fn continuation_declaration_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn continuation_declaration_inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'>') => {
tokenizer.consume();
- State::Fn(Box::new(|t| continuation_close(t, info)))
+ State::Fn(Box::new(continuation_close))
}
- Some(b'-') if info.kind == Kind::Comment => {
+ Some(b'-') if tokenizer.tokenize_state.marker == COMMENT => {
tokenizer.consume();
- State::Fn(Box::new(|t| continuation_declaration_inside(t, info)))
+ State::Fn(Box::new(continuation_declaration_inside))
}
- _ => continuation(tokenizer, info),
+ _ => continuation(tokenizer),
}
}
@@ -847,7 +847,7 @@ fn continuation_declaration_inside(tokenizer: &mut Tokenizer, info: Info) -> Sta
/// > | <!doctype>
/// ^
/// ```
-fn continuation_close(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn continuation_close(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.exit(Token::HtmlFlowData);
@@ -855,7 +855,7 @@ fn continuation_close(tokenizer: &mut Tokenizer, info: Info) -> State {
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(|t| continuation_close(t, info)))
+ State::Fn(Box::new(continuation_close))
}
}
}
@@ -868,6 +868,7 @@ fn continuation_close(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn continuation_after(tokenizer: &mut Tokenizer) -> State {
tokenizer.exit(Token::HtmlFlow);
+ tokenizer.tokenize_state.marker = 0;
// Feel free to interrupt.
tokenizer.interrupt = false;
// No longer concrete.
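
The html_flow hunks above show the commit's core move, repeated in every file below: state that used to travel in an `Info` struct captured by `move` closures now lives on a shared `tokenize_state`, so each state becomes a plain `fn` item. A minimal sketch of the shape, using hypothetical stand-in types rather than the crate's real definitions:

struct TokenizeState {
    marker: u8,
}

struct Tokenizer {
    current: Option<u8>,
    tokenize_state: TokenizeState,
}

enum State {
    Ok,
    Fn(Box<dyn FnOnce(&mut Tokenizer) -> State>),
}

// Before: `fn inside(t: &mut Tokenizer, marker: u8) -> State`, continued
// with `State::Fn(Box::new(move |t| inside(t, marker)))`.
fn inside(t: &mut Tokenizer) -> State {
    match t.current {
        Some(byte) if byte == t.tokenize_state.marker => {
            // consume() in the real tokenizer.
            State::Fn(Box::new(inside))
        }
        _ => {
            // Shared state must be reset on every exit path.
            t.tokenize_state.marker = 0;
            State::Ok
        }
    }
}

Since `inside` is a zero-sized fn item, `Box::new(inside)` does not actually allocate, unlike the old capturing closures; the price is the reset discipline visible throughout the diff, for example the new `tokenizer.tokenize_state.marker = 0` in `continuation_after` above.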
diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs
index 8a44c29..a4c0349 100644
--- a/src/construct/html_text.rs
+++ b/src/construct/html_text.rs
@@ -57,7 +57,7 @@
use crate::constant::HTML_CDATA_PREFIX;
use crate::construct::partial_space_or_tab::space_or_tab;
use crate::token::Token;
-use crate::tokenizer::{State, StateFn, Tokenizer};
+use crate::tokenizer::{State, Tokenizer};
/// Start of HTML (text)
///
@@ -132,7 +132,7 @@ fn declaration_open(tokenizer: &mut Tokenizer) -> State {
}
Some(b'[') => {
tokenizer.consume();
- State::Fn(Box::new(|t| cdata_open_inside(t, 0)))
+ State::Fn(Box::new(cdata_open_inside))
}
_ => State::Nok,
}
@@ -207,7 +207,10 @@ fn comment_start_dash(tokenizer: &mut Tokenizer) -> State {
fn comment(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => State::Nok,
- Some(b'\n') => at_line_ending(tokenizer, Box::new(comment)),
+ Some(b'\n') => {
+ tokenizer.tokenize_state.return_state = Some(Box::new(comment));
+ at_line_ending(tokenizer)
+ }
Some(b'-') => {
tokenizer.consume();
State::Fn(Box::new(comment_close))
@@ -241,14 +244,16 @@ fn comment_close(tokenizer: &mut Tokenizer) -> State {
/// > | a <![CDATA[>&<]]> b
/// ^^^^^^
/// ```
-fn cdata_open_inside(tokenizer: &mut Tokenizer, size: usize) -> State {
- if tokenizer.current == Some(HTML_CDATA_PREFIX[size]) {
+fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State {
+ if tokenizer.current == Some(HTML_CDATA_PREFIX[tokenizer.tokenize_state.size]) {
+ tokenizer.tokenize_state.size += 1;
tokenizer.consume();
- if size + 1 == HTML_CDATA_PREFIX.len() {
+ if tokenizer.tokenize_state.size == HTML_CDATA_PREFIX.len() {
+ tokenizer.tokenize_state.size = 0;
State::Fn(Box::new(cdata))
} else {
- State::Fn(Box::new(move |t| cdata_open_inside(t, size + 1)))
+ State::Fn(Box::new(cdata_open_inside))
}
} else {
State::Nok
@@ -264,7 +269,10 @@ fn cdata_open_inside(tokenizer: &mut Tokenizer, size: usize) -> State {
fn cdata(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => State::Nok,
- Some(b'\n') => at_line_ending(tokenizer, Box::new(cdata)),
+ Some(b'\n') => {
+ tokenizer.tokenize_state.return_state = Some(Box::new(cdata));
+ at_line_ending(tokenizer)
+ }
Some(b']') => {
tokenizer.consume();
State::Fn(Box::new(cdata_close))
@@ -315,7 +323,10 @@ fn cdata_end(tokenizer: &mut Tokenizer) -> State {
fn declaration(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'>') => end(tokenizer),
- Some(b'\n') => at_line_ending(tokenizer, Box::new(declaration)),
+ Some(b'\n') => {
+ tokenizer.tokenize_state.return_state = Some(Box::new(declaration));
+ at_line_ending(tokenizer)
+ }
_ => {
tokenizer.consume();
State::Fn(Box::new(declaration))
@@ -332,7 +343,10 @@ fn declaration(tokenizer: &mut Tokenizer) -> State {
fn instruction(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => State::Nok,
- Some(b'\n') => at_line_ending(tokenizer, Box::new(instruction)),
+ Some(b'\n') => {
+ tokenizer.tokenize_state.return_state = Some(Box::new(instruction));
+ at_line_ending(tokenizer)
+ }
Some(b'?') => {
tokenizer.consume();
State::Fn(Box::new(instruction_close))
@@ -399,7 +413,10 @@ fn tag_close(tokenizer: &mut Tokenizer) -> State {
/// ```
fn tag_close_between(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some(b'\n') => at_line_ending(tokenizer, Box::new(tag_close_between)),
+ Some(b'\n') => {
+ tokenizer.tokenize_state.return_state = Some(Box::new(tag_close_between));
+ at_line_ending(tokenizer)
+ }
Some(b'\t' | b' ') => {
tokenizer.consume();
State::Fn(Box::new(tag_close_between))
@@ -434,7 +451,10 @@ fn tag_open(tokenizer: &mut Tokenizer) -> State {
/// ```
fn tag_open_between(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some(b'\n') => at_line_ending(tokenizer, Box::new(tag_open_between)),
+ Some(b'\n') => {
+ tokenizer.tokenize_state.return_state = Some(Box::new(tag_open_between));
+ at_line_ending(tokenizer)
+ }
Some(b'\t' | b' ') => {
tokenizer.consume();
State::Fn(Box::new(tag_open_between))
@@ -478,7 +498,10 @@ fn tag_open_attribute_name(tokenizer: &mut Tokenizer) -> State {
/// ```
fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some(b'\n') => at_line_ending(tokenizer, Box::new(tag_open_attribute_name_after)),
+ Some(b'\n') => {
+ tokenizer.tokenize_state.return_state = Some(Box::new(tag_open_attribute_name_after));
+ at_line_ending(tokenizer)
+ }
Some(b'\t' | b' ') => {
tokenizer.consume();
State::Fn(Box::new(tag_open_attribute_name_after))
@@ -501,17 +524,18 @@ fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer) -> State {
fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'<' | b'=' | b'>' | b'`') => State::Nok,
- Some(b'\n') => at_line_ending(tokenizer, Box::new(tag_open_attribute_value_before)),
+ Some(b'\n') => {
+ tokenizer.tokenize_state.return_state = Some(Box::new(tag_open_attribute_value_before));
+ at_line_ending(tokenizer)
+ }
Some(b'\t' | b' ') => {
tokenizer.consume();
State::Fn(Box::new(tag_open_attribute_value_before))
}
Some(b'"' | b'\'') => {
- let marker = tokenizer.current.unwrap();
+ tokenizer.tokenize_state.marker = tokenizer.current.unwrap();
tokenizer.consume();
- State::Fn(Box::new(move |t| {
- tag_open_attribute_value_quoted(t, marker)
- }))
+ State::Fn(Box::new(tag_open_attribute_value_quoted))
}
Some(_) => {
tokenizer.consume();
@@ -526,22 +550,24 @@ fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer) -> State {
/// > | a <b c="d"> e
/// ^
/// ```
-fn tag_open_attribute_value_quoted(tokenizer: &mut Tokenizer, marker: u8) -> State {
+fn tag_open_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- None => State::Nok,
- Some(b'\n') => at_line_ending(
- tokenizer,
- Box::new(move |t| tag_open_attribute_value_quoted(t, marker)),
- ),
- Some(b'"' | b'\'') if tokenizer.current.unwrap() == marker => {
+ None => {
+ tokenizer.tokenize_state.marker = 0;
+ State::Nok
+ }
+ Some(b'\n') => {
+ tokenizer.tokenize_state.return_state = Some(Box::new(tag_open_attribute_value_quoted));
+ at_line_ending(tokenizer)
+ }
+ Some(b'"' | b'\'') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
+ tokenizer.tokenize_state.marker = 0;
tokenizer.consume();
State::Fn(Box::new(tag_open_attribute_value_quoted_after))
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(move |t| {
- tag_open_attribute_value_quoted(t, marker)
- }))
+ State::Fn(Box::new(tag_open_attribute_value_quoted))
}
}
}
@@ -605,14 +631,14 @@ fn end(tokenizer: &mut Tokenizer) -> State {
/// ^
/// | b-->
/// ```
-fn at_line_ending(tokenizer: &mut Tokenizer, return_state: Box<StateFn>) -> State {
+fn at_line_ending(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'\n') => {
tokenizer.exit(Token::HtmlTextData);
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
- State::Fn(Box::new(|t| after_line_ending(t, return_state)))
+ State::Fn(Box::new(after_line_ending))
}
_ => unreachable!("expected eol"),
}
@@ -628,10 +654,8 @@ fn at_line_ending(tokenizer: &mut Tokenizer, return_state: Box<StateFn>) -> Stat
/// > | b-->
/// ^
/// ```
-fn after_line_ending(tokenizer: &mut Tokenizer, return_state: Box<StateFn>) -> State {
- tokenizer.attempt_opt(space_or_tab(), |t| {
- after_line_ending_prefix(t, return_state)
- })(tokenizer)
+fn after_line_ending(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.attempt_opt(space_or_tab(), after_line_ending_prefix)(tokenizer)
}
/// After a line ending, after indent.
@@ -644,7 +668,8 @@ fn after_line_ending(tokenizer: &mut Tokenizer, return_state: Box<StateFn>) -> S
/// > | b-->
/// ^
/// ```
-fn after_line_ending_prefix(tokenizer: &mut Tokenizer, return_state: Box<StateFn>) -> State {
+fn after_line_ending_prefix(tokenizer: &mut Tokenizer) -> State {
+ let return_state = tokenizer.tokenize_state.return_state.take().unwrap();
tokenizer.enter(Token::HtmlTextData);
return_state(tokenizer)
}
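
In html_text, the continuation that used to be passed into `at_line_ending` as a `Box<StateFn>` argument becomes a parked `return_state` that `after_line_ending_prefix` takes back out. A rough model of that hand-off (names mirror the diff, types are stand-ins):

type StateFn = dyn FnOnce(&mut Tokenizer) -> State;

enum State {
    Ok,
    Fn(Box<StateFn>),
}

struct Tokenizer {
    current: Option<u8>,
    return_state: Option<Box<StateFn>>,
}

fn comment(t: &mut Tokenizer) -> State {
    match t.current {
        Some(b'\n') => {
            // Park the current state, defer to the shared eol helper.
            t.return_state = Some(Box::new(comment));
            at_line_ending(t)
        }
        _ => State::Ok,
    }
}

fn at_line_ending(_t: &mut Tokenizer) -> State {
    // Consume the line ending (and optional indent), then resume.
    State::Fn(Box::new(after_line_ending_prefix))
}

fn after_line_ending_prefix(t: &mut Tokenizer) -> State {
    // `take()` empties the slot, so a stale continuation cannot leak
    // into the next construct.
    let return_state = t.return_state.take().unwrap();
    return_state(t)
}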
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index d3191a8..b38e15a 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -148,10 +148,8 @@
use crate::constant::RESOURCE_DESTINATION_BALANCE_MAX;
use crate::construct::{
- partial_destination::{start as destination, Options as DestinationOptions},
- partial_label::{start as label, Options as LabelOptions},
- partial_space_or_tab::space_or_tab_eol,
- partial_title::{start as title, Options as TitleOptions},
+ partial_destination::start as destination, partial_label::start as label,
+ partial_space_or_tab::space_or_tab_eol, partial_title::start as title,
};
use crate::token::Token;
use crate::tokenizer::{Event, EventType, Media, State, Tokenizer};
@@ -161,15 +159,6 @@ use crate::util::{
slice::{Position, Slice},
};
-/// State needed to parse label end.
-#[derive(Debug)]
-struct Info {
- /// Index into `label_start_stack` of the corresponding opening.
- label_start_index: usize,
- /// The proposed `Media` that this seems to represent.
- media: Media,
-}
-
/// Start of label end.
///
/// ```markdown
@@ -202,36 +191,20 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
.get_mut(label_start_index)
.unwrap();
+ tokenizer.tokenize_state.start = label_start_index;
+ tokenizer.tokenize_state.end = tokenizer.events.len();
+
// Mark as balanced if the info is inactive.
if label_start.inactive {
- return nok(tokenizer, label_start_index);
+ return nok(tokenizer);
}
- let label_end_start = tokenizer.events.len();
-
- let info = Info {
- label_start_index,
- media: Media {
- start: label_start.start,
- end: (label_end_start, label_end_start + 3),
- id: normalize_identifier(
- // We don’t care about virtual spaces, so `indices` and `as_str` are fine.
- Slice::from_indices(
- tokenizer.parse_state.bytes,
- tokenizer.events[label_start.start.1].point.index,
- tokenizer.events[label_end_start - 1].point.index,
- )
- .as_str(),
- ),
- },
- };
-
tokenizer.enter(Token::LabelEnd);
tokenizer.enter(Token::LabelMarker);
tokenizer.consume();
tokenizer.exit(Token::LabelMarker);
tokenizer.exit(Token::LabelEnd);
- return State::Fn(Box::new(move |t| after(t, info)));
+ return State::Fn(Box::new(after));
}
}
@@ -250,40 +223,40 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// > | [a] b
/// ^
/// ```
-fn after(tokenizer: &mut Tokenizer, info: Info) -> State {
- let defined = tokenizer.parse_state.definitions.contains(&info.media.id);
+fn after(tokenizer: &mut Tokenizer) -> State {
+ let start = &tokenizer.label_start_stack[tokenizer.tokenize_state.start];
+ let defined = tokenizer
+ .parse_state
+ .definitions
+ .contains(&normalize_identifier(
+ // We don’t care about virtual spaces, so `indices` and `as_str` are fine.
+ Slice::from_indices(
+ tokenizer.parse_state.bytes,
+ tokenizer.events[start.start.1].point.index,
+ tokenizer.events[tokenizer.tokenize_state.end].point.index,
+ )
+ .as_str(),
+ ));
match tokenizer.current {
// Resource (`[asd](fgh)`)?
Some(b'(') => tokenizer.attempt(resource, move |is_ok| {
- Box::new(move |t| {
- // Also fine if `defined`, as then it’s a valid shortcut.
- if is_ok || defined {
- ok(t, info)
- } else {
- nok(t, info.label_start_index)
- }
- })
+ Box::new(if is_ok || defined { ok } else { nok })
})(tokenizer),
// Full (`[asd][fgh]`) or collapsed (`[asd][]`) reference?
Some(b'[') => tokenizer.attempt(full_reference, move |is_ok| {
- Box::new(move |t| {
- if is_ok {
- ok(t, info)
- } else if defined {
- reference_not_full(t, info)
- } else {
- nok(t, info.label_start_index)
- }
+ Box::new(if is_ok {
+ ok
+ } else if defined {
+ reference_not_full
+ } else {
+ nok
})
})(tokenizer),
// Shortcut (`[asd]`) reference?
_ => {
- if defined {
- ok(tokenizer, info)
- } else {
- nok(tokenizer, info.label_start_index)
- }
+ let func = if defined { ok } else { nok };
+ func(tokenizer)
}
}
}
@@ -298,15 +271,9 @@ fn after(tokenizer: &mut Tokenizer, info: Info) -> State {
/// > | [a] b
/// ^
/// ```
-fn reference_not_full(tokenizer: &mut Tokenizer, info: Info) -> State {
- tokenizer.attempt(collapsed_reference, move |is_ok| {
- Box::new(move |t| {
- if is_ok {
- ok(t, info)
- } else {
- nok(t, info.label_start_index)
- }
- })
+fn reference_not_full(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.attempt(collapsed_reference, |is_ok| {
+ Box::new(if is_ok { ok } else { nok })
})(tokenizer)
}
@@ -322,16 +289,15 @@ fn reference_not_full(tokenizer: &mut Tokenizer, info: Info) -> State {
/// > | [a] b
/// ^
/// ```
-fn ok(tokenizer: &mut Tokenizer, mut info: Info) -> State {
+fn ok(tokenizer: &mut Tokenizer) -> State {
+ let label_start_index = tokenizer.tokenize_state.start;
// Remove this one and everything after it.
- let mut left = tokenizer
- .label_start_stack
- .split_off(info.label_start_index);
+ let mut left = tokenizer.label_start_stack.split_off(label_start_index);
// Remove this one from `left`, as we’ll move it to `media_list`.
- left.remove(0);
+ let label_start = left.remove(0);
tokenizer.label_start_list_loose.append(&mut left);
- let is_link = tokenizer.events[info.media.start.0].token_type == Token::LabelLink;
+ let is_link = tokenizer.events[label_start.start.0].token_type == Token::LabelLink;
if is_link {
let mut index = 0;
@@ -344,8 +310,12 @@ fn ok(tokenizer: &mut Tokenizer, mut info: Info) -> State {
}
}
- info.media.end.1 = tokenizer.events.len() - 1;
- tokenizer.media_list.push(info.media);
+ tokenizer.media_list.push(Media {
+ start: label_start.start,
+ end: (tokenizer.tokenize_state.end, tokenizer.events.len() - 1),
+ });
+ tokenizer.tokenize_state.start = 0;
+ tokenizer.tokenize_state.end = 0;
tokenizer.register_resolver_before("media".to_string(), Box::new(resolve_media));
State::Ok
}
@@ -362,12 +332,14 @@ fn ok(tokenizer: &mut Tokenizer, mut info: Info) -> State {
/// > | [a] b
/// ^
/// ```
-fn nok(tokenizer: &mut Tokenizer, label_start_index: usize) -> State {
+fn nok(tokenizer: &mut Tokenizer) -> State {
tokenizer
.label_start_stack
- .get_mut(label_start_index)
+ .get_mut(tokenizer.tokenize_state.start)
.unwrap()
.balanced = true;
+ tokenizer.tokenize_state.start = 0;
+ tokenizer.tokenize_state.end = 0;
State::Nok
}
@@ -407,24 +379,23 @@ fn resource_start(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
fn resource_open(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- Some(b')') => resource_end(tokenizer),
- _ => tokenizer.go(
- |t| {
- destination(
- t,
- DestinationOptions {
- limit: RESOURCE_DESTINATION_BALANCE_MAX,
- destination: Token::ResourceDestination,
- literal: Token::ResourceDestinationLiteral,
- marker: Token::ResourceDestinationLiteralMarker,
- raw: Token::ResourceDestinationRaw,
- string: Token::ResourceDestinationString,
- },
- )
- },
- destination_after,
- )(tokenizer),
+ if let Some(b')') = tokenizer.current {
+ resource_end(tokenizer)
+ } else {
+ tokenizer.tokenize_state.token_1 = Token::ResourceDestination;
+ tokenizer.tokenize_state.token_2 = Token::ResourceDestinationLiteral;
+ tokenizer.tokenize_state.token_3 = Token::ResourceDestinationLiteralMarker;
+ tokenizer.tokenize_state.token_4 = Token::ResourceDestinationRaw;
+ tokenizer.tokenize_state.token_5 = Token::ResourceDestinationString;
+ tokenizer.tokenize_state.size_other = RESOURCE_DESTINATION_BALANCE_MAX;
+
+ tokenizer.attempt(destination, |ok| {
+ Box::new(if ok {
+ destination_after
+ } else {
+ destination_missing
+ })
+ })(tokenizer)
}
}
@@ -435,11 +406,29 @@ fn resource_open(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
fn destination_after(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.tokenize_state.token_1 = Token::Data;
+ tokenizer.tokenize_state.token_2 = Token::Data;
+ tokenizer.tokenize_state.token_3 = Token::Data;
+ tokenizer.tokenize_state.token_4 = Token::Data;
+ tokenizer.tokenize_state.token_5 = Token::Data;
+ tokenizer.tokenize_state.size_other = 0;
+
tokenizer.attempt(space_or_tab_eol(), |ok| {
Box::new(if ok { resource_between } else { resource_end })
})(tokenizer)
}
+/// Without destination.
+fn destination_missing(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.tokenize_state.token_1 = Token::Data;
+ tokenizer.tokenize_state.token_2 = Token::Data;
+ tokenizer.tokenize_state.token_3 = Token::Data;
+ tokenizer.tokenize_state.token_4 = Token::Data;
+ tokenizer.tokenize_state.token_5 = Token::Data;
+ tokenizer.tokenize_state.size_other = 0;
+ State::Nok
+}
+
/// In a resource, after a destination, after whitespace.
///
/// ```markdown
@@ -448,19 +437,12 @@ fn destination_after(tokenizer: &mut Tokenizer) -> State {
/// ```
fn resource_between(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some(b'"' | b'\'' | b'(') => tokenizer.go(
- |t| {
- title(
- t,
- TitleOptions {
- title: Token::ResourceTitle,
- marker: Token::ResourceTitleMarker,
- string: Token::ResourceTitleString,
- },
- )
- },
- title_after,
- )(tokenizer),
+ Some(b'"' | b'\'' | b'(') => {
+ tokenizer.tokenize_state.token_1 = Token::ResourceTitle;
+ tokenizer.tokenize_state.token_2 = Token::ResourceTitleMarker;
+ tokenizer.tokenize_state.token_3 = Token::ResourceTitleString;
+ tokenizer.go(title, title_after)(tokenizer)
+ }
_ => resource_end(tokenizer),
}
}
@@ -472,6 +454,9 @@ fn resource_between(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
fn title_after(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.tokenize_state.token_1 = Token::Data;
+ tokenizer.tokenize_state.token_2 = Token::Data;
+ tokenizer.tokenize_state.token_3 = Token::Data;
tokenizer.attempt_opt(space_or_tab_eol(), resource_end)(tokenizer)
}
@@ -502,19 +487,12 @@ fn resource_end(tokenizer: &mut Tokenizer) -> State {
/// ```
fn full_reference(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some(b'[') => tokenizer.go(
- |t| {
- label(
- t,
- LabelOptions {
- label: Token::Reference,
- marker: Token::ReferenceMarker,
- string: Token::ReferenceString,
- },
- )
- },
- full_reference_after,
- )(tokenizer),
+ Some(b'[') => {
+ tokenizer.tokenize_state.token_1 = Token::Reference;
+ tokenizer.tokenize_state.token_2 = Token::ReferenceMarker;
+ tokenizer.tokenize_state.token_3 = Token::ReferenceString;
+ tokenizer.go(label, full_reference_after)(tokenizer)
+ }
_ => unreachable!("expected `[`"),
}
}
@@ -526,6 +504,10 @@ fn full_reference(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
fn full_reference_after(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.tokenize_state.token_1 = Token::Data;
+ tokenizer.tokenize_state.token_2 = Token::Data;
+ tokenizer.tokenize_state.token_3 = Token::Data;
+
if tokenizer
.parse_state
.definitions
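
label_end now hands token types to the shared `destination`/`label`/`title` partials through numbered slots instead of an `Options` struct, and must clear them on every way out, including the new `destination_missing` failure branch. A miniature of that convention (hypothetical, trimmed to two slots):

#[derive(Clone, Debug, PartialEq)]
enum Token {
    Data,
    ResourceDestination,
    ResourceDestinationLiteral,
}

struct TokenizeState {
    token_1: Token,
    token_2: Token,
}

fn resource_open(state: &mut TokenizeState) -> bool {
    // Load the slots before delegating to the shared partial...
    state.token_1 = Token::ResourceDestination;
    state.token_2 = Token::ResourceDestinationLiteral;
    let ok = destination(state);
    // ...and reset them on success *and* failure, as `destination_after`
    // and `destination_missing` both do above.
    state.token_1 = Token::Data;
    state.token_2 = Token::Data;
    ok
}

fn destination(state: &mut TokenizeState) -> bool {
    // The shared partial reads its token types out of the slots.
    state.token_1 == Token::ResourceDestination
}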
diff --git a/src/construct/list.rs b/src/construct/list.rs
index d5a9899..0e12b7c 100644
--- a/src/construct/list.rs
+++ b/src/construct/list.rs
@@ -123,7 +123,7 @@ fn before_unordered(tokenizer: &mut Tokenizer) -> State {
fn before_ordered(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Token::ListItemPrefix);
tokenizer.enter(Token::ListItemValue);
- inside(tokenizer, 0)
+ inside(tokenizer)
}
/// In an ordered list item value.
@@ -132,17 +132,21 @@ fn before_ordered(tokenizer: &mut Tokenizer) -> State {
/// > | 1. a
/// ^
/// ```
-fn inside(tokenizer: &mut Tokenizer, size: usize) -> State {
+fn inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some(b'.' | b')') if !tokenizer.interrupt || size < 2 => {
+ Some(b'.' | b')') if !tokenizer.interrupt || tokenizer.tokenize_state.size < 2 => {
tokenizer.exit(Token::ListItemValue);
marker(tokenizer)
}
- Some(b'0'..=b'9') if size + 1 < LIST_ITEM_VALUE_SIZE_MAX => {
+ Some(b'0'..=b'9') if tokenizer.tokenize_state.size + 1 < LIST_ITEM_VALUE_SIZE_MAX => {
+ tokenizer.tokenize_state.size += 1;
tokenizer.consume();
- State::Fn(Box::new(move |t| inside(t, size + 1)))
+ State::Fn(Box::new(inside))
+ }
+ _ => {
+ tokenizer.tokenize_state.size = 0;
+ State::Nok
}
- _ => State::Nok,
}
}
@@ -170,12 +174,9 @@ fn marker(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
fn marker_after(tokenizer: &mut Tokenizer) -> State {
- tokenizer.check(blank_line, move |ok| {
- if ok {
- Box::new(|t| after(t, true))
- } else {
- Box::new(marker_after_not_blank)
- }
+ tokenizer.tokenize_state.size = 1;
+ tokenizer.check(blank_line, |ok| {
+ Box::new(if ok { after } else { marker_after_not_blank })
})(tokenizer)
}
@@ -186,13 +187,11 @@ fn marker_after(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
fn marker_after_not_blank(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.tokenize_state.size = 0;
+
// Attempt to parse up to the largest allowed indent, `nok` if there is more whitespace.
- tokenizer.attempt(whitespace, move |ok| {
- if ok {
- Box::new(|t| after(t, false))
- } else {
- Box::new(prefix_other)
- }
+ tokenizer.attempt(whitespace, |ok| {
+ Box::new(if ok { after } else { prefix_other })
})(tokenizer)
}
@@ -232,7 +231,7 @@ fn prefix_other(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Token::SpaceOrTab);
tokenizer.consume();
tokenizer.exit(Token::SpaceOrTab);
- State::Fn(Box::new(|t| after(t, false)))
+ State::Fn(Box::new(after))
}
_ => State::Nok,
}
@@ -244,7 +243,10 @@ fn prefix_other(tokenizer: &mut Tokenizer) -> State {
/// > | * a
/// ^
/// ```
-fn after(tokenizer: &mut Tokenizer, blank: bool) -> State {
+fn after(tokenizer: &mut Tokenizer) -> State {
+ let blank = tokenizer.tokenize_state.size == 1;
+ tokenizer.tokenize_state.size = 0;
+
if blank && tokenizer.interrupt {
State::Nok
} else {
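
The list hunks encode the old `blank: bool` argument in the shared `size` counter: `marker_after` sets it to 1 before the blank-line check, and `after` decodes and clears it. Sketched with a bare struct standing in for the tokenizer:

struct TokenizeState {
    size: usize,
}

// `marker_after` runs the blank-line check with the flag raised...
fn marker_after(state: &mut TokenizeState) {
    state.size = 1;
}

// ...`marker_after_not_blank` lowers it on the other branch...
fn marker_after_not_blank(state: &mut TokenizeState) {
    state.size = 0;
}

// ...and `after` decodes the flag, then resets the shared field.
fn after(state: &mut TokenizeState) -> bool {
    let blank = state.size == 1;
    state.size = 0;
    blank
}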
diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs
index ec5669c..7fdaa66 100644
--- a/src/construct/paragraph.rs
+++ b/src/construct/paragraph.rs
@@ -44,9 +44,7 @@ use crate::util::skip::opt as skip_opt;
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- None | Some(b'\n') => {
- unreachable!("unexpected eol/eof")
- }
+ None | Some(b'\n') => unreachable!("unexpected eol/eof"),
_ => {
tokenizer.enter(Token::Paragraph);
tokenizer.enter_with_content(Token::Data, Some(ContentType::Text));
diff --git a/src/construct/partial_bom.rs b/src/construct/partial_bom.rs
index d92c9c1..2257bfd 100644
--- a/src/construct/partial_bom.rs
+++ b/src/construct/partial_bom.rs
@@ -13,6 +13,8 @@
use crate::token::Token;
use crate::tokenizer::{State, Tokenizer};
+const BOM: [u8; 3] = [0xEF, 0xBB, 0xBF];
+
/// Before a BOM.
///
/// ```text
@@ -20,42 +22,33 @@ use crate::tokenizer::{State, Tokenizer};
/// ^^^^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if tokenizer.current == Some(0xEF) {
+ if tokenizer.current == Some(BOM[0]) {
tokenizer.enter(Token::ByteOrderMark);
- tokenizer.consume();
- State::Fn(Box::new(cont))
- } else {
- State::Nok
- }
-}
-
-/// Second byte in BOM.
-///
-/// ```text
-/// > | 0xEF 0xBB 0xBF
-/// ^^^^
-/// ```
-fn cont(tokenizer: &mut Tokenizer) -> State {
- if tokenizer.current == Some(0xBB) {
- tokenizer.consume();
- State::Fn(Box::new(end))
+ inside(tokenizer)
} else {
State::Nok
}
}
-/// Last byte in BOM.
+/// Inside the BOM.
///
/// ```text
/// > | 0xEF 0xBB 0xBF
-/// ^^^^
+/// ^^^^ ^^^^ ^^^^
/// ```
-fn end(tokenizer: &mut Tokenizer) -> State {
- if tokenizer.current == Some(0xBF) {
+fn inside(tokenizer: &mut Tokenizer) -> State {
+ if tokenizer.current == Some(BOM[tokenizer.tokenize_state.size]) {
+ tokenizer.tokenize_state.size += 1;
tokenizer.consume();
- tokenizer.exit(Token::ByteOrderMark);
- State::Ok
+ if tokenizer.tokenize_state.size == BOM.len() {
+ tokenizer.exit(Token::ByteOrderMark);
+ tokenizer.tokenize_state.size = 0;
+ State::Ok
+ } else {
+ State::Fn(Box::new(inside))
+ }
} else {
+ tokenizer.tokenize_state.size = 0;
State::Nok
}
}
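
partial_bom collapses three per-byte states into one state that indexes a `BOM` constant with `tokenize_state.size`. The same scan as a standalone, runnable function:

const BOM: [u8; 3] = [0xEF, 0xBB, 0xBF];

fn scan_bom(bytes: &[u8]) -> bool {
    let mut size = 0; // `tokenize_state.size` in the real code
    while size < BOM.len() {
        if bytes.get(size) == Some(&BOM[size]) {
            size += 1; // consume() + bump the counter
        } else {
            return false; // reset the counter, `State::Nok`
        }
    }
    true // exit `ByteOrderMark`, reset the counter, `State::Ok`
}

fn main() {
    assert!(scan_bom(&[0xEF, 0xBB, 0xBF, b'a']));
    assert!(!scan_bom(b"abc"));
}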
diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs
index 335d7ab..0365489 100644
--- a/src/construct/partial_data.rs
+++ b/src/construct/partial_data.rs
@@ -15,14 +15,14 @@ use crate::tokenizer::{EventType, State, Tokenizer};
/// > | abc
/// ^
/// ```
-pub fn start(tokenizer: &mut Tokenizer, stop: &'static [u8]) -> State {
+pub fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some(byte) if stop.contains(&byte) => {
+ Some(byte) if tokenizer.tokenize_state.stop.contains(&byte) => {
tokenizer.enter(Token::Data);
tokenizer.consume();
- State::Fn(Box::new(move |t| data(t, stop)))
+ State::Fn(Box::new(data))
}
- _ => at_break(tokenizer, stop),
+ _ => at_break(tokenizer),
}
}
@@ -32,22 +32,22 @@ pub fn start(tokenizer: &mut Tokenizer, stop: &'static [u8]) -> State {
/// > | abc
/// ^
/// ```
-fn at_break(tokenizer: &mut Tokenizer, stop: &'static [u8]) -> State {
+fn at_break(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => State::Ok,
Some(b'\n') => {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
- State::Fn(Box::new(move |t| at_break(t, stop)))
+ State::Fn(Box::new(at_break))
}
- Some(byte) if stop.contains(&byte) => {
+ Some(byte) if tokenizer.tokenize_state.stop.contains(&byte) => {
tokenizer.register_resolver_before("data".to_string(), Box::new(resolve_data));
State::Ok
}
_ => {
tokenizer.enter(Token::Data);
- data(tokenizer, stop)
+ data(tokenizer)
}
}
}
@@ -58,19 +58,19 @@ fn at_break(tokenizer: &mut Tokenizer, stop: &'static [u8]) -> State {
/// > | abc
/// ^^^
/// ```
-fn data(tokenizer: &mut Tokenizer, stop: &'static [u8]) -> State {
+fn data(tokenizer: &mut Tokenizer) -> State {
let done = match tokenizer.current {
None | Some(b'\n') => true,
- Some(byte) if stop.contains(&byte) => true,
+ Some(byte) if tokenizer.tokenize_state.stop.contains(&byte) => true,
_ => false,
};
if done {
tokenizer.exit(Token::Data);
- at_break(tokenizer, stop)
+ at_break(tokenizer)
} else {
tokenizer.consume();
- State::Fn(Box::new(move |t| data(t, stop)))
+ State::Fn(Box::new(data))
}
}
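
partial_data's `stop: &'static [u8]` parameter moves into `tokenize_state.stop`, set once by the caller instead of threaded through `start`/`at_break`/`data`. A standalone approximation of what `data` scans for:

struct TokenizeState {
    stop: &'static [u8],
}

fn data_len(state: &TokenizeState, bytes: &[u8]) -> usize {
    // Data runs until eof, eol, or one of the registered stop bytes.
    bytes
        .iter()
        .position(|byte| *byte == b'\n' || state.stop.contains(byte))
        .unwrap_or(bytes.len())
}

fn main() {
    // e.g. a text content type that stops at attention markers.
    let state = TokenizeState { stop: b"*_" };
    assert_eq!(data_len(&state, b"ab*c"), 2);
}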
diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs
index 809aa27..f1cfc7d 100644
--- a/src/construct/partial_destination.rs
+++ b/src/construct/partial_destination.rs
@@ -74,34 +74,6 @@
use crate::token::Token;
use crate::tokenizer::{ContentType, State, Tokenizer};
-/// Configuration.
-///
-/// You must pass the token types in that are used.
-#[derive(Debug)]
-pub struct Options {
- /// Token for the whole destination.
- pub destination: Token,
- /// Token for a literal (enclosed) destination.
- pub literal: Token,
- /// Token for a literal marker.
- pub marker: Token,
- /// Token for a raw destination.
- pub raw: Token,
- /// Token for a the string.
- pub string: Token,
- /// Maximum unbalanced parens.
- pub limit: usize,
-}
-
-/// State needed to parse destination.
-#[derive(Debug)]
-struct Info {
- /// Paren balance (used in raw).
- balance: usize,
- /// Configuration.
- options: Options,
-}
-
/// Before a destination.
///
/// ```markdown
@@ -110,29 +82,24 @@ struct Info {
/// > | aa
/// ^
/// ```
-pub fn start(tokenizer: &mut Tokenizer, options: Options) -> State {
- let info = Info {
- balance: 0,
- options,
- };
-
+pub fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'<') => {
- tokenizer.enter(info.options.destination.clone());
- tokenizer.enter(info.options.literal.clone());
- tokenizer.enter(info.options.marker.clone());
+ tokenizer.enter(tokenizer.tokenize_state.token_1.clone());
+ tokenizer.enter(tokenizer.tokenize_state.token_2.clone());
+ tokenizer.enter(tokenizer.tokenize_state.token_3.clone());
tokenizer.consume();
- tokenizer.exit(info.options.marker.clone());
- State::Fn(Box::new(|t| enclosed_before(t, info)))
+ tokenizer.exit(tokenizer.tokenize_state.token_3.clone());
+ State::Fn(Box::new(enclosed_before))
}
// ASCII control, space, closing paren, but *not* `\0`.
None | Some(0x01..=0x1F | b' ' | b')' | 0x7F) => State::Nok,
Some(_) => {
- tokenizer.enter(info.options.destination.clone());
- tokenizer.enter(info.options.raw.clone());
- tokenizer.enter(info.options.string.clone());
+ tokenizer.enter(tokenizer.tokenize_state.token_1.clone());
+ tokenizer.enter(tokenizer.tokenize_state.token_4.clone());
+ tokenizer.enter(tokenizer.tokenize_state.token_5.clone());
tokenizer.enter_with_content(Token::Data, Some(ContentType::String));
- raw(tokenizer, info)
+ raw(tokenizer)
}
}
}
@@ -143,18 +110,18 @@ pub fn start(tokenizer: &mut Tokenizer, options: Options) -> State {
/// > | <aa>
/// ^
/// ```
-fn enclosed_before(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn enclosed_before(tokenizer: &mut Tokenizer) -> State {
if let Some(b'>') = tokenizer.current {
- tokenizer.enter(info.options.marker.clone());
+ tokenizer.enter(tokenizer.tokenize_state.token_3.clone());
tokenizer.consume();
- tokenizer.exit(info.options.marker.clone());
- tokenizer.exit(info.options.literal.clone());
- tokenizer.exit(info.options.destination);
+ tokenizer.exit(tokenizer.tokenize_state.token_3.clone());
+ tokenizer.exit(tokenizer.tokenize_state.token_2.clone());
+ tokenizer.exit(tokenizer.tokenize_state.token_1.clone());
State::Ok
} else {
- tokenizer.enter(info.options.string.clone());
+ tokenizer.enter(tokenizer.tokenize_state.token_5.clone());
tokenizer.enter_with_content(Token::Data, Some(ContentType::String));
- enclosed(tokenizer, info)
+ enclosed(tokenizer)
}
}
@@ -164,21 +131,21 @@ fn enclosed_before(tokenizer: &mut Tokenizer, info: Info) -> State {
/// > | <aa>
/// ^
/// ```
-fn enclosed(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn enclosed(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n' | b'<') => State::Nok,
Some(b'>') => {
tokenizer.exit(Token::Data);
- tokenizer.exit(info.options.string.clone());
- enclosed_before(tokenizer, info)
+ tokenizer.exit(tokenizer.tokenize_state.token_5.clone());
+ enclosed_before(tokenizer)
}
Some(b'\\') => {
tokenizer.consume();
- State::Fn(Box::new(|t| enclosed_escape(t, info)))
+ State::Fn(Box::new(enclosed_escape))
}
_ => {
tokenizer.consume();
- State::Fn(Box::new(|t| enclosed(t, info)))
+ State::Fn(Box::new(enclosed))
}
}
}
@@ -189,13 +156,13 @@ fn enclosed(tokenizer: &mut Tokenizer, info: Info) -> State {
/// > | <a\*a>
/// ^
/// ```
-fn enclosed_escape(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn enclosed_escape(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'<' | b'>' | b'\\') => {
tokenizer.consume();
- State::Fn(Box::new(|t| enclosed(t, info)))
+ State::Fn(Box::new(enclosed))
}
- _ => enclosed(tokenizer, info),
+ _ => enclosed(tokenizer),
}
}
@@ -205,34 +172,38 @@ fn enclosed_escape(tokenizer: &mut Tokenizer, info: Info) -> State {
/// > | aa
/// ^
/// ```
-fn raw(tokenizer: &mut Tokenizer, mut info: Info) -> State {
+fn raw(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- None | Some(b'\t' | b'\n' | b' ' | b')') if info.balance == 0 => {
+ None | Some(b'\t' | b'\n' | b' ' | b')') if tokenizer.tokenize_state.size == 0 => {
tokenizer.exit(Token::Data);
- tokenizer.exit(info.options.string.clone());
- tokenizer.exit(info.options.raw.clone());
- tokenizer.exit(info.options.destination);
+ tokenizer.exit(tokenizer.tokenize_state.token_5.clone());
+ tokenizer.exit(tokenizer.tokenize_state.token_4.clone());
+ tokenizer.exit(tokenizer.tokenize_state.token_1.clone());
+ tokenizer.tokenize_state.size = 0;
State::Ok
}
- Some(b'(') if info.balance < info.options.limit => {
+ Some(b'(') if tokenizer.tokenize_state.size < tokenizer.tokenize_state.size_other => {
tokenizer.consume();
- info.balance += 1;
- State::Fn(Box::new(move |t| raw(t, info)))
+ tokenizer.tokenize_state.size += 1;
+ State::Fn(Box::new(raw))
}
// ASCII control (but *not* `\0`) and space and `(`.
- None | Some(0x01..=0x1F | b' ' | b'(' | 0x7F) => State::Nok,
+ None | Some(0x01..=0x1F | b' ' | b'(' | 0x7F) => {
+ tokenizer.tokenize_state.size = 0;
+ State::Nok
+ }
Some(b')') => {
tokenizer.consume();
- info.balance -= 1;
- State::Fn(Box::new(move |t| raw(t, info)))
+ tokenizer.tokenize_state.size -= 1;
+ State::Fn(Box::new(raw))
}
Some(b'\\') => {
tokenizer.consume();
- State::Fn(Box::new(move |t| raw_escape(t, info)))
+ State::Fn(Box::new(raw_escape))
}
Some(_) => {
tokenizer.consume();
- State::Fn(Box::new(move |t| raw(t, info)))
+ State::Fn(Box::new(raw))
}
}
}
@@ -243,12 +214,12 @@ fn raw(tokenizer: &mut Tokenizer, mut info: Info) -> State {
/// > | a\*a
/// ^
/// ```
-fn raw_escape(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn raw_escape(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'(' | b')' | b'\\') => {
tokenizer.consume();
- State::Fn(Box::new(move |t| raw(t, info)))
+ State::Fn(Box::new(raw))
}
- _ => raw(tokenizer, info),
+ _ => raw(tokenizer),
}
}
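
In partial_destination the paren balance moves into `tokenize_state.size` and the configured limit into `size_other` (loaded by `resource_open` in label_end above). The `raw` logic, simplified to a standalone checker that ignores escapes:

fn raw_balanced(bytes: &[u8], limit: usize) -> bool {
    let mut size = 0; // `tokenize_state.size`: current paren balance
    for &byte in bytes {
        match byte {
            b'(' if size < limit => size += 1, // limit = `size_other`
            b'(' => return false,              // too deep: reset, `State::Nok`
            b')' if size == 0 => return true,  // closes the resource: `State::Ok`
            b')' => size -= 1,
            _ => {}
        }
    }
    size == 0 // eof is only fine when balanced
}

fn main() {
    assert!(raw_balanced(b"a(b)c", 3));
    assert!(!raw_balanced(b"a((b)", 1));
}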
diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs
index 6fdb70d..0e1c2ec 100644
--- a/src/construct/partial_label.rs
+++ b/src/construct/partial_label.rs
@@ -64,53 +64,21 @@ use crate::subtokenize::link;
use crate::token::Token;
use crate::tokenizer::{ContentType, State, Tokenizer};
-/// Configuration.
-///
-/// You must pass the token types in that are used.
-#[derive(Debug)]
-pub struct Options {
- /// Token for the whole label.
- pub label: Token,
- /// Token for the markers.
- pub marker: Token,
- /// Token for the string (inside the markers).
- pub string: Token,
-}
-
-/// State needed to parse labels.
-#[derive(Debug)]
-struct Info {
- /// Whether we’ve seen our first `ChunkString`.
- connect: bool,
- /// Whether there are non-blank bytes in the label.
- data: bool,
- /// Number of bytes in the label.
- size: usize,
- /// Configuration.
- options: Options,
-}
-
/// Before a label.
///
/// ```markdown
/// > | [a]
/// ^
/// ```
-pub fn start(tokenizer: &mut Tokenizer, options: Options) -> State {
+pub fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'[') => {
- let info = Info {
- connect: false,
- data: false,
- size: 0,
- options,
- };
- tokenizer.enter(info.options.label.clone());
- tokenizer.enter(info.options.marker.clone());
+ tokenizer.enter(tokenizer.tokenize_state.token_1.clone());
+ tokenizer.enter(tokenizer.tokenize_state.token_2.clone());
tokenizer.consume();
- tokenizer.exit(info.options.marker.clone());
- tokenizer.enter(info.options.string.clone());
- State::Fn(Box::new(|t| at_break(t, info)))
+ tokenizer.exit(tokenizer.tokenize_state.token_2.clone());
+ tokenizer.enter(tokenizer.tokenize_state.token_3.clone());
+ State::Fn(Box::new(at_break))
}
_ => State::Nok,
}
@@ -122,72 +90,88 @@ pub fn start(tokenizer: &mut Tokenizer, options: Options) -> State {
/// > | [a]
/// ^
/// ```
-fn at_break(tokenizer: &mut Tokenizer, mut info: Info) -> State {
- if info.size > LINK_REFERENCE_SIZE_MAX
+fn at_break(tokenizer: &mut Tokenizer) -> State {
+ if tokenizer.tokenize_state.size > LINK_REFERENCE_SIZE_MAX
|| matches!(tokenizer.current, None | Some(b'['))
- || (matches!(tokenizer.current, Some(b']')) && !info.data)
+ || (matches!(tokenizer.current, Some(b']')) && !tokenizer.tokenize_state.seen)
{
+ tokenizer.tokenize_state.connect = false;
+ tokenizer.tokenize_state.seen = false;
+ tokenizer.tokenize_state.size = 0;
State::Nok
} else {
match tokenizer.current {
- Some(b'\n') => tokenizer.go(
+ Some(b'\n') => tokenizer.attempt(
space_or_tab_eol_with_options(EolOptions {
content_type: Some(ContentType::String),
- connect: info.connect,
+ connect: tokenizer.tokenize_state.connect,
}),
- |t| {
- info.connect = true;
- at_break(t, info)
- },
+ |ok| Box::new(if ok { after_eol } else { at_blank_line }),
)(tokenizer),
Some(b']') => {
- tokenizer.exit(info.options.string.clone());
- tokenizer.enter(info.options.marker.clone());
+ tokenizer.exit(tokenizer.tokenize_state.token_3.clone());
+ tokenizer.enter(tokenizer.tokenize_state.token_2.clone());
tokenizer.consume();
- tokenizer.exit(info.options.marker.clone());
- tokenizer.exit(info.options.label);
+ tokenizer.exit(tokenizer.tokenize_state.token_2.clone());
+ tokenizer.exit(tokenizer.tokenize_state.token_1.clone());
+ tokenizer.tokenize_state.connect = false;
+ tokenizer.tokenize_state.seen = false;
+ tokenizer.tokenize_state.size = 0;
State::Ok
}
_ => {
tokenizer.enter_with_content(Token::Data, Some(ContentType::String));
- if info.connect {
+ if tokenizer.tokenize_state.connect {
let index = tokenizer.events.len() - 1;
link(&mut tokenizer.events, index);
} else {
- info.connect = true;
+ tokenizer.tokenize_state.connect = true;
}
- label(tokenizer, info)
+ label(tokenizer)
}
}
}
}
+/// In a label, after a line ending.
+fn after_eol(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.tokenize_state.connect = true;
+ at_break(tokenizer)
+}
+
+/// In a label, at a blank line (not allowed).
+fn at_blank_line(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.tokenize_state.marker = 0;
+ tokenizer.tokenize_state.connect = false;
+ State::Nok
+}
+
/// In a label, in text.
///
/// ```markdown
/// > | [a]
/// ^
/// ```
-fn label(tokenizer: &mut Tokenizer, mut info: Info) -> State {
+fn label(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n' | b'[' | b']') => {
tokenizer.exit(Token::Data);
- at_break(tokenizer, info)
+ at_break(tokenizer)
}
Some(byte) => {
- if info.size > LINK_REFERENCE_SIZE_MAX {
+ if tokenizer.tokenize_state.size > LINK_REFERENCE_SIZE_MAX {
tokenizer.exit(Token::Data);
- at_break(tokenizer, info)
+ at_break(tokenizer)
} else {
let func = if matches!(byte, b'\\') { escape } else { label };
tokenizer.consume();
- info.size += 1;
- if !info.data && !matches!(byte, b'\t' | b' ') {
- info.data = true;
+ tokenizer.tokenize_state.size += 1;
+ if !tokenizer.tokenize_state.seen && !matches!(byte, b'\t' | b' ') {
+ tokenizer.tokenize_state.seen = true;
}
- State::Fn(Box::new(move |t| func(t, info)))
+ State::Fn(Box::new(func))
}
}
}
@@ -199,13 +183,13 @@ fn label(tokenizer: &mut Tokenizer, mut info: Info) -> State {
/// > | [a\*a]
/// ^
/// ```
-fn escape(tokenizer: &mut Tokenizer, mut info: Info) -> State {
+fn escape(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'[' | b'\\' | b']') => {
tokenizer.consume();
- info.size += 1;
- State::Fn(Box::new(|t| label(t, info)))
+ tokenizer.tokenize_state.size += 1;
+ State::Fn(Box::new(label))
}
- _ => label(tokenizer, info),
+ _ => label(tokenizer),
}
}
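
partial_label swaps `tokenizer.go(...)` with a `move` closure that mutated the captured `info.connect` for `tokenizer.attempt(...)` with two named continuations: `after_eol` flips the shared `connect` flag, `at_blank_line` fails. A toy model of that branching with fn pointers (signatures are stand-ins, not the crate's `attempt`):

struct Tokenizer {
    connect: bool,
}

type Next = fn(&mut Tokenizer) -> bool;

// A toy `attempt`: run a check, then let `done` pick the next state.
fn attempt(t: &mut Tokenizer, check: Next, done: fn(bool) -> Next) -> bool {
    let ok = check(t);
    done(ok)(t)
}

fn eol(_t: &mut Tokenizer) -> bool {
    true // stand-in for `space_or_tab_eol_with_options(...)`
}

fn after_eol(t: &mut Tokenizer) -> bool {
    t.connect = true; // what the old `move` closure mutated on `info`
    true
}

fn at_blank_line(t: &mut Tokenizer) -> bool {
    t.connect = false;
    false // `State::Nok`
}

fn at_break(t: &mut Tokenizer) -> bool {
    attempt(t, eol, |ok| if ok { after_eol } else { at_blank_line })
}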
diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs
index f31cbc6..e3eac45 100644
--- a/src/construct/partial_space_or_tab.rs
+++ b/src/construct/partial_space_or_tab.rs
@@ -32,26 +32,6 @@ pub struct EolOptions {
pub content_type: Option<ContentType>,
}
-/// State needed to parse `space_or_tab`.
-#[derive(Debug)]
-struct Info {
- /// Current size.
- size: usize,
- /// Configuration.
- options: Options,
-}
-
-/// State needed to parse `space_or_tab_eol`.
-#[derive(Debug)]
-struct EolInfo {
- /// Whether to connect the next whitespace to the event before.
- connect: bool,
- /// Whether there was initial whitespace.
- ok: bool,
- /// Configuration.
- options: EolOptions,
-}
-
/// One or more `space_or_tab`.
///
/// ```bnf
@@ -78,7 +58,14 @@ pub fn space_or_tab_min_max(min: usize, max: usize) -> Box<StateFn> {
/// `space_or_tab`, with the given options.
pub fn space_or_tab_with_options(options: Options) -> Box<StateFn> {
- Box::new(|t| start(t, Info { size: 0, options }))
+    Box::new(move |tokenizer| {
+ tokenizer.tokenize_state.space_or_tab_connect = options.connect;
+ tokenizer.tokenize_state.space_or_tab_content_type = options.content_type;
+ tokenizer.tokenize_state.space_or_tab_min = options.min;
+ tokenizer.tokenize_state.space_or_tab_max = options.max;
+ tokenizer.tokenize_state.space_or_tab_token = options.kind;
+ start(tokenizer)
+ })
}
/// `space_or_tab`, or optionally `space_or_tab`, one `eol`, and
@@ -97,30 +84,28 @@ pub fn space_or_tab_eol() -> Box<StateFn> {
/// `space_or_tab_eol`, with the given options.
pub fn space_or_tab_eol_with_options(options: EolOptions) -> Box<StateFn> {
Box::new(move |tokenizer| {
- let mut info = EolInfo {
- connect: options.connect,
- ok: false,
- options,
- };
+ tokenizer.tokenize_state.space_or_tab_eol_content_type = options.content_type;
+ tokenizer.tokenize_state.space_or_tab_eol_connect = options.connect;
tokenizer.attempt(
space_or_tab_with_options(Options {
kind: Token::SpaceOrTab,
min: 1,
max: usize::MAX,
- content_type: info.options.content_type.clone(),
- connect: info.options.connect,
+ content_type: tokenizer
+ .tokenize_state
+ .space_or_tab_eol_content_type
+ .clone(),
+ connect: tokenizer.tokenize_state.space_or_tab_eol_connect,
}),
move |ok| {
- if ok {
- info.ok = ok;
-
- if info.options.content_type.is_some() {
- info.connect = true;
+ Box::new(move |tokenizer| {
+ if ok {
+ tokenizer.tokenize_state.space_or_tab_eol_ok = ok;
}
- }
- Box::new(|t| after_space_or_tab(t, info))
+ after_space_or_tab(tokenizer)
+ })
},
)(tokenizer)
})
@@ -132,28 +117,24 @@ pub fn space_or_tab_eol_with_options(options: EolOptions) -> Box<StateFn> {
/// > | a␠␠b
/// ^
/// ```
-fn start(tokenizer: &mut Tokenizer, mut info: Info) -> State {
+fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some(b'\t' | b' ') if info.options.max > 0 => {
- tokenizer
- .enter_with_content(info.options.kind.clone(), info.options.content_type.clone());
+ Some(b'\t' | b' ') if tokenizer.tokenize_state.space_or_tab_max > 0 => {
+ tokenizer.enter_with_content(
+ tokenizer.tokenize_state.space_or_tab_token.clone(),
+ tokenizer.tokenize_state.space_or_tab_content_type.clone(),
+ );
- if info.options.content_type.is_some() {
+ if tokenizer.tokenize_state.space_or_tab_connect {
let index = tokenizer.events.len() - 1;
link(&mut tokenizer.events, index);
+ } else if tokenizer.tokenize_state.space_or_tab_content_type.is_some() {
+ tokenizer.tokenize_state.space_or_tab_connect = true;
}
- tokenizer.consume();
- info.size += 1;
- State::Fn(Box::new(|t| inside(t, info)))
- }
- _ => {
- if info.options.min == 0 {
- State::Ok
- } else {
- State::Nok
- }
+ inside(tokenizer)
}
+ _ => after(tokenizer),
}
}
@@ -163,24 +144,46 @@ fn start(tokenizer: &mut Tokenizer, mut info: Info) -> State {
/// > | a␠␠b
/// ^
/// ```
-fn inside(tokenizer: &mut Tokenizer, mut info: Info) -> State {
+fn inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some(b'\t' | b' ') if info.size < info.options.max => {
+ Some(b'\t' | b' ')
+ if tokenizer.tokenize_state.space_or_tab_size
+ < tokenizer.tokenize_state.space_or_tab_max =>
+ {
tokenizer.consume();
- info.size += 1;
- State::Fn(Box::new(|t| inside(t, info)))
+ tokenizer.tokenize_state.space_or_tab_size += 1;
+ State::Fn(Box::new(inside))
}
_ => {
- tokenizer.exit(info.options.kind.clone());
- if info.size >= info.options.min {
- State::Ok
- } else {
- State::Nok
- }
+ tokenizer.exit(tokenizer.tokenize_state.space_or_tab_token.clone());
+ after(tokenizer)
}
}
}
+/// After `space_or_tab`.
+///
+/// ```markdown
+/// > | a␠␠b
+/// ^
+/// ```
+fn after(tokenizer: &mut Tokenizer) -> State {
+ let state = if tokenizer.tokenize_state.space_or_tab_size
+ >= tokenizer.tokenize_state.space_or_tab_min
+ {
+ State::Ok
+ } else {
+ State::Nok
+ };
+ tokenizer.tokenize_state.space_or_tab_connect = false;
+ tokenizer.tokenize_state.space_or_tab_content_type = None;
+ tokenizer.tokenize_state.space_or_tab_size = 0;
+ tokenizer.tokenize_state.space_or_tab_max = 0;
+ tokenizer.tokenize_state.space_or_tab_min = 0;
+ tokenizer.tokenize_state.space_or_tab_token = Token::SpaceOrTab;
+ state
+}
+
/// `space_or_tab_eol`: after optionally first `space_or_tab`.
///
/// ```markdown
@@ -188,24 +191,49 @@ fn inside(tokenizer: &mut Tokenizer, mut info: Info) -> State {
/// ^
/// | b
/// ```
-fn after_space_or_tab(tokenizer: &mut Tokenizer, mut info: EolInfo) -> State {
- match tokenizer.current {
- Some(b'\n') => {
- tokenizer.enter_with_content(Token::LineEnding, info.options.content_type.clone());
+fn after_space_or_tab(tokenizer: &mut Tokenizer) -> State {
+ if tokenizer.tokenize_state.space_or_tab_eol_ok
+ && tokenizer
+ .tokenize_state
+ .space_or_tab_eol_content_type
+ .is_some()
+ {
+ tokenizer.tokenize_state.space_or_tab_eol_connect = true;
+ }
- if info.connect {
- let index = tokenizer.events.len() - 1;
- link(&mut tokenizer.events, index);
- } else if info.options.content_type.is_some() {
- info.connect = true;
- }
+ if let Some(b'\n') = tokenizer.current {
+ tokenizer.enter_with_content(
+ Token::LineEnding,
+ tokenizer
+ .tokenize_state
+ .space_or_tab_eol_content_type
+ .clone(),
+ );
- tokenizer.consume();
- tokenizer.exit(Token::LineEnding);
- State::Fn(Box::new(|t| after_eol(t, info)))
+ if tokenizer.tokenize_state.space_or_tab_eol_connect {
+ let index = tokenizer.events.len() - 1;
+ link(&mut tokenizer.events, index);
+ } else if tokenizer
+ .tokenize_state
+ .space_or_tab_eol_content_type
+ .is_some()
+ {
+ tokenizer.tokenize_state.space_or_tab_eol_connect = true;
}
- _ if info.ok => State::Ok,
- _ => State::Nok,
+
+ tokenizer.consume();
+ tokenizer.exit(Token::LineEnding);
+ State::Fn(Box::new(after_eol))
+ } else {
+ let state = if tokenizer.tokenize_state.space_or_tab_eol_ok {
+ State::Ok
+ } else {
+ State::Nok
+ };
+ tokenizer.tokenize_state.space_or_tab_eol_content_type = None;
+ tokenizer.tokenize_state.space_or_tab_eol_connect = false;
+ tokenizer.tokenize_state.space_or_tab_eol_ok = false;
+ state
}
}
@@ -217,14 +245,17 @@ fn after_space_or_tab(tokenizer: &mut Tokenizer, mut info: EolInfo) -> State {
/// ^
/// ```
#[allow(clippy::needless_pass_by_value)]
-fn after_eol(tokenizer: &mut Tokenizer, info: EolInfo) -> State {
+fn after_eol(tokenizer: &mut Tokenizer) -> State {
tokenizer.attempt_opt(
space_or_tab_with_options(Options {
kind: Token::SpaceOrTab,
min: 1,
max: usize::MAX,
- content_type: info.options.content_type,
- connect: info.connect,
+ content_type: tokenizer
+ .tokenize_state
+ .space_or_tab_eol_content_type
+ .clone(),
+ connect: tokenizer.tokenize_state.space_or_tab_eol_connect,
}),
after_more_space_or_tab,
)(tokenizer)
@@ -238,6 +269,10 @@ fn after_eol(tokenizer: &mut Tokenizer, info: EolInfo) -> State {
/// ^
/// ```
fn after_more_space_or_tab(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.tokenize_state.space_or_tab_eol_content_type = None;
+ tokenizer.tokenize_state.space_or_tab_eol_connect = false;
+ tokenizer.tokenize_state.space_or_tab_eol_ok = false;
+
// Blank line not allowed.
if matches!(tokenizer.current, None | Some(b'\n')) {
State::Nok
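
space_or_tab gets its own `space_or_tab_*` (and `space_or_tab_eol_*`) fields rather than the generic `size`/`marker` slots, because it runs nested inside constructs that already occupy those. A miniature showing why the namespacing matters, with the same reset-in-`after` shape:

struct TokenizeState {
    size: usize,              // owned by the surrounding construct
    space_or_tab_size: usize, // owned by this partial
    space_or_tab_min: usize,
    space_or_tab_max: usize,
}

fn space_or_tab(state: &mut TokenizeState, bytes: &[u8]) -> bool {
    let mut index = 0;
    while index < bytes.len()
        && matches!(bytes[index], b'\t' | b' ')
        && state.space_or_tab_size < state.space_or_tab_max
    {
        state.space_or_tab_size += 1;
        index += 1;
    }
    after(state)
}

fn after(state: &mut TokenizeState) -> bool {
    let ok = state.space_or_tab_size >= state.space_or_tab_min;
    // Reset only this partial's fields; the caller's `size` is untouched.
    state.space_or_tab_size = 0;
    state.space_or_tab_min = 0;
    state.space_or_tab_max = 0;
    ok
}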
diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs
index 9cf2f14..6bf9099 100644
--- a/src/construct/partial_title.rs
+++ b/src/construct/partial_title.rs
@@ -35,50 +35,22 @@ use crate::subtokenize::link;
use crate::token::Token;
use crate::tokenizer::{ContentType, State, Tokenizer};
-/// Configuration.
-///
-/// You must pass the token types in that are used.
-#[derive(Debug)]
-pub struct Options {
- /// Token for the whole title.
- pub title: Token,
- /// Token for the marker.
- pub marker: Token,
- /// Token for the string inside the quotes.
- pub string: Token,
-}
-
-/// State needed to parse titles.
-#[derive(Debug)]
-struct Info {
- /// Whether we’ve seen data.
- connect: bool,
- /// Closing marker.
- marker: u8,
- /// Configuration.
- options: Options,
-}
-
/// Before a title.
///
/// ```markdown
/// > | "a"
/// ^
/// ```
-pub fn start(tokenizer: &mut Tokenizer, options: Options) -> State {
+pub fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'"' | b'\'' | b'(') => {
let marker = tokenizer.current.unwrap();
- let info = Info {
- connect: false,
- marker: if marker == b'(' { b')' } else { marker },
- options,
- };
- tokenizer.enter(info.options.title.clone());
- tokenizer.enter(info.options.marker.clone());
+ tokenizer.tokenize_state.marker = if marker == b'(' { b')' } else { marker };
+ tokenizer.enter(tokenizer.tokenize_state.token_1.clone());
+ tokenizer.enter(tokenizer.tokenize_state.token_2.clone());
tokenizer.consume();
- tokenizer.exit(info.options.marker.clone());
- State::Fn(Box::new(|t| begin(t, info)))
+ tokenizer.exit(tokenizer.tokenize_state.token_2.clone());
+ State::Fn(Box::new(begin))
}
_ => State::Nok,
}
@@ -92,18 +64,22 @@ pub fn start(tokenizer: &mut Tokenizer, options: Options) -> State {
/// > | "a"
/// ^
/// ```
-fn begin(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn begin(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some(b'"' | b'\'' | b')') if tokenizer.current.unwrap() == info.marker => {
- tokenizer.enter(info.options.marker.clone());
+ Some(b'"' | b'\'' | b')')
+ if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker =>
+ {
+ tokenizer.enter(tokenizer.tokenize_state.token_2.clone());
tokenizer.consume();
- tokenizer.exit(info.options.marker.clone());
- tokenizer.exit(info.options.title);
+ tokenizer.exit(tokenizer.tokenize_state.token_2.clone());
+ tokenizer.exit(tokenizer.tokenize_state.token_1.clone());
+ tokenizer.tokenize_state.marker = 0;
+ tokenizer.tokenize_state.connect = false;
State::Ok
}
_ => {
- tokenizer.enter(info.options.string.clone());
- at_break(tokenizer, info)
+ tokenizer.enter(tokenizer.tokenize_state.token_3.clone());
+ at_break(tokenizer)
}
}
}
@@ -114,58 +90,76 @@ fn begin(tokenizer: &mut Tokenizer, info: Info) -> State {
/// > | "a"
/// ^
/// ```
-fn at_break(tokenizer: &mut Tokenizer, mut info: Info) -> State {
+fn at_break(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- None => State::Nok,
- Some(b'\n') => tokenizer.go(
+ None => {
+ tokenizer.tokenize_state.marker = 0;
+ tokenizer.tokenize_state.connect = false;
+ State::Nok
+ }
+ Some(b'\n') => tokenizer.attempt(
space_or_tab_eol_with_options(EolOptions {
content_type: Some(ContentType::String),
- connect: info.connect,
+ connect: tokenizer.tokenize_state.connect,
}),
- |t| {
- info.connect = true;
- at_break(t, info)
- },
+ |ok| Box::new(if ok { after_eol } else { at_blank_line }),
)(tokenizer),
- Some(b'"' | b'\'' | b')') if tokenizer.current.unwrap() == info.marker => {
- tokenizer.exit(info.options.string.clone());
- begin(tokenizer, info)
+ Some(b'"' | b'\'' | b')')
+ if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker =>
+ {
+ tokenizer.exit(tokenizer.tokenize_state.token_3.clone());
+ begin(tokenizer)
}
Some(_) => {
tokenizer.enter_with_content(Token::Data, Some(ContentType::String));
- if info.connect {
+ if tokenizer.tokenize_state.connect {
let index = tokenizer.events.len() - 1;
link(&mut tokenizer.events, index);
} else {
- info.connect = true;
+ tokenizer.tokenize_state.connect = true;
}
- title(tokenizer, info)
+ title(tokenizer)
}
}
}
+/// In a title, after a line ending.
+fn after_eol(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.tokenize_state.connect = true;
+ at_break(tokenizer)
+}
+
+/// In a title, at a blank line (not allowed).
+fn at_blank_line(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.tokenize_state.marker = 0;
+ tokenizer.tokenize_state.connect = false;
+ State::Nok
+}
+
/// In title text.
///
/// ```markdown
/// > | "a"
/// ^
/// ```
-fn title(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn title(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None | Some(b'\n') => {
tokenizer.exit(Token::Data);
- at_break(tokenizer, info)
+ at_break(tokenizer)
}
- Some(b'"' | b'\'' | b')') if tokenizer.current.unwrap() == info.marker => {
+ Some(b'"' | b'\'' | b')')
+ if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker =>
+ {
tokenizer.exit(Token::Data);
- at_break(tokenizer, info)
+ at_break(tokenizer)
}
Some(byte) => {
let func = if matches!(byte, b'\\') { escape } else { title };
tokenizer.consume();
- State::Fn(Box::new(move |t| func(t, info)))
+ State::Fn(Box::new(func))
}
}
}
@@ -176,12 +170,12 @@ fn title(tokenizer: &mut Tokenizer, info: Info) -> State {
/// > | "a\*b"
/// ^
/// ```
-fn escape(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn escape(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'"' | b'\'' | b')') => {
tokenizer.consume();
- State::Fn(Box::new(|t| title(t, info)))
+ State::Fn(Box::new(title))
}
- _ => title(tokenizer, info),
+ _ => title(tokenizer),
}
}
diff --git a/src/construct/partial_whitespace.rs b/src/construct/partial_whitespace.rs
index bf3bd4d..0905e10 100644
--- a/src/construct/partial_whitespace.rs
+++ b/src/construct/partial_whitespace.rs
@@ -47,17 +47,9 @@
use crate::constant::HARD_BREAK_PREFIX_SIZE_MIN;
use crate::token::Token;
-use crate::tokenizer::{Event, EventType, Resolver, Tokenizer};
+use crate::tokenizer::{Event, EventType, Tokenizer};
use crate::util::slice::{Position, Slice};
-/// Create a resolver to handle trailing whitespace in events.
-///
-/// Performing this as a resolver instead of a tokenizer improves performance
-/// *a lot*.
-pub fn create_resolve_whitespace(hard_break: bool, trim_whole: bool) -> Box<Resolver> {
- Box::new(move |t| resolve_whitespace(t, hard_break, trim_whole))
-}
-
/// Resolve whitespace.
pub fn resolve_whitespace(tokenizer: &mut Tokenizer, hard_break: bool, trim_whole: bool) {
let mut index = 0;
diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs
index 785d132..2ed2046 100644
--- a/src/construct/thematic_break.rs
+++ b/src/construct/thematic_break.rs
@@ -53,15 +53,6 @@ use crate::constant::{TAB_SIZE, THEMATIC_BREAK_MARKER_COUNT_MIN};
use crate::token::Token;
use crate::tokenizer::{State, Tokenizer};
-/// State needed to parse thematic breaks.
-#[derive(Debug)]
-struct Info {
- /// Marker.
- marker: u8,
- /// Number of markers.
- size: usize,
-}
-
/// Start of a thematic break.
///
/// ```markdown
@@ -95,13 +86,10 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ```
fn before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some(b'*' | b'-' | b'_') => at_break(
- tokenizer,
- Info {
- marker: tokenizer.current.unwrap(),
- size: 0,
- },
- ),
+ Some(b'*' | b'-' | b'_') => {
+ tokenizer.tokenize_state.marker = tokenizer.current.unwrap();
+ at_break(tokenizer)
+ }
_ => State::Nok,
}
}
@@ -112,19 +100,27 @@ fn before(tokenizer: &mut Tokenizer) -> State {
/// > | ***
/// ^
/// ```
-fn at_break(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn at_break(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- None | Some(b'\n') if info.size >= THEMATIC_BREAK_MARKER_COUNT_MIN => {
+ None | Some(b'\n') if tokenizer.tokenize_state.size >= THEMATIC_BREAK_MARKER_COUNT_MIN => {
+ tokenizer.tokenize_state.marker = 0;
+ tokenizer.tokenize_state.size = 0;
tokenizer.exit(Token::ThematicBreak);
// Feel free to interrupt.
tokenizer.interrupt = false;
State::Ok
}
- Some(b'*' | b'-' | b'_') if tokenizer.current.unwrap() == info.marker => {
+ Some(b'*' | b'-' | b'_')
+ if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker =>
+ {
tokenizer.enter(Token::ThematicBreakSequence);
- sequence(tokenizer, info)
+ sequence(tokenizer)
+ }
+ _ => {
+ tokenizer.tokenize_state.marker = 0;
+ tokenizer.tokenize_state.size = 0;
+ State::Nok
}
- _ => State::Nok,
}
}
@@ -134,16 +130,18 @@ fn at_break(tokenizer: &mut Tokenizer, info: Info) -> State {
/// > | ***
/// ^
/// ```
-fn sequence(tokenizer: &mut Tokenizer, mut info: Info) -> State {
+fn sequence(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some(b'*' | b'-' | b'_') if tokenizer.current.unwrap() == info.marker => {
+ Some(b'*' | b'-' | b'_')
+ if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker =>
+ {
tokenizer.consume();
- info.size += 1;
- State::Fn(Box::new(|t| sequence(t, info)))
+ tokenizer.tokenize_state.size += 1;
+ State::Fn(Box::new(sequence))
}
_ => {
tokenizer.exit(Token::ThematicBreakSequence);
- tokenizer.attempt_opt(space_or_tab(), |t| at_break(t, info))(tokenizer)
+ tokenizer.attempt_opt(space_or_tab(), at_break)(tokenizer)
}
}
}
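
The thematic-break hunk shows that consequence directly: the catch-all arm of `at_break` now resets `marker` and `size` before returning `State::Nok`, where the old code could simply drop its owned `Info`. A self-contained illustration of the both-paths reset discipline (not the real construct; `MARKER_COUNT_MIN` stands in for `THEMATIC_BREAK_MARKER_COUNT_MIN`, and the size bump really happens in `sequence`):

```rust
enum State {
    Ok,
    Nok,
    More, // stand-in for "continue in `sequence`"
}

struct Shared {
    marker: u8,
    size: usize,
}

const MARKER_COUNT_MIN: usize = 3;

fn reset(s: &mut Shared) {
    s.marker = 0;
    s.size = 0;
}

fn at_break(s: &mut Shared, current: Option<u8>) -> State {
    match current {
        None | Some(b'\n') if s.size >= MARKER_COUNT_MIN => {
            reset(s); // the success path resets…
            State::Ok
        }
        Some(b) if b == s.marker => {
            s.size += 1;
            State::More
        }
        _ => {
            reset(s); // …and now the failure path must too
            State::Nok
        }
    }
}

fn main() {
    let mut ok = Shared { marker: b'*', size: 3 };
    assert!(matches!(at_break(&mut ok, None), State::Ok));
    let mut nok = Shared { marker: b'*', size: 1 };
    assert!(matches!(at_break(&mut nok, Some(b'x')), State::Nok));
    assert_eq!((nok.marker, nok.size), (0, 0)); // nothing leaks to the next construct
}
```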
diff --git a/src/content/document.rs b/src/content/document.rs
index 76d510a..33c8ff9 100644
--- a/src/content/document.rs
+++ b/src/content/document.rs
@@ -17,9 +17,7 @@ use crate::content::flow::start as flow;
use crate::parser::ParseState;
use crate::subtokenize::subtokenize;
use crate::token::Token;
-use crate::tokenizer::{
- Container, ContainerState, Event, EventType, Point, State, StateFn, Tokenizer,
-};
+use crate::tokenizer::{Container, ContainerState, Event, EventType, Point, State, Tokenizer};
use crate::util::{
normalize_identifier::normalize_identifier,
skip,
@@ -57,29 +55,11 @@ enum Phase {
Eof,
}
-/// State needed to parse document.
-struct DocumentInfo {
- /// Number of containers that have continued.
- continued: usize,
- /// Index into `tokenizer.events` we need to track.
- index: usize,
- /// Events of containers added back later.
- inject: Vec<(Vec<Event>, Vec<Event>)>,
- /// The value of the previous line of flow’s `interrupt`.
- interrupt_before: bool,
- /// Whether the previous line of flow was a paragraph.
- paragraph_before: bool,
- /// Current containers.
- stack: Vec<ContainerState>,
- /// Current flow state function.
- next: Box<StateFn>,
-}
-
/// Turn `codes` as the document content type into events.
pub fn document(parse_state: &mut ParseState, point: Point) -> Vec<Event> {
let mut tokenizer = Tokenizer::new(point, parse_state);
- let state = tokenizer.push(0, parse_state.bytes.len(), Box::new(before));
+ let state = tokenizer.push(0, parse_state.bytes.len(), Box::new(start));
tokenizer.flush(state, true);
let mut index = 0;
@@ -123,28 +103,8 @@ pub fn document(parse_state: &mut ParseState, point: Point) -> Vec<Event> {
/// > | a
/// ^
/// ```
-fn before(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt_opt(bom, start)(tokenizer)
-}
-
-/// Before document.
-//
-/// ```markdown
-/// > | * a
-/// ^
-/// | > b
-/// ```
fn start(tokenizer: &mut Tokenizer) -> State {
- let info = DocumentInfo {
- index: 0,
- continued: 0,
- inject: vec![],
- next: Box::new(flow),
- paragraph_before: false,
- interrupt_before: false,
- stack: vec![],
- };
- line_start(tokenizer, info)
+ tokenizer.attempt_opt(bom, line_start)(tokenizer)
}
/// Start of a line.
@@ -155,13 +115,16 @@ fn start(tokenizer: &mut Tokenizer) -> State {
/// > | > b
/// ^
/// ```
-fn line_start(tokenizer: &mut Tokenizer, mut info: DocumentInfo) -> State {
- info.index = tokenizer.events.len();
- info.inject.push((vec![], vec![]));
- info.continued = 0;
+fn line_start(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.tokenize_state.document_continued = 0;
+ tokenizer.tokenize_state.document_index = tokenizer.events.len();
+ tokenizer
+ .tokenize_state
+ .document_inject
+ .push((vec![], vec![]));
// Containers would only be interrupting if we’ve continued.
tokenizer.interrupt = false;
- container_existing_before(tokenizer, info)
+ container_existing_before(tokenizer)
}
/// Before existing containers.
@@ -171,27 +134,32 @@ fn line_start(tokenizer: &mut Tokenizer, mut info: DocumentInfo) -> State {
/// > | > b
/// ^
/// ```
-fn container_existing_before(tokenizer: &mut Tokenizer, mut info: DocumentInfo) -> State {
+fn container_existing_before(tokenizer: &mut Tokenizer) -> State {
// If there are more existing containers, check whether the next one continues.
- if info.continued < info.stack.len() {
- let container = info.stack.remove(info.continued);
+ if tokenizer.tokenize_state.document_continued
+ < tokenizer.tokenize_state.document_container_stack.len()
+ {
+ let container = tokenizer
+ .tokenize_state
+ .document_container_stack
+ .remove(tokenizer.tokenize_state.document_continued);
let cont = match container.kind {
Container::BlockQuote => block_quote_cont,
Container::ListItem => list_item_const,
};
tokenizer.container = Some(container);
- tokenizer.attempt(cont, move |ok| {
- if ok {
- Box::new(|t| container_existing_after(t, info))
+ tokenizer.attempt(cont, |ok| {
+ Box::new(if ok {
+ container_existing_after
} else {
- Box::new(|t| container_existing_missing(t, info))
- }
+ container_existing_missing
+ })
})(tokenizer)
}
// Otherwise, check new containers.
else {
- container_new_before(tokenizer, info)
+ container_new_before(tokenizer)
}
}
@@ -202,10 +170,13 @@ fn container_existing_before(tokenizer: &mut Tokenizer, mut info: DocumentInfo)
/// > | > b
/// ^
/// ```
-fn container_existing_missing(tokenizer: &mut Tokenizer, mut info: DocumentInfo) -> State {
+fn container_existing_missing(tokenizer: &mut Tokenizer) -> State {
let container = tokenizer.container.take().unwrap();
- info.stack.insert(info.continued, container);
- container_new_before(tokenizer, info)
+ tokenizer
+ .tokenize_state
+ .document_container_stack
+ .insert(tokenizer.tokenize_state.document_continued, container);
+ container_new_before(tokenizer)
}
/// After an existing container.
@@ -215,11 +186,14 @@ fn container_existing_missing(tokenizer: &mut Tokenizer, mut info: DocumentInfo)
/// > | b
/// ^
/// ```
-fn container_existing_after(tokenizer: &mut Tokenizer, mut info: DocumentInfo) -> State {
+fn container_existing_after(tokenizer: &mut Tokenizer) -> State {
let container = tokenizer.container.take().unwrap();
- info.stack.insert(info.continued, container);
- info.continued += 1;
- container_existing_before(tokenizer, info)
+ tokenizer
+ .tokenize_state
+ .document_container_stack
+ .insert(tokenizer.tokenize_state.document_continued, container);
+ tokenizer.tokenize_state.document_continued += 1;
+ container_existing_before(tokenizer)
}
/// Before a new container.
@@ -230,16 +204,18 @@ fn container_existing_after(tokenizer: &mut Tokenizer, mut info: DocumentInfo) -
/// > | > b
/// ^
/// ```
-fn container_new_before(tokenizer: &mut Tokenizer, info: DocumentInfo) -> State {
+fn container_new_before(tokenizer: &mut Tokenizer) -> State {
// If we have completely continued, restore the flow’s past `interrupt`
// status.
- if info.continued == info.stack.len() {
- tokenizer.interrupt = info.interrupt_before;
+ if tokenizer.tokenize_state.document_continued
+ == tokenizer.tokenize_state.document_container_stack.len()
+ {
+ tokenizer.interrupt = tokenizer.tokenize_state.document_interrupt_before;
// …and if we’re in a concrete construct, new containers can’t “pierce”
// into them.
if tokenizer.concrete {
- return containers_after(tokenizer, info);
+ return containers_after(tokenizer);
}
}
@@ -251,41 +227,42 @@ fn container_new_before(tokenizer: &mut Tokenizer, info: DocumentInfo) -> State
size: 0,
});
- tokenizer.attempt(block_quote, move |ok| {
- if ok {
- Box::new(|t| container_new_after(t, info))
+ tokenizer.attempt(block_quote, |ok| {
+ Box::new(if ok {
+ container_new_after
} else {
- Box::new(|tokenizer| {
- // List item?
- tokenizer.container = Some(ContainerState {
- kind: Container::ListItem,
- blank_initial: false,
- size: 0,
- });
-
- tokenizer.attempt(list_item, |ok| {
- Box::new(move |t| {
- if ok {
- container_new_after(t, info)
- } else {
- containers_after(t, info)
- }
- })
- })(tokenizer)
- })
- }
+ container_new_before_not_blockquote
+ })
+ })(tokenizer)
+}
+
+/// Before a new container, where a block quote did not match: try a list item.
+fn container_new_before_not_blockquote(tokenizer: &mut Tokenizer) -> State {
+ // List item?
+ tokenizer.container = Some(ContainerState {
+ kind: Container::ListItem,
+ blank_initial: false,
+ size: 0,
+ });
+
+ tokenizer.attempt(list_item, |ok| {
+ Box::new(if ok {
+ container_new_after
+ } else {
+ containers_after
+ })
})(tokenizer)
}
/// After a new container.
-//
+///
/// ```markdown
/// > | * a
/// ^
/// > | > b
/// ^
/// ```
-fn container_new_after(tokenizer: &mut Tokenizer, mut info: DocumentInfo) -> State {
+fn container_new_after(tokenizer: &mut Tokenizer) -> State {
let container = tokenizer.container.take().unwrap();
// Remove from the event stack.
@@ -312,16 +289,21 @@ fn container_new_after(tokenizer: &mut Tokenizer, mut info: DocumentInfo) -> Sta
// If we did not continue all existing containers, and there is a new one,
// close the flow and those containers.
- if info.continued != info.stack.len() {
- info = exit_containers(tokenizer, info, &Phase::Prefix);
+ if tokenizer.tokenize_state.document_continued
+ != tokenizer.tokenize_state.document_container_stack.len()
+ {
+ exit_containers(tokenizer, &Phase::Prefix);
}
// Try another new container.
- info.stack.push(container);
- info.continued += 1;
- info.interrupt_before = false;
+ tokenizer
+ .tokenize_state
+ .document_container_stack
+ .push(container);
+ tokenizer.tokenize_state.document_continued += 1;
+ tokenizer.tokenize_state.document_interrupt_before = false;
tokenizer.interrupt = false;
- container_new_before(tokenizer, info)
+ container_new_before(tokenizer)
}
/// After containers, before flow.
@@ -332,26 +314,36 @@ fn container_new_after(tokenizer: &mut Tokenizer, mut info: DocumentInfo) -> Sta
/// > | > b
/// ^
/// ```
-fn containers_after(tokenizer: &mut Tokenizer, mut info: DocumentInfo) -> State {
+fn containers_after(tokenizer: &mut Tokenizer) -> State {
// Store the container events we parsed.
- info.inject
+ tokenizer
+ .tokenize_state
+ .document_inject
.last_mut()
.unwrap()
.0
- .append(&mut tokenizer.events.split_off(info.index));
-
- tokenizer.lazy = info.continued != info.stack.len();
- tokenizer.interrupt = info.interrupt_before;
+ .append(
+ &mut tokenizer
+ .events
+ .split_off(tokenizer.tokenize_state.document_index),
+ );
+
+ tokenizer.lazy = tokenizer.tokenize_state.document_continued
+ != tokenizer.tokenize_state.document_container_stack.len();
+ tokenizer.interrupt = tokenizer.tokenize_state.document_interrupt_before;
tokenizer.define_skip_current();
- let state = info.next;
- info.next = Box::new(flow);
+ let state = tokenizer
+ .tokenize_state
+ .document_next
+ .take()
+ .unwrap_or_else(|| Box::new(flow));
// Parse flow, pausing after eols.
tokenizer.go_until(
state,
|code| matches!(code, Some(b'\n')),
- move |state| Box::new(move |t| flow_end(t, info, state)),
+ |state| Box::new(|t| flow_end(t, state)),
)(tokenizer)
}
@@ -362,7 +354,7 @@ fn containers_after(tokenizer: &mut Tokenizer, mut info: DocumentInfo) -> State
/// > | > b
/// ^ ^
/// ```
-fn flow_end(tokenizer: &mut Tokenizer, mut info: DocumentInfo, result: State) -> State {
+fn flow_end(tokenizer: &mut Tokenizer, result: State) -> State {
let paragraph = !tokenizer.events.is_empty()
&& tokenizer.events[skip::opt_back(
&tokenizer.events,
@@ -372,53 +364,59 @@ fn flow_end(tokenizer: &mut Tokenizer, mut info: DocumentInfo, result: State) ->
.token_type
== Token::Paragraph;
- if tokenizer.lazy && info.paragraph_before && paragraph {
- info.continued = info.stack.len();
+ if tokenizer.lazy && paragraph && tokenizer.tokenize_state.document_paragraph_before {
+ tokenizer.tokenize_state.document_continued =
+ tokenizer.tokenize_state.document_container_stack.len();
}
- if info.continued != info.stack.len() {
- info = exit_containers(tokenizer, info, &Phase::After);
+ if tokenizer.tokenize_state.document_continued
+ != tokenizer.tokenize_state.document_container_stack.len()
+ {
+ exit_containers(tokenizer, &Phase::After);
}
- info.paragraph_before = paragraph;
- info.interrupt_before = tokenizer.interrupt;
-
match result {
State::Ok => {
- if !info.stack.is_empty() {
- info.continued = 0;
- info = exit_containers(tokenizer, info, &Phase::Eof);
+ if !tokenizer.tokenize_state.document_container_stack.is_empty() {
+ tokenizer.tokenize_state.document_continued = 0;
+ exit_containers(tokenizer, &Phase::Eof);
}
- resolve(tokenizer, &mut info);
- result
+ resolve(tokenizer);
+ State::Ok
}
State::Nok => unreachable!("unexpected `nok` from flow"),
State::Fn(func) => {
- info.next = func;
- line_start(tokenizer, info)
+ tokenizer.tokenize_state.document_paragraph_before = paragraph;
+ tokenizer.tokenize_state.document_interrupt_before = tokenizer.interrupt;
+ tokenizer.tokenize_state.document_next = Some(func);
+ line_start(tokenizer)
}
}
}
/// Close containers (and flow if needed).
-fn exit_containers(
- tokenizer: &mut Tokenizer,
- mut info: DocumentInfo,
- phase: &Phase,
-) -> DocumentInfo {
- let mut stack_close = info.stack.split_off(info.continued);
+fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) {
+ let mut stack_close = tokenizer
+ .tokenize_state
+ .document_container_stack
+ .split_off(tokenizer.tokenize_state.document_continued);
// So, we’re at the end of a line, but we need to close the *previous* line.
if *phase != Phase::Eof {
tokenizer.define_skip_current();
- let mut current_events = tokenizer.events.split_off(info.index);
- let next = info.next;
- info.next = Box::new(flow); // This is weird but Rust needs a function there.
- tokenizer.flush(State::Fn(next), false);
+ let mut current_events = tokenizer
+ .events
+ .split_off(tokenizer.tokenize_state.document_index);
+ let state = tokenizer
+ .tokenize_state
+ .document_next
+ .take()
+ .unwrap_or_else(|| Box::new(flow));
+ tokenizer.flush(State::Fn(state), false);
if *phase == Phase::Prefix {
- info.index = tokenizer.events.len();
+ tokenizer.tokenize_state.document_index = tokenizer.events.len();
}
tokenizer.events.append(&mut current_events);
@@ -442,17 +440,18 @@ fn exit_containers(
});
}
- let index = info.inject.len() - (if *phase == Phase::Eof { 1 } else { 2 });
- info.inject[index].1.append(&mut exits);
- info.interrupt_before = false;
-
- info
+ let index =
+ tokenizer.tokenize_state.document_inject.len() - (if *phase == Phase::Eof { 1 } else { 2 });
+ tokenizer.tokenize_state.document_inject[index]
+ .1
+ .append(&mut exits);
+ tokenizer.tokenize_state.document_interrupt_before = false;
}
// Inject the container events.
-fn resolve(tokenizer: &mut Tokenizer, info: &mut DocumentInfo) {
+fn resolve(tokenizer: &mut Tokenizer) {
let mut index = 0;
- let mut inject = info.inject.split_off(0);
+ let mut inject = tokenizer.tokenize_state.document_inject.split_off(0);
inject.reverse();
let mut first_line_ending_in_run = None;
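
In `document.rs`, the flow continuation moves into `document_next: Option<Box<StateFn>>`, and the old workaround of stuffing a dummy `Box::new(flow)` back into `info.next` ("This is weird but Rust needs a function there") becomes `take().unwrap_or_else(…)`: move the boxed `FnOnce` out, defaulting to `flow` when nothing is stored. A small sketch of that shape, with an invented `Ctx` and `log` purely for demonstration:

```rust
struct Ctx {
    // Stand-in for `tokenize_state.document_next`.
    next: Option<Box<StateFn>>,
    log: Vec<&'static str>,
}

type StateFn = dyn FnOnce(&mut Ctx);

fn flow(ctx: &mut Ctx) {
    ctx.log.push("flow");
}

fn resume(ctx: &mut Ctx) {
    // Move the continuation out (leaving `None`); default to `flow`.
    let state = ctx.next.take().unwrap_or_else(|| Box::new(flow));
    state(ctx);
}

fn main() {
    let mut ctx = Ctx { next: None, log: vec![] };
    resume(&mut ctx); // nothing stored → `flow`
    ctx.next = Some(Box::new(|ctx: &mut Ctx| ctx.log.push("paused paragraph")));
    resume(&mut ctx); // the stored continuation runs exactly once
    assert_eq!(ctx.log, ["flow", "paused paragraph"]);
}
```

Because the box is `FnOnce`, `take()` also makes double-resumes impossible by construction.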
diff --git a/src/content/string.rs b/src/content/string.rs
index d2aec3f..2e738fb 100644
--- a/src/content/string.rs
+++ b/src/content/string.rs
@@ -14,7 +14,7 @@
use crate::construct::{
character_escape::start as character_escape, character_reference::start as character_reference,
- partial_data::start as data, partial_whitespace::create_resolve_whitespace,
+ partial_data::start as data, partial_whitespace::resolve_whitespace,
};
use crate::tokenizer::{State, Tokenizer};
@@ -22,10 +22,8 @@ const MARKERS: [u8; 2] = [b'&', b'\\'];
/// Start of string.
pub fn start(tokenizer: &mut Tokenizer) -> State {
- tokenizer.register_resolver(
- "whitespace".to_string(),
- Box::new(create_resolve_whitespace(false, false)),
- );
+ tokenizer.register_resolver("whitespace".to_string(), Box::new(resolve));
+ tokenizer.tokenize_state.stop = &MARKERS;
before(tokenizer)
}
@@ -42,5 +40,10 @@ fn before(tokenizer: &mut Tokenizer) -> State {
/// At data.
fn before_data(tokenizer: &mut Tokenizer) -> State {
- tokenizer.go(|t| data(t, &MARKERS), before)(tokenizer)
+ tokenizer.go(data, before)(tokenizer)
+}
+
+/// Resolve whitespace.
+pub fn resolve(tokenizer: &mut Tokenizer) {
+ resolve_whitespace(tokenizer, false, false);
}
diff --git a/src/content/text.rs b/src/content/text.rs
index 30c98a3..f4666d1 100644
--- a/src/content/text.rs
+++ b/src/content/text.rs
@@ -26,7 +26,7 @@ use crate::construct::{
code_text::start as code_text, hard_break_escape::start as hard_break_escape,
html_text::start as html_text, label_end::start as label_end,
label_start_image::start as label_start_image, label_start_link::start as label_start_link,
- partial_data::start as data, partial_whitespace::create_resolve_whitespace,
+ partial_data::start as data, partial_whitespace::resolve_whitespace,
};
use crate::tokenizer::{State, Tokenizer};
@@ -44,13 +44,8 @@ const MARKERS: [u8; 9] = [
/// Start of text.
pub fn start(tokenizer: &mut Tokenizer) -> State {
- tokenizer.register_resolver(
- "whitespace".to_string(),
- Box::new(create_resolve_whitespace(
- tokenizer.parse_state.constructs.hard_break_trailing,
- true,
- )),
- );
+ tokenizer.register_resolver("whitespace".to_string(), Box::new(resolve));
+ tokenizer.tokenize_state.stop = &MARKERS;
before(tokenizer)
}
@@ -82,5 +77,14 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
/// |qwe
/// ```
fn before_data(tokenizer: &mut Tokenizer) -> State {
- tokenizer.go(|t| data(t, &MARKERS), before)(tokenizer)
+ tokenizer.go(data, before)(tokenizer)
+}
+
+/// Resolve whitespace.
+pub fn resolve(tokenizer: &mut Tokenizer) {
+ resolve_whitespace(
+ tokenizer,
+ tokenizer.parse_state.constructs.hard_break_trailing,
+ true,
+ );
}
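
With resolvers registered through `register_resolver` no longer able to capture arguments, `string.rs` and `text.rs` each gain a thin `resolve` wrapper that forwards fixed flags to `resolve_whitespace`, replacing the `create_resolve_whitespace` closure factory. A rough standalone sketch of the shape (the `trimmed` flag and hard-coded `true`s are stand-ins; the real `text` resolver reads `hard_break_trailing` from the parse state):

```rust
struct Tokenizer {
    resolvers: Vec<fn(&mut Tokenizer)>, // plain fns: nothing to capture
    trimmed: bool,
}

// Stand-in for the real trailing-whitespace resolver.
fn resolve_whitespace(t: &mut Tokenizer, hard_break: bool, trim_whole: bool) {
    t.trimmed = hard_break || trim_whole;
}

/// `string` never honors hard breaks or whole-trimming…
fn resolve_string(t: &mut Tokenizer) {
    resolve_whitespace(t, false, false);
}

/// …while `text` does: the arguments move from closure captures to wrappers.
fn resolve_text(t: &mut Tokenizer) {
    resolve_whitespace(t, true, true);
}

fn main() {
    let mut t = Tokenizer {
        resolvers: vec![resolve_string, resolve_text],
        trimmed: false,
    };
    // Take the list first so iterating it doesn't alias the `&mut` borrow.
    for resolve in std::mem::take(&mut t.resolvers) {
        resolve(&mut t);
    }
    assert!(t.trimmed);
}
```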
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 9ab4309..3068ddf 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -121,8 +121,6 @@ pub struct Media {
pub start: (usize, usize),
/// Indices of where the media’s label end starts and ends in `events`.
pub end: (usize, usize),
- /// Identifier
- pub id: String,
}
/// Supported containers.
@@ -163,6 +161,62 @@ struct InternalState {
point: Point,
}
+/// State shared between constructs while tokenizing, so that state
+/// functions can be plain `fn`s instead of closures capturing an `Info`.
+#[allow(clippy::struct_excessive_bools)]
+pub struct TokenizeState {
+    /// Whether to connect events.
+    pub connect: bool,
+    /// Current containers (used by document).
+    pub document_container_stack: Vec<ContainerState>,
+    /// Number of containers that have continued (used by document).
+    pub document_continued: usize,
+    /// Index into `events` we need to track (used by document).
+    pub document_index: usize,
+    /// Events of containers added back later (used by document).
+    pub document_inject: Vec<(Vec<Event>, Vec<Event>)>,
+    /// The value of the previous line of flow’s `interrupt` (used by document).
+    pub document_interrupt_before: bool,
+    /// Whether the previous line of flow was a paragraph (used by document).
+    pub document_paragraph_before: bool,
+    /// Current flow state function (used by document).
+    pub document_next: Option<Box<StateFn>>,
+    /// Marker of a construct.
+    pub marker: u8,
+    /// Secondary marker.
+    pub marker_other: u8,
+    /// Size of a prefix.
+    pub prefix: usize,
+    /// State to return to after a shared sub-state.
+    pub return_state: Option<Box<StateFn>>,
+    /// Whether something was seen.
+    pub seen: bool,
+    /// Size of a construct.
+    pub size: usize,
+    /// Secondary size.
+    pub size_other: usize,
+    /// Start index.
+    pub start: usize,
+    /// End index.
+    pub end: usize,
+    /// Bytes to stop data at.
+    pub stop: &'static [u8],
+    /// Content type of an eol (used by `space_or_tab`).
+    pub space_or_tab_eol_content_type: Option<ContentType>,
+    /// Whether to connect events around an eol (used by `space_or_tab`).
+    pub space_or_tab_eol_connect: bool,
+    /// Whether an eol matched (used by `space_or_tab`).
+    pub space_or_tab_eol_ok: bool,
+    /// Whether to connect events (used by `space_or_tab`).
+    pub space_or_tab_connect: bool,
+    /// Content type (used by `space_or_tab`).
+    pub space_or_tab_content_type: Option<ContentType>,
+    /// Minimum allowed (used by `space_or_tab`).
+    pub space_or_tab_min: usize,
+    /// Maximum allowed (used by `space_or_tab`).
+    pub space_or_tab_max: usize,
+    /// Size so far (used by `space_or_tab`).
+    pub space_or_tab_size: usize,
+    /// Token to use (used by `space_or_tab`).
+    pub space_or_tab_token: Token,
+    /// Swappable token, for constructs that use several.
+    pub token_1: Token,
+    /// Swappable token.
+    pub token_2: Token,
+    /// Swappable token.
+    pub token_3: Token,
+    /// Swappable token.
+    pub token_4: Token,
+    /// Swappable token.
+    pub token_5: Token,
+}
+
/// A tokenizer itself.
#[allow(clippy::struct_excessive_bools)]
pub struct Tokenizer<'a> {
@@ -179,6 +233,8 @@ pub struct Tokenizer<'a> {
consumed: bool,
/// Track whether this tokenizer is done.
resolved: bool,
+    /// Number of attempts that are currently in progress.
+ attempt_balance: usize,
/// Current byte.
pub current: Option<u8>,
/// Previous byte.
@@ -200,6 +256,8 @@ pub struct Tokenizer<'a> {
resolver_ids: Vec<String>,
/// Shared parsing state across tokenizers.
pub parse_state: &'a ParseState<'a>,
+    /// State shared between constructs.
+ pub tokenize_state: TokenizeState,
/// Stack of label (start) that could form images and links.
///
/// Used when tokenizing [text content][crate::content::text].
@@ -241,10 +299,45 @@ impl<'a> Tokenizer<'a> {
line_start: point.clone(),
consumed: true,
resolved: false,
+ attempt_balance: 0,
point,
stack: vec![],
events: vec![],
parse_state,
+ tokenize_state: TokenizeState {
+ connect: false,
+ document_container_stack: vec![],
+ document_continued: 0,
+ document_index: 0,
+ document_inject: vec![],
+ document_interrupt_before: false,
+ document_paragraph_before: false,
+ document_next: None,
+ marker: 0,
+ marker_other: 0,
+ prefix: 0,
+ seen: false,
+ size: 0,
+ size_other: 0,
+ start: 0,
+ end: 0,
+ stop: &[],
+ return_state: None,
+ space_or_tab_eol_content_type: None,
+ space_or_tab_eol_connect: false,
+ space_or_tab_eol_ok: false,
+ space_or_tab_connect: false,
+ space_or_tab_content_type: None,
+ space_or_tab_min: 0,
+ space_or_tab_max: 0,
+ space_or_tab_size: 0,
+ space_or_tab_token: Token::SpaceOrTab,
+ token_1: Token::Data,
+ token_2: Token::Data,
+ token_3: Token::Data,
+ token_4: Token::Data,
+ token_5: Token::Data,
+ },
map: EditMap::new(),
label_start_stack: vec![],
label_start_list_loose: vec![],
@@ -494,11 +587,14 @@ impl<'a> Tokenizer<'a> {
state_fn: impl FnOnce(&mut Tokenizer) -> State + 'static,
after: impl FnOnce(&mut Tokenizer) -> State + 'static,
) -> Box<StateFn> {
+ self.attempt_balance += 1;
attempt_impl(
state_fn,
None,
self.point.index,
|tokenizer: &mut Tokenizer, state| {
+ tokenizer.attempt_balance -= 1;
+
if matches!(state, State::Ok) {
tokenizer.consumed = true;
State::Fn(Box::new(after))
@@ -522,11 +618,13 @@ impl<'a> Tokenizer<'a> {
until: impl Fn(Option<u8>) -> bool + 'static,
done: impl FnOnce(State) -> Box<StateFn> + 'static,
) -> Box<StateFn> {
+ self.attempt_balance += 1;
attempt_impl(
state_fn,
Some(Box::new(until)),
self.point.index,
|tokenizer: &mut Tokenizer, state| {
+ tokenizer.attempt_balance -= 1;
tokenizer.consumed = true;
// We don’t capture/free state because it is assumed that
// `go_until` itself is wrapped in another attempt that does
@@ -550,6 +648,7 @@ impl<'a> Tokenizer<'a> {
state_fn: impl FnOnce(&mut Tokenizer) -> State + 'static,
done: impl FnOnce(bool) -> Box<StateFn> + 'static,
) -> Box<StateFn> {
+ self.attempt_balance += 1;
let previous = self.capture();
attempt_impl(
@@ -557,6 +656,7 @@ impl<'a> Tokenizer<'a> {
None,
self.point.index,
|tokenizer: &mut Tokenizer, state| {
+ tokenizer.attempt_balance -= 1;
tokenizer.free(previous);
tokenizer.consumed = true;
State::Fn(done(matches!(state, State::Ok)))
@@ -580,6 +680,7 @@ impl<'a> Tokenizer<'a> {
state_fn: impl FnOnce(&mut Tokenizer) -> State + 'static,
done: impl FnOnce(bool) -> Box<StateFn> + 'static,
) -> Box<StateFn> {
+ self.attempt_balance += 1;
let previous = self.capture();
attempt_impl(
@@ -587,6 +688,7 @@ impl<'a> Tokenizer<'a> {
None,
self.point.index,
|tokenizer: &mut Tokenizer, state| {
+ tokenizer.attempt_balance -= 1;
let ok = matches!(state, State::Ok);
if !ok {
@@ -782,7 +884,47 @@ fn attempt_impl(
let state = state(tokenizer);
match state {
- State::Ok | State::Nok => done(tokenizer, state),
+ State::Ok | State::Nok => {
+ if tokenizer.attempt_balance == 0 {
+ debug_assert!(!tokenizer.tokenize_state.connect);
+ debug_assert_eq!(tokenizer.tokenize_state.document_continued, 0);
+ debug_assert_eq!(tokenizer.tokenize_state.document_index, 0);
+ debug_assert!(!tokenizer.tokenize_state.document_interrupt_before);
+ debug_assert!(!tokenizer.tokenize_state.document_paragraph_before);
+ debug_assert_eq!(tokenizer.tokenize_state.marker, 0);
+ debug_assert_eq!(tokenizer.tokenize_state.marker_other, 0);
+ debug_assert_eq!(tokenizer.tokenize_state.prefix, 0);
+ debug_assert!(!tokenizer.tokenize_state.seen);
+ debug_assert_eq!(tokenizer.tokenize_state.size, 0);
+ debug_assert_eq!(tokenizer.tokenize_state.size_other, 0);
+ debug_assert_eq!(tokenizer.tokenize_state.stop.len(), 0);
+ debug_assert_eq!(tokenizer.tokenize_state.start, 0);
+ debug_assert_eq!(tokenizer.tokenize_state.end, 0);
+ debug_assert!(tokenizer.tokenize_state.return_state.is_none());
+ debug_assert!(!tokenizer.tokenize_state.space_or_tab_eol_connect);
+ debug_assert!(!tokenizer.tokenize_state.space_or_tab_eol_ok);
+ debug_assert!(tokenizer
+ .tokenize_state
+ .space_or_tab_eol_content_type
+ .is_none());
+ debug_assert!(!tokenizer.tokenize_state.space_or_tab_connect);
+ debug_assert!(tokenizer.tokenize_state.space_or_tab_content_type.is_none());
+ debug_assert_eq!(tokenizer.tokenize_state.space_or_tab_min, 0);
+ debug_assert_eq!(tokenizer.tokenize_state.space_or_tab_max, 0);
+ debug_assert_eq!(tokenizer.tokenize_state.space_or_tab_size, 0);
+ debug_assert_eq!(
+ tokenizer.tokenize_state.space_or_tab_token,
+ Token::SpaceOrTab
+ );
+ debug_assert_eq!(tokenizer.tokenize_state.token_1, Token::Data);
+ debug_assert_eq!(tokenizer.tokenize_state.token_2, Token::Data);
+ debug_assert_eq!(tokenizer.tokenize_state.token_3, Token::Data);
+ debug_assert_eq!(tokenizer.tokenize_state.token_4, Token::Data);
+ debug_assert_eq!(tokenizer.tokenize_state.token_5, Token::Data);
+ }
+
+ done(tokenizer, state)
+ }
State::Fn(func) => State::Fn(attempt_impl(func, pause, start, done)),
}
})
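
Because every construct now writes into one shared `TokenizeState`, a forgotten reset would silently corrupt whichever construct runs next. The new `attempt_balance` counter guards against that: attempts increment it on entry and decrement it when they settle, and only at zero do the `debug_assert!`s above verify that every field is back at its default (the checks vanish in release builds). A much-simplified synchronous sketch of the idea, unlike the real CPS-style `attempt_impl`:

```rust
#[derive(Default)]
struct TokenizeState {
    marker: u8,
    size: usize,
}

#[derive(Default)]
struct Tokenizer {
    attempt_balance: usize,
    tokenize_state: TokenizeState,
}

impl Tokenizer {
    fn attempt(&mut self, state_fn: impl FnOnce(&mut Tokenizer)) {
        self.attempt_balance += 1;
        state_fn(self);
        self.attempt_balance -= 1;
        if self.attempt_balance == 0 {
            // A construct that forgot to reset trips here, close to the
            // culprit, instead of corrupting an unrelated construct later.
            debug_assert_eq!(self.tokenize_state.marker, 0);
            debug_assert_eq!(self.tokenize_state.size, 0);
        }
    }
}

fn main() {
    let mut t = Tokenizer::default();
    t.attempt(|t| {
        // Nested attempts may leave state set while outer ones still run…
        t.attempt(|t| t.tokenize_state.marker = b'*');
        // …but it must be cleared before the outermost attempt settles.
        t.tokenize_state.marker = 0;
    });
}
```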