aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-08-12 19:04:31 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-08-12 19:04:31 +0200
commit395b13daf6dd6da0204302d344caa710ea891d62 (patch)
tree4a7c688af7a70c7e3b694d87ba66e01dd0670cf6
parent6dc2011d69c85820feddf6799142d304cc2eeb29 (diff)
downloadmarkdown-rs-395b13daf6dd6da0204302d344caa710ea891d62.tar.gz
markdown-rs-395b13daf6dd6da0204302d344caa710ea891d62.tar.bz2
markdown-rs-395b13daf6dd6da0204302d344caa710ea891d62.zip
Refactor to attempt less if never needed
-rw-r--r--src/construct/blank_line.rs12
-rw-r--r--src/construct/block_quote.rs24
-rw-r--r--src/construct/code_fenced.rs131
-rw-r--r--src/construct/code_indented.rs16
-rw-r--r--src/construct/definition.rs92
-rw-r--r--src/construct/heading_atx.rs24
-rw-r--r--src/construct/heading_setext.rs36
-rw-r--r--src/construct/html_flow.rs33
-rw-r--r--src/construct/html_text.rs14
-rw-r--r--src/construct/label_end.rs44
-rw-r--r--src/construct/list.rs37
-rw-r--r--src/construct/partial_space_or_tab_eol.rs61
-rw-r--r--src/construct/partial_title.rs84
-rw-r--r--src/construct/thematic_break.rs52
14 files changed, 367 insertions, 293 deletions
diff --git a/src/construct/blank_line.rs b/src/construct/blank_line.rs
index 81b58fc..7f1d023 100644
--- a/src/construct/blank_line.rs
+++ b/src/construct/blank_line.rs
@@ -47,12 +47,12 @@ use crate::tokenizer::Tokenizer;
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt(
- State::Next(StateName::BlankLineAfter),
- State::Next(StateName::BlankLineAfter),
- );
-
- State::Retry(space_or_tab(tokenizer))
+ if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+ tokenizer.attempt(State::Next(StateName::BlankLineAfter), State::Nok);
+ State::Retry(space_or_tab(tokenizer))
+ } else {
+ State::Retry(StateName::BlankLineAfter)
+ }
}
/// At eof/eol, after optional whitespace.
diff --git a/src/construct/block_quote.rs b/src/construct/block_quote.rs
index 6e660cb..4f0870f 100644
--- a/src/construct/block_quote.rs
+++ b/src/construct/block_quote.rs
@@ -64,16 +64,20 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
pub fn cont_start(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt(State::Next(StateName::BlockQuoteContBefore), State::Nok);
- State::Retry(space_or_tab_min_max(
- tokenizer,
- 0,
- if tokenizer.parse_state.constructs.code_indented {
- TAB_SIZE - 1
- } else {
- usize::MAX
- },
- ))
+ if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+ tokenizer.attempt(State::Next(StateName::BlockQuoteContBefore), State::Nok);
+ State::Retry(space_or_tab_min_max(
+ tokenizer,
+ 1,
+ if tokenizer.parse_state.constructs.code_indented {
+ TAB_SIZE - 1
+ } else {
+ usize::MAX
+ },
+ ))
+ } else {
+ State::Retry(StateName::BlockQuoteContBefore)
+ }
}
/// At `>`, after optional whitespace.
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index be0542a..74d6fe1 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -118,25 +118,32 @@ use crate::util::slice::{Position, Slice};
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
if tokenizer.parse_state.constructs.code_fenced {
- tokenizer.enter(Name::CodeFenced);
- tokenizer.enter(Name::CodeFencedFence);
+ if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+ tokenizer.enter(Name::CodeFenced);
+ tokenizer.enter(Name::CodeFencedFence);
+ tokenizer.attempt(
+ State::Next(StateName::CodeFencedBeforeSequenceOpen),
+ State::Nok,
+ );
+ return State::Retry(space_or_tab_min_max(
+ tokenizer,
+ 0,
+ if tokenizer.parse_state.constructs.code_indented {
+ TAB_SIZE - 1
+ } else {
+ usize::MAX
+ },
+ ));
+ }
- tokenizer.attempt(
- State::Next(StateName::CodeFencedBeforeSequenceOpen),
- State::Nok,
- );
- State::Retry(space_or_tab_min_max(
- tokenizer,
- 0,
- if tokenizer.parse_state.constructs.code_indented {
- TAB_SIZE - 1
- } else {
- usize::MAX
- },
- ))
- } else {
- State::Nok
+ if matches!(tokenizer.current, Some(b'`' | b'~')) {
+ tokenizer.enter(Name::CodeFenced);
+ tokenizer.enter(Name::CodeFencedFence);
+ return State::Retry(StateName::CodeFencedBeforeSequenceOpen);
+ }
}
+
+ State::Nok
}
/// In opening fence, after prefix, at sequence.
@@ -184,20 +191,18 @@ pub fn sequence_open(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.size += 1;
tokenizer.consume();
State::Next(StateName::CodeFencedSequenceOpen)
- } else if tokenizer.tokenize_state.size >= CODE_FENCED_SEQUENCE_SIZE_MIN {
- tokenizer.exit(Name::CodeFencedFenceSequence);
-
- tokenizer.attempt(
- State::Next(StateName::CodeFencedInfoBefore),
- State::Next(StateName::CodeFencedInfoBefore),
- );
-
- State::Retry(space_or_tab(tokenizer))
- } else {
+ } else if tokenizer.tokenize_state.size < CODE_FENCED_SEQUENCE_SIZE_MIN {
tokenizer.tokenize_state.marker = 0;
tokenizer.tokenize_state.size_c = 0;
tokenizer.tokenize_state.size = 0;
State::Nok
+ } else if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+ tokenizer.exit(Name::CodeFencedFenceSequence);
+ tokenizer.attempt(State::Next(StateName::CodeFencedInfoBefore), State::Nok);
+ State::Retry(space_or_tab(tokenizer))
+ } else {
+ tokenizer.exit(Name::CodeFencedFenceSequence);
+ State::Retry(StateName::CodeFencedInfoBefore)
}
}
@@ -254,10 +259,7 @@ pub fn info(tokenizer: &mut Tokenizer) -> State {
Some(b'\t' | b' ') => {
tokenizer.exit(Name::Data);
tokenizer.exit(Name::CodeFencedFenceInfo);
- tokenizer.attempt(
- State::Next(StateName::CodeFencedMetaBefore),
- State::Next(StateName::CodeFencedMetaBefore),
- );
+ tokenizer.attempt(State::Next(StateName::CodeFencedMetaBefore), State::Nok);
State::Retry(space_or_tab(tokenizer))
}
Some(byte) => {
@@ -362,20 +364,24 @@ pub fn at_non_lazy_break(tokenizer: &mut Tokenizer) -> State {
pub fn close_start(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Name::CodeFencedFence);
- tokenizer.attempt(
- State::Next(StateName::CodeFencedBeforeSequenceClose),
- State::Nok,
- );
+ if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+ tokenizer.attempt(
+ State::Next(StateName::CodeFencedBeforeSequenceClose),
+ State::Nok,
+ );
- State::Retry(space_or_tab_min_max(
- tokenizer,
- 0,
- if tokenizer.parse_state.constructs.code_indented {
- TAB_SIZE - 1
- } else {
- usize::MAX
- },
- ))
+ State::Retry(space_or_tab_min_max(
+ tokenizer,
+ 0,
+ if tokenizer.parse_state.constructs.code_indented {
+ TAB_SIZE - 1
+ } else {
+ usize::MAX
+ },
+ ))
+ } else {
+ State::Retry(StateName::CodeFencedBeforeSequenceClose)
+ }
}
/// In closing fence, after optional whitespace, at sequence.
@@ -413,11 +419,16 @@ pub fn sequence_close(tokenizer: &mut Tokenizer) -> State {
{
tokenizer.tokenize_state.size_b = 0;
tokenizer.exit(Name::CodeFencedFenceSequence);
- tokenizer.attempt(
- State::Next(StateName::CodeFencedAfterSequenceClose),
- State::Next(StateName::CodeFencedAfterSequenceClose),
- );
- State::Retry(space_or_tab(tokenizer))
+
+ if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+ tokenizer.attempt(
+ State::Next(StateName::CodeFencedAfterSequenceClose),
+ State::Nok,
+ );
+ State::Retry(space_or_tab(tokenizer))
+ } else {
+ State::Retry(StateName::CodeFencedAfterSequenceClose)
+ }
} else {
tokenizer.tokenize_state.size_b = 0;
State::Nok
@@ -466,15 +477,19 @@ pub fn content_before(tokenizer: &mut Tokenizer) -> State {
/// | ~~~
/// ```
pub fn content_start(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt(
- State::Next(StateName::CodeFencedBeforeContentChunk),
- State::Nok,
- );
- State::Retry(space_or_tab_min_max(
- tokenizer,
- 0,
- tokenizer.tokenize_state.size_c,
- ))
+ if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+ tokenizer.attempt(
+ State::Next(StateName::CodeFencedBeforeContentChunk),
+ State::Nok,
+ );
+ State::Retry(space_or_tab_min_max(
+ tokenizer,
+ 0,
+ tokenizer.tokenize_state.size_c,
+ ))
+ } else {
+ State::Retry(StateName::CodeFencedBeforeContentChunk)
+ }
}
/// Before code content, after optional prefix.
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs
index 5805346..cf111f4 100644
--- a/src/construct/code_indented.rs
+++ b/src/construct/code_indented.rs
@@ -63,7 +63,10 @@ use crate::tokenizer::Tokenizer;
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
// Do not interrupt paragraphs.
- if !tokenizer.interrupt && tokenizer.parse_state.constructs.code_indented {
+ if !tokenizer.interrupt
+ && tokenizer.parse_state.constructs.code_indented
+ && matches!(tokenizer.current, Some(b'\t' | b' '))
+ {
tokenizer.enter(Name::CodeIndented);
tokenizer.attempt(State::Next(StateName::CodeIndentedAtBreak), State::Nok);
State::Retry(space_or_tab_min_max(tokenizer, TAB_SIZE, TAB_SIZE))
@@ -158,11 +161,12 @@ pub fn further_start(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
pub fn further_begin(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt(
- State::Next(StateName::CodeIndentedFurtherAfter),
- State::Next(StateName::CodeIndentedFurtherAfter),
- );
- State::Retry(space_or_tab(tokenizer))
+ if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+ tokenizer.attempt(State::Next(StateName::CodeIndentedFurtherAfter), State::Nok);
+ State::Retry(space_or_tab(tokenizer))
+ } else {
+ State::Nok
+ }
}
/// After whitespace, not indented enough.
diff --git a/src/construct/definition.rs b/src/construct/definition.rs
index 2378c48..e242e23 100644
--- a/src/construct/definition.rs
+++ b/src/construct/definition.rs
@@ -112,24 +112,26 @@ use crate::util::{
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
// Do not interrupt paragraphs (but do follow definitions).
- let possible = !tokenizer.interrupt
- || (!tokenizer.events.is_empty()
- && tokenizer.events[skip::opt_back(
- &tokenizer.events,
- tokenizer.events.len() - 1,
- &[Name::LineEnding, Name::SpaceOrTab],
- )]
- .name
- == Name::Definition);
-
- if possible && tokenizer.parse_state.constructs.definition {
+ if tokenizer.parse_state.constructs.definition
+ && (!tokenizer.interrupt
+ || (!tokenizer.events.is_empty()
+ && tokenizer.events[skip::opt_back(
+ &tokenizer.events,
+ tokenizer.events.len() - 1,
+ &[Name::LineEnding, Name::SpaceOrTab],
+ )]
+ .name
+ == Name::Definition))
+ {
tokenizer.enter(Name::Definition);
- tokenizer.attempt(
- State::Next(StateName::DefinitionBefore),
- State::Next(StateName::DefinitionBefore),
- );
- // Note: arbitrary whitespace allowed even if code (indented) is on.
- State::Retry(space_or_tab(tokenizer))
+
+ if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+ // Note: arbitrary whitespace allowed even if code (indented) is on.
+ tokenizer.attempt(State::Next(StateName::DefinitionBefore), State::Nok);
+ State::Retry(space_or_tab(tokenizer))
+ } else {
+ State::Retry(StateName::DefinitionBefore)
+ }
} else {
State::Nok
}
@@ -189,11 +191,15 @@ pub fn label_after(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
pub fn marker_after(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt(
- State::Next(StateName::DefinitionDestinationBefore),
- State::Next(StateName::DefinitionDestinationBefore),
- );
- State::Retry(space_or_tab_eol(tokenizer))
+ if matches!(tokenizer.current, Some(b'\t' | b'\n' | b' ')) {
+ tokenizer.attempt(
+ State::Next(StateName::DefinitionDestinationBefore),
+ State::Next(StateName::DefinitionDestinationBefore),
+ );
+ State::Retry(space_or_tab_eol(tokenizer))
+ } else {
+ State::Retry(StateName::DefinitionDestinationBefore)
+ }
}
/// Before destination.
@@ -257,11 +263,15 @@ pub fn destination_missing(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
pub fn after(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt(
- State::Next(StateName::DefinitionAfterWhitespace),
- State::Next(StateName::DefinitionAfterWhitespace),
- );
- State::Retry(space_or_tab(tokenizer))
+ if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+ tokenizer.attempt(
+ State::Next(StateName::DefinitionAfterWhitespace),
+ State::Nok,
+ );
+ State::Retry(space_or_tab(tokenizer))
+ } else {
+ State::Retry(StateName::DefinitionAfterWhitespace)
+ }
}
/// After definition, after optional whitespace.
@@ -313,11 +323,15 @@ pub fn after_whitespace(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
pub fn title_before(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt(
- State::Next(StateName::DefinitionTitleBeforeMarker),
- State::Nok,
- );
- State::Retry(space_or_tab_eol(tokenizer))
+ if matches!(tokenizer.current, Some(b'\t' | b'\n' | b' ')) {
+ tokenizer.attempt(
+ State::Next(StateName::DefinitionTitleBeforeMarker),
+ State::Nok,
+ );
+ State::Retry(space_or_tab_eol(tokenizer))
+ } else {
+ State::Nok
+ }
}
/// At title.
@@ -345,11 +359,15 @@ pub fn title_after(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.token_1 = Name::Data;
tokenizer.tokenize_state.token_2 = Name::Data;
tokenizer.tokenize_state.token_3 = Name::Data;
- tokenizer.attempt(
- State::Next(StateName::DefinitionTitleAfterOptionalWhitespace),
- State::Next(StateName::DefinitionTitleAfterOptionalWhitespace),
- );
- State::Retry(space_or_tab(tokenizer))
+ if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+ tokenizer.attempt(
+ State::Next(StateName::DefinitionTitleAfterOptionalWhitespace),
+ State::Nok,
+ );
+ State::Retry(space_or_tab(tokenizer))
+ } else {
+ State::Retry(StateName::DefinitionTitleAfterOptionalWhitespace)
+ }
}
/// After title, after optional whitespace.
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index 22b93db..30c22f3 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -70,16 +70,20 @@ use crate::tokenizer::Tokenizer;
pub fn start(tokenizer: &mut Tokenizer) -> State {
if tokenizer.parse_state.constructs.heading_atx {
tokenizer.enter(Name::HeadingAtx);
- tokenizer.attempt(State::Next(StateName::HeadingAtxBefore), State::Nok);
- State::Retry(space_or_tab_min_max(
- tokenizer,
- 0,
- if tokenizer.parse_state.constructs.code_indented {
- TAB_SIZE - 1
- } else {
- usize::MAX
- },
- ))
+ if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+ tokenizer.attempt(State::Next(StateName::HeadingAtxBefore), State::Nok);
+ State::Retry(space_or_tab_min_max(
+ tokenizer,
+ 0,
+ if tokenizer.parse_state.constructs.code_indented {
+ TAB_SIZE - 1
+ } else {
+ usize::MAX
+ },
+ ))
+ } else {
+ State::Retry(StateName::HeadingAtxBefore)
+ }
} else {
State::Nok
}
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index 4e6345a..1f6270a 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -85,16 +85,20 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
.name
== Name::Paragraph)
{
- tokenizer.attempt(State::Next(StateName::HeadingSetextBefore), State::Nok);
- State::Retry(space_or_tab_min_max(
- tokenizer,
- 0,
- if tokenizer.parse_state.constructs.code_indented {
- TAB_SIZE - 1
- } else {
- usize::MAX
- },
- ))
+ if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+ tokenizer.attempt(State::Next(StateName::HeadingSetextBefore), State::Nok);
+ State::Retry(space_or_tab_min_max(
+ tokenizer,
+ 0,
+ if tokenizer.parse_state.constructs.code_indented {
+ TAB_SIZE - 1
+ } else {
+ usize::MAX
+ },
+ ))
+ } else {
+ State::Retry(StateName::HeadingSetextBefore)
+ }
} else {
State::Nok
}
@@ -132,11 +136,13 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {
} else {
tokenizer.tokenize_state.marker = 0;
tokenizer.exit(Name::HeadingSetextUnderline);
- tokenizer.attempt(
- State::Next(StateName::HeadingSetextAfter),
- State::Next(StateName::HeadingSetextAfter),
- );
- State::Retry(space_or_tab(tokenizer))
+
+ if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+ tokenizer.attempt(State::Next(StateName::HeadingSetextAfter), State::Nok);
+ State::Retry(space_or_tab(tokenizer))
+ } else {
+ State::Retry(StateName::HeadingSetextAfter)
+ }
}
}
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index 123e1a3..e90abc4 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -133,21 +133,26 @@ const COMPLETE: u8 = 7;
pub fn start(tokenizer: &mut Tokenizer) -> State {
if tokenizer.parse_state.constructs.html_flow {
tokenizer.enter(Name::HtmlFlow);
- tokenizer.attempt(State::Next(StateName::HtmlFlowBefore), State::Nok);
- State::Retry(space_or_tab_with_options(
- tokenizer,
- SpaceOrTabOptions {
- kind: Name::HtmlFlowData,
- min: 0,
- max: if tokenizer.parse_state.constructs.code_indented {
- TAB_SIZE - 1
- } else {
- usize::MAX
+
+ if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+ tokenizer.attempt(State::Next(StateName::HtmlFlowBefore), State::Nok);
+ State::Retry(space_or_tab_with_options(
+ tokenizer,
+ SpaceOrTabOptions {
+ kind: Name::HtmlFlowData,
+ min: 0,
+ max: if tokenizer.parse_state.constructs.code_indented {
+ TAB_SIZE - 1
+ } else {
+ usize::MAX
+ },
+ connect: false,
+ content: None,
},
- connect: false,
- content: None,
- },
- ))
+ ))
+ } else {
+ State::Retry(StateName::HtmlFlowBefore)
+ }
} else {
State::Nok
}
diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs
index ffbc768..c3b0a65 100644
--- a/src/construct/html_text.rs
+++ b/src/construct/html_text.rs
@@ -666,11 +666,15 @@ pub fn line_ending_before(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
pub fn line_ending_after(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt(
- State::Next(StateName::HtmlTextLineEndingAfterPrefix),
- State::Next(StateName::HtmlTextLineEndingAfterPrefix),
- );
- State::Retry(space_or_tab(tokenizer))
+ if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+ tokenizer.attempt(
+ State::Next(StateName::HtmlTextLineEndingAfterPrefix),
+ State::Nok,
+ );
+ State::Retry(space_or_tab(tokenizer))
+ } else {
+ State::Retry(StateName::HtmlTextLineEndingAfterPrefix)
+ }
}
/// After eol, after optional whitespace.
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index 7f80415..8921fcc 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -362,11 +362,15 @@ pub fn resource_start(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
pub fn resource_before(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt(
- State::Next(StateName::LabelEndResourceOpen),
- State::Next(StateName::LabelEndResourceOpen),
- );
- State::Retry(space_or_tab_eol(tokenizer))
+ if matches!(tokenizer.current, Some(b'\t' | b'\n' | b' ')) {
+ tokenizer.attempt(
+ State::Next(StateName::LabelEndResourceOpen),
+ State::Next(StateName::LabelEndResourceOpen),
+ );
+ State::Retry(space_or_tab_eol(tokenizer))
+ } else {
+ State::Retry(StateName::LabelEndResourceOpen)
+ }
}
/// In resource, after optional whitespace, at `)` or a destination.
@@ -407,11 +411,16 @@ pub fn resource_destination_after(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.token_4 = Name::Data;
tokenizer.tokenize_state.token_5 = Name::Data;
tokenizer.tokenize_state.size_b = 0;
- tokenizer.attempt(
- State::Next(StateName::LabelEndResourceBetween),
- State::Next(StateName::LabelEndResourceEnd),
- );
- State::Retry(space_or_tab_eol(tokenizer))
+
+ if matches!(tokenizer.current, Some(b'\t' | b'\n' | b' ')) {
+ tokenizer.attempt(
+ State::Next(StateName::LabelEndResourceBetween),
+ State::Next(StateName::LabelEndResourceEnd),
+ );
+ State::Retry(space_or_tab_eol(tokenizer))
+ } else {
+ State::Retry(StateName::LabelEndResourceEnd)
+ }
}
/// At invalid destination.
@@ -462,11 +471,16 @@ pub fn resource_title_after(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.token_1 = Name::Data;
tokenizer.tokenize_state.token_2 = Name::Data;
tokenizer.tokenize_state.token_3 = Name::Data;
- tokenizer.attempt(
- State::Next(StateName::LabelEndResourceEnd),
- State::Next(StateName::LabelEndResourceEnd),
- );
- State::Retry(space_or_tab_eol(tokenizer))
+
+ if matches!(tokenizer.current, Some(b'\t' | b'\n' | b' ')) {
+ tokenizer.attempt(
+ State::Next(StateName::LabelEndResourceBetween),
+ State::Next(StateName::LabelEndResourceEnd),
+ );
+ State::Retry(space_or_tab_eol(tokenizer))
+ } else {
+ State::Retry(StateName::LabelEndResourceEnd)
+ }
}
/// In resource, at `)`.
diff --git a/src/construct/list.rs b/src/construct/list.rs
index 076ff58..596330c 100644
--- a/src/construct/list.rs
+++ b/src/construct/list.rs
@@ -64,16 +64,21 @@ use crate::util::{
pub fn start(tokenizer: &mut Tokenizer) -> State {
if tokenizer.parse_state.constructs.list {
tokenizer.enter(Name::ListItem);
- tokenizer.attempt(State::Next(StateName::ListBefore), State::Nok);
- State::Retry(space_or_tab_min_max(
- tokenizer,
- 0,
- if tokenizer.parse_state.constructs.code_indented {
- TAB_SIZE - 1
- } else {
- usize::MAX
- },
- ))
+
+ if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+ tokenizer.attempt(State::Next(StateName::ListBefore), State::Nok);
+ State::Retry(space_or_tab_min_max(
+ tokenizer,
+ 0,
+ if tokenizer.parse_state.constructs.code_indented {
+ TAB_SIZE - 1
+ } else {
+ usize::MAX
+ },
+ ))
+ } else {
+ State::Retry(StateName::ListBefore)
+ }
} else {
State::Nok
}
@@ -319,9 +324,11 @@ pub fn cont_blank(tokenizer: &mut Tokenizer) -> State {
if container.blank_initial {
State::Nok
- } else {
+ } else if matches!(tokenizer.current, Some(b'\t' | b' ')) {
// Consume, optionally, at most `size`.
State::Retry(space_or_tab_min_max(tokenizer, 0, size))
+ } else {
+ State::Ok
}
}
@@ -339,8 +346,12 @@ pub fn cont_filled(tokenizer: &mut Tokenizer) -> State {
container.blank_initial = false;
- // Consume exactly `size`.
- State::Retry(space_or_tab_min_max(tokenizer, size, size))
+ if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+ // Consume exactly `size`.
+ State::Retry(space_or_tab_min_max(tokenizer, size, size))
+ } else {
+ State::Nok
+ }
}
/// Find adjacent list items with the same marker.
diff --git a/src/construct/partial_space_or_tab_eol.rs b/src/construct/partial_space_or_tab_eol.rs
index b38bc64..427cb11 100644
--- a/src/construct/partial_space_or_tab_eol.rs
+++ b/src/construct/partial_space_or_tab_eol.rs
@@ -54,21 +54,25 @@ pub fn space_or_tab_eol_with_options(tokenizer: &mut Tokenizer, options: Options
/// | ␠␠b
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt(
- State::Next(StateName::SpaceOrTabEolAfterFirst),
- State::Next(StateName::SpaceOrTabEolAtEol),
- );
+ if matches!(tokenizer.current, Some(b'\t' | b'\n' | b' ')) {
+ tokenizer.attempt(
+ State::Next(StateName::SpaceOrTabEolAfterFirst),
+ State::Next(StateName::SpaceOrTabEolAtEol),
+ );
- State::Retry(space_or_tab_with_options(
- tokenizer,
- SpaceOrTabOptions {
- kind: Name::SpaceOrTab,
- min: 1,
- max: usize::MAX,
- content: tokenizer.tokenize_state.space_or_tab_eol_content.clone(),
- connect: tokenizer.tokenize_state.space_or_tab_eol_connect,
- },
- ))
+ State::Retry(space_or_tab_with_options(
+ tokenizer,
+ SpaceOrTabOptions {
+ kind: Name::SpaceOrTab,
+ min: 1,
+ max: usize::MAX,
+ content: tokenizer.tokenize_state.space_or_tab_eol_content.clone(),
+ connect: tokenizer.tokenize_state.space_or_tab_eol_connect,
+ },
+ ))
+ } else {
+ State::Nok
+ }
}
/// After initial whitespace, at optional eol.
@@ -151,20 +155,21 @@ pub fn at_eol(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
pub fn after_eol(tokenizer: &mut Tokenizer) -> State {
- tokenizer.attempt(
- State::Next(StateName::SpaceOrTabEolAfterMore),
- State::Next(StateName::SpaceOrTabEolAfterMore),
- );
- State::Retry(space_or_tab_with_options(
- tokenizer,
- SpaceOrTabOptions {
- kind: Name::SpaceOrTab,
- min: 1,
- max: usize::MAX,
- content: tokenizer.tokenize_state.space_or_tab_eol_content.clone(),
- connect: tokenizer.tokenize_state.space_or_tab_eol_connect,
- },
- ))
+ if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+ tokenizer.attempt(State::Next(StateName::SpaceOrTabEolAfterMore), State::Nok);
+ State::Retry(space_or_tab_with_options(
+ tokenizer,
+ SpaceOrTabOptions {
+ kind: Name::SpaceOrTab,
+ min: 1,
+ max: usize::MAX,
+ content: tokenizer.tokenize_state.space_or_tab_eol_content.clone(),
+ connect: tokenizer.tokenize_state.space_or_tab_eol_connect,
+ },
+ ))
+ } else {
+ State::Retry(StateName::SpaceOrTabEolAfterMore)
+ }
}
/// After optional final whitespace.
diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs
index 93dbd28..f0c4931 100644
--- a/src/construct/partial_title.rs
+++ b/src/construct/partial_title.rs
@@ -66,22 +66,17 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
pub fn begin(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- Some(b'"' | b'\'' | b')')
- if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker =>
- {
- tokenizer.enter(tokenizer.tokenize_state.token_2.clone());
- tokenizer.consume();
- tokenizer.exit(tokenizer.tokenize_state.token_2.clone());
- tokenizer.exit(tokenizer.tokenize_state.token_1.clone());
- tokenizer.tokenize_state.marker = 0;
- tokenizer.tokenize_state.connect = false;
- State::Ok
- }
- _ => {
- tokenizer.enter(tokenizer.tokenize_state.token_3.clone());
- State::Retry(StateName::TitleAtBreak)
- }
+ if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
+ tokenizer.enter(tokenizer.tokenize_state.token_2.clone());
+ tokenizer.consume();
+ tokenizer.exit(tokenizer.tokenize_state.token_2.clone());
+ tokenizer.exit(tokenizer.tokenize_state.token_1.clone());
+ tokenizer.tokenize_state.marker = 0;
+ tokenizer.tokenize_state.connect = false;
+ State::Ok
+ } else {
+ tokenizer.enter(tokenizer.tokenize_state.token_3.clone());
+ State::Retry(StateName::TitleAtBreak)
}
}
@@ -92,13 +87,11 @@ pub fn begin(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
pub fn at_break(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- None => {
- tokenizer.tokenize_state.marker = 0;
- tokenizer.tokenize_state.connect = false;
- State::Nok
- }
- Some(b'\n') => {
+ if let Some(byte) = tokenizer.current {
+ if byte == tokenizer.tokenize_state.marker {
+ tokenizer.exit(tokenizer.tokenize_state.token_3.clone());
+ State::Retry(StateName::TitleBegin)
+ } else if byte == b'\n' {
tokenizer.attempt(
State::Next(StateName::TitleAfterEol),
State::Next(StateName::TitleAtBlankLine),
@@ -110,14 +103,7 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State {
connect: tokenizer.tokenize_state.connect,
},
))
- }
- Some(b'"' | b'\'' | b')')
- if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker =>
- {
- tokenizer.exit(tokenizer.tokenize_state.token_3.clone());
- State::Retry(StateName::TitleBegin)
- }
- Some(_) => {
+ } else {
tokenizer.enter_link(
Name::Data,
Link {
@@ -136,6 +122,10 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State {
State::Retry(StateName::TitleInside)
}
+ } else {
+ tokenizer.tokenize_state.marker = 0;
+ tokenizer.tokenize_state.connect = false;
+ State::Nok
}
}
@@ -172,25 +162,19 @@ pub fn at_blank_line(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
pub fn inside(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- None | Some(b'\n') => {
- tokenizer.exit(Name::Data);
- State::Retry(StateName::TitleAtBreak)
- }
- Some(b'"' | b'\'' | b')')
- if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker =>
- {
- tokenizer.exit(Name::Data);
- State::Retry(StateName::TitleAtBreak)
- }
- Some(byte) => {
- tokenizer.consume();
- State::Next(if matches!(byte, b'\\') {
- StateName::TitleEscape
- } else {
- StateName::TitleInside
- })
- }
+ if tokenizer.current == Some(tokenizer.tokenize_state.marker)
+ || matches!(tokenizer.current, None | Some(b'\n'))
+ {
+ tokenizer.exit(Name::Data);
+ State::Retry(StateName::TitleAtBreak)
+ } else {
+ let name = if tokenizer.current == Some(b'\\') {
+ StateName::TitleEscape
+ } else {
+ StateName::TitleInside
+ };
+ tokenizer.consume();
+ State::Next(name)
}
}
diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs
index af8206e..f493b96 100644
--- a/src/construct/thematic_break.rs
+++ b/src/construct/thematic_break.rs
@@ -63,16 +63,21 @@ use crate::tokenizer::Tokenizer;
pub fn start(tokenizer: &mut Tokenizer) -> State {
if tokenizer.parse_state.constructs.thematic_break {
tokenizer.enter(Name::ThematicBreak);
- tokenizer.attempt(State::Next(StateName::ThematicBreakBefore), State::Nok);
- State::Retry(space_or_tab_min_max(
- tokenizer,
- 0,
- if tokenizer.parse_state.constructs.code_indented {
- TAB_SIZE - 1
- } else {
- usize::MAX
- },
- ))
+
+ if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+ tokenizer.attempt(State::Next(StateName::ThematicBreakBefore), State::Nok);
+ State::Retry(space_or_tab_min_max(
+ tokenizer,
+ 0,
+ if tokenizer.parse_state.constructs.code_indented {
+ TAB_SIZE - 1
+ } else {
+ usize::MAX
+ },
+ ))
+ } else {
+ State::Retry(StateName::ThematicBreakBefore)
+ }
} else {
State::Nok
}
@@ -127,21 +132,16 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
pub fn sequence(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- Some(b'*' | b'-' | b'_')
- if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker =>
- {
- tokenizer.consume();
- tokenizer.tokenize_state.size += 1;
- State::Next(StateName::ThematicBreakSequence)
- }
- _ => {
- tokenizer.exit(Name::ThematicBreakSequence);
- tokenizer.attempt(
- State::Next(StateName::ThematicBreakAtBreak),
- State::Next(StateName::ThematicBreakAtBreak),
- );
- State::Retry(space_or_tab(tokenizer))
- }
+ if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
+ tokenizer.consume();
+ tokenizer.tokenize_state.size += 1;
+ State::Next(StateName::ThematicBreakSequence)
+ } else if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+ tokenizer.exit(Name::ThematicBreakSequence);
+ tokenizer.attempt(State::Next(StateName::ThematicBreakAtBreak), State::Nok);
+ State::Retry(space_or_tab(tokenizer))
+ } else {
+ tokenizer.exit(Name::ThematicBreakSequence);
+ State::Retry(StateName::ThematicBreakAtBreak)
}
}