-rw-r--r--  src/construct/attention.rs             |  53
-rw-r--r--  src/construct/autolink.rs              |  19
-rw-r--r--  src/construct/character_escape.rs      |  17
-rw-r--r--  src/construct/character_reference.rs   |  17
-rw-r--r--  src/construct/code_fenced.rs           | 127
-rw-r--r--  src/construct/code_indented.rs         |  36
-rw-r--r--  src/construct/hard_break_escape.rs     |  13
-rw-r--r--  src/construct/heading_atx.rs           |  24
-rw-r--r--  src/construct/heading_setext.rs        |  25
-rw-r--r--  src/construct/html_flow.rs             | 116
-rw-r--r--  src/construct/html_text.rs             |  41
-rw-r--r--  src/construct/label_start_image.rs     |  17
-rw-r--r--  src/construct/label_start_link.rs      |  31
-rw-r--r--  src/construct/list.rs                  |  53
-rw-r--r--  src/construct/partial_data.rs          |  57
-rw-r--r--  src/construct/partial_destination.rs   |  65
-rw-r--r--  src/construct/partial_space_or_tab.rs  |  31
-rw-r--r--  src/construct/thematic_break.rs        |  36
-rw-r--r--  src/state.rs                           |   2
19 files changed, 368 insertions(+), 412 deletions(-)
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
index 3a29d06..ae8da81 100644
--- a/src/construct/attention.rs
+++ b/src/construct/attention.rs
@@ -118,13 +118,13 @@ struct Sequence {
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- Some(b'*' | b'_') if tokenizer.parse_state.constructs.attention => {
- tokenizer.tokenize_state.marker = tokenizer.current.unwrap();
- tokenizer.enter(Name::AttentionSequence);
- State::Retry(StateName::AttentionInside)
- }
- _ => State::Nok,
+ if tokenizer.parse_state.constructs.attention && matches!(tokenizer.current, Some(b'*' | b'_'))
+ {
+ tokenizer.tokenize_state.marker = tokenizer.current.unwrap();
+ tokenizer.enter(Name::AttentionSequence);
+ State::Retry(StateName::AttentionInside)
+ } else {
+ State::Nok
}
}
@@ -135,17 +135,14 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ^^
/// ```
pub fn inside(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- Some(b'*' | b'_') if tokenizer.current == Some(tokenizer.tokenize_state.marker) => {
- tokenizer.consume();
- State::Next(StateName::AttentionInside)
- }
- _ => {
- tokenizer.exit(Name::AttentionSequence);
- tokenizer.register_resolver(ResolveName::Attention);
- tokenizer.tokenize_state.marker = b'\0';
- State::Ok
- }
+ if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
+ tokenizer.consume();
+ State::Next(StateName::AttentionInside)
+ } else {
+ tokenizer.exit(Name::AttentionSequence);
+ tokenizer.register_resolver(ResolveName::Attention);
+ tokenizer.tokenize_state.marker = b'\0';
+ State::Ok
}
}
@@ -437,14 +434,22 @@ fn match_sequences(
///
/// * [`micromark-util-classify-character` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-util-classify-character/dev/index.js)
fn classify_character(char: Option<char>) -> CharacterKind {
- match char {
- // EOF.
- None => CharacterKind::Whitespace,
+ if let Some(char) = char {
// Unicode whitespace.
- Some(char) if char.is_whitespace() => CharacterKind::Whitespace,
+ if char.is_whitespace() {
+ CharacterKind::Whitespace
+ }
// Unicode punctuation.
- Some(char) if PUNCTUATION.contains(&char) => CharacterKind::Punctuation,
+ else if PUNCTUATION.contains(&char) {
+ CharacterKind::Punctuation
+ }
// Everything else.
- Some(_) => CharacterKind::Other,
+ else {
+ CharacterKind::Other
+ }
+ }
+ // EOF.
+ else {
+ CharacterKind::Whitespace
}
}
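
The shape of the whole commit shows up already in attention.rs: `match` arms whose patterns exist only to carry `if` guards become plain `if`/`else` chains, with the cheap construct-enabled flag tested first so it short-circuits. A standalone sketch of the `classify_character` rewrite, using `char::is_ascii_punctuation` as a stand-in for the crate's Unicode `PUNCTUATION` table:

    #[derive(Debug, PartialEq)]
    enum Kind {
        Whitespace,
        Punctuation,
        Other,
    }

    // Before: every arm is `Some(..)` plus a guard, so the pattern adds nothing.
    fn classify_match(char: Option<char>) -> Kind {
        match char {
            None => Kind::Whitespace,
            Some(c) if c.is_whitespace() => Kind::Whitespace,
            Some(c) if c.is_ascii_punctuation() => Kind::Punctuation,
            Some(_) => Kind::Other,
        }
    }

    // After: the same decisions as an `if let` with plain branches.
    fn classify_if(char: Option<char>) -> Kind {
        if let Some(c) = char {
            if c.is_whitespace() {
                Kind::Whitespace
            } else if c.is_ascii_punctuation() {
                Kind::Punctuation
            } else {
                Kind::Other
            }
        } else {
            // EOF.
            Kind::Whitespace
        }
    }

    fn main() {
        for c in [None, Some(' '), Some('*'), Some('a')] {
            assert_eq!(classify_match(c), classify_if(c));
        }
    }
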
diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs
index 1bb8004..5c826a3 100644
--- a/src/construct/autolink.rs
+++ b/src/construct/autolink.rs
@@ -115,16 +115,15 @@ use crate::tokenizer::Tokenizer;
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- Some(b'<') if tokenizer.parse_state.constructs.autolink => {
- tokenizer.enter(Name::Autolink);
- tokenizer.enter(Name::AutolinkMarker);
- tokenizer.consume();
- tokenizer.exit(Name::AutolinkMarker);
- tokenizer.enter(Name::AutolinkProtocol);
- State::Next(StateName::AutolinkOpen)
- }
- _ => State::Nok,
+ if tokenizer.parse_state.constructs.autolink && tokenizer.current == Some(b'<') {
+ tokenizer.enter(Name::Autolink);
+ tokenizer.enter(Name::AutolinkMarker);
+ tokenizer.consume();
+ tokenizer.exit(Name::AutolinkMarker);
+ tokenizer.enter(Name::AutolinkProtocol);
+ State::Next(StateName::AutolinkOpen)
+ } else {
+ State::Nok
}
}
diff --git a/src/construct/character_escape.rs b/src/construct/character_escape.rs
index 494f1d2..ac91c29 100644
--- a/src/construct/character_escape.rs
+++ b/src/construct/character_escape.rs
@@ -44,15 +44,14 @@ use crate::tokenizer::Tokenizer;
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- Some(b'\\') if tokenizer.parse_state.constructs.character_escape => {
- tokenizer.enter(Name::CharacterEscape);
- tokenizer.enter(Name::CharacterEscapeMarker);
- tokenizer.consume();
- tokenizer.exit(Name::CharacterEscapeMarker);
- State::Next(StateName::CharacterEscapeInside)
- }
- _ => State::Nok,
+ if tokenizer.parse_state.constructs.character_escape && tokenizer.current == Some(b'\\') {
+ tokenizer.enter(Name::CharacterEscape);
+ tokenizer.enter(Name::CharacterEscapeMarker);
+ tokenizer.consume();
+ tokenizer.exit(Name::CharacterEscapeMarker);
+ State::Next(StateName::CharacterEscapeInside)
+ } else {
+ State::Nok
}
}
diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs
index 6171927..7d7b6f9 100644
--- a/src/construct/character_reference.rs
+++ b/src/construct/character_reference.rs
@@ -81,15 +81,14 @@ use crate::util::slice::Slice;
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- Some(b'&') if tokenizer.parse_state.constructs.character_reference => {
- tokenizer.enter(Name::CharacterReference);
- tokenizer.enter(Name::CharacterReferenceMarker);
- tokenizer.consume();
- tokenizer.exit(Name::CharacterReferenceMarker);
- State::Next(StateName::CharacterReferenceOpen)
- }
- _ => State::Nok,
+ if tokenizer.parse_state.constructs.character_reference && tokenizer.current == Some(b'&') {
+ tokenizer.enter(Name::CharacterReference);
+ tokenizer.enter(Name::CharacterReferenceMarker);
+ tokenizer.consume();
+ tokenizer.exit(Name::CharacterReferenceMarker);
+ State::Next(StateName::CharacterReferenceOpen)
+ } else {
+ State::Nok
}
}
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index ac9a63f..e69f33c 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -180,28 +180,24 @@ pub fn before_sequence_open(tokenizer: &mut Tokenizer) -> State {
/// | ~~~
/// ```
pub fn sequence_open(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- Some(b'`' | b'~') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
- tokenizer.tokenize_state.size += 1;
- tokenizer.consume();
- State::Next(StateName::CodeFencedSequenceOpen)
- }
- _ if tokenizer.tokenize_state.size >= CODE_FENCED_SEQUENCE_SIZE_MIN => {
- tokenizer.exit(Name::CodeFencedFenceSequence);
+ if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
+ tokenizer.tokenize_state.size += 1;
+ tokenizer.consume();
+ State::Next(StateName::CodeFencedSequenceOpen)
+ } else if tokenizer.tokenize_state.size >= CODE_FENCED_SEQUENCE_SIZE_MIN {
+ tokenizer.exit(Name::CodeFencedFenceSequence);
- tokenizer.attempt(
- State::Next(StateName::CodeFencedInfoBefore),
- State::Next(StateName::CodeFencedInfoBefore),
- );
+ tokenizer.attempt(
+ State::Next(StateName::CodeFencedInfoBefore),
+ State::Next(StateName::CodeFencedInfoBefore),
+ );
- State::Retry(space_or_tab(tokenizer))
- }
- _ => {
- tokenizer.tokenize_state.marker = 0;
- tokenizer.tokenize_state.size_c = 0;
- tokenizer.tokenize_state.size = 0;
- State::Nok
- }
+ State::Retry(space_or_tab(tokenizer))
+ } else {
+ tokenizer.tokenize_state.marker = 0;
+ tokenizer.tokenize_state.size_c = 0;
+ tokenizer.tokenize_state.size = 0;
+ State::Nok
}
}
@@ -257,16 +253,17 @@ pub fn info(tokenizer: &mut Tokenizer) -> State {
);
State::Retry(space_or_tab(tokenizer))
}
- Some(b'`') if tokenizer.tokenize_state.marker == b'`' => {
- tokenizer.concrete = false;
- tokenizer.tokenize_state.marker = 0;
- tokenizer.tokenize_state.size_c = 0;
- tokenizer.tokenize_state.size = 0;
- State::Nok
- }
- Some(_) => {
- tokenizer.consume();
- State::Next(StateName::CodeFencedInfo)
+ Some(byte) => {
+ if tokenizer.tokenize_state.marker == byte && byte == b'`' {
+ tokenizer.concrete = false;
+ tokenizer.tokenize_state.marker = 0;
+ tokenizer.tokenize_state.size_c = 0;
+ tokenizer.tokenize_state.size = 0;
+ State::Nok
+ } else {
+ tokenizer.consume();
+ State::Next(StateName::CodeFencedInfo)
+ }
}
}
}
@@ -305,16 +302,17 @@ pub fn meta(tokenizer: &mut Tokenizer) -> State {
tokenizer.exit(Name::CodeFencedFenceMeta);
State::Retry(StateName::CodeFencedInfoBefore)
}
- Some(b'`') if tokenizer.tokenize_state.marker == b'`' => {
- tokenizer.concrete = false;
- tokenizer.tokenize_state.marker = 0;
- tokenizer.tokenize_state.size_c = 0;
- tokenizer.tokenize_state.size = 0;
- State::Nok
- }
- _ => {
- tokenizer.consume();
- State::Next(StateName::CodeFencedMeta)
+ Some(byte) => {
+ if tokenizer.tokenize_state.marker == byte && byte == b'`' {
+ tokenizer.concrete = false;
+ tokenizer.tokenize_state.marker = 0;
+ tokenizer.tokenize_state.size_c = 0;
+ tokenizer.tokenize_state.size = 0;
+ State::Nok
+ } else {
+ tokenizer.consume();
+ State::Next(StateName::CodeFencedMeta)
+ }
}
}
}
@@ -392,12 +390,11 @@ pub fn close_start(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
pub fn before_sequence_close(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- Some(b'`' | b'~') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
- tokenizer.enter(Name::CodeFencedFenceSequence);
- State::Retry(StateName::CodeFencedSequenceClose)
- }
- _ => State::Nok,
+ if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
+ tokenizer.enter(Name::CodeFencedFenceSequence);
+ State::Retry(StateName::CodeFencedSequenceClose)
+ } else {
+ State::Nok
}
}
@@ -410,27 +407,23 @@ pub fn before_sequence_close(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
pub fn sequence_close(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- Some(b'`' | b'~') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
- tokenizer.tokenize_state.size_b += 1;
- tokenizer.consume();
- State::Next(StateName::CodeFencedSequenceClose)
- }
- _ if tokenizer.tokenize_state.size_b >= CODE_FENCED_SEQUENCE_SIZE_MIN
- && tokenizer.tokenize_state.size_b >= tokenizer.tokenize_state.size =>
- {
- tokenizer.tokenize_state.size_b = 0;
- tokenizer.exit(Name::CodeFencedFenceSequence);
- tokenizer.attempt(
- State::Next(StateName::CodeFencedAfterSequenceClose),
- State::Next(StateName::CodeFencedAfterSequenceClose),
- );
- State::Retry(space_or_tab(tokenizer))
- }
- _ => {
- tokenizer.tokenize_state.size_b = 0;
- State::Nok
- }
+ if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
+ tokenizer.tokenize_state.size_b += 1;
+ tokenizer.consume();
+ State::Next(StateName::CodeFencedSequenceClose)
+ } else if tokenizer.tokenize_state.size_b >= CODE_FENCED_SEQUENCE_SIZE_MIN
+ && tokenizer.tokenize_state.size_b >= tokenizer.tokenize_state.size
+ {
+ tokenizer.tokenize_state.size_b = 0;
+ tokenizer.exit(Name::CodeFencedFenceSequence);
+ tokenizer.attempt(
+ State::Next(StateName::CodeFencedAfterSequenceClose),
+ State::Next(StateName::CodeFencedAfterSequenceClose),
+ );
+ State::Retry(space_or_tab(tokenizer))
+ } else {
+ tokenizer.tokenize_state.size_b = 0;
+ State::Nok
}
}
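
Throughout code_fenced.rs the `Some(b'`' | b'~')` patterns with `current.unwrap() == marker` guards collapse to a single `tokenizer.current == Some(tokenizer.tokenize_state.marker)` comparison. That is sound as long as `marker` holds one of the two fence bytes when these states run; a brute-force equivalence check under that assumption:

    fn is_fence_byte_old(current: Option<u8>, marker: u8) -> bool {
        matches!(current, Some(b'`' | b'~')) && current.unwrap() == marker
    }

    fn is_fence_byte_new(current: Option<u8>, marker: u8) -> bool {
        current == Some(marker)
    }

    fn main() {
        // `marker` is assigned from the opening fence byte, so only backtick
        // and tilde are reachable, and the two forms agree for every input.
        for marker in [b'`', b'~'] {
            for current in (0u8..=255).map(Some).chain([None]) {
                assert_eq!(
                    is_fence_byte_old(current, marker),
                    is_fence_byte_new(current, marker)
                );
            }
        }
    }
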
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs
index 3a82dc4..5805346 100644
--- a/src/construct/code_indented.rs
+++ b/src/construct/code_indented.rs
@@ -135,33 +135,19 @@ pub fn after(tokenizer: &mut Tokenizer) -> State {
/// | bbb
/// ```
pub fn further_start(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- Some(b'\n') if !tokenizer.lazy => {
- tokenizer.enter(Name::LineEnding);
- tokenizer.consume();
- tokenizer.exit(Name::LineEnding);
- State::Next(StateName::CodeIndentedFurtherStart)
- }
- _ if !tokenizer.lazy => {
- tokenizer.attempt(
- State::Next(StateName::CodeIndentedFurtherEnd),
- State::Next(StateName::CodeIndentedFurtherBegin),
- );
- State::Retry(space_or_tab_min_max(tokenizer, TAB_SIZE, TAB_SIZE))
- }
- _ => State::Nok,
+ if tokenizer.lazy {
+ return State::Nok;
}
-}
-/// At eol, followed by an indented line.
-///
-/// ```markdown
-/// > | aaa
-/// ^
-/// | bbb
-/// ```
-pub fn further_end(_tokenizer: &mut Tokenizer) -> State {
- State::Ok
+ if tokenizer.current == Some(b'\n') {
+ tokenizer.enter(Name::LineEnding);
+ tokenizer.consume();
+ tokenizer.exit(Name::LineEnding);
+ State::Next(StateName::CodeIndentedFurtherStart)
+ } else {
+ tokenizer.attempt(State::Ok, State::Next(StateName::CodeIndentedFurtherBegin));
+ State::Retry(space_or_tab_min_max(tokenizer, TAB_SIZE, TAB_SIZE))
+ }
}
/// At the beginning of a line that is not indented enough.
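
code_indented.rs also deletes a state outright: `further_end` did nothing but return `State::Ok`, so `attempt` can take `State::Ok` as its success continuation directly, and `Name::CodeIndentedFurtherEnd` drops out of state.rs (last hunk below). A toy model of the indirection being removed, with simplified stand-in types:

    #[derive(Clone, Copy, Debug, PartialEq)]
    enum Name {
        FurtherEnd,
    }

    #[derive(Clone, Copy, Debug, PartialEq)]
    enum State {
        Ok,
        Next(Name),
    }

    // Before: the success continuation named a state whose body could only succeed.
    fn call(name: Name) -> State {
        match name {
            Name::FurtherEnd => State::Ok,
        }
    }

    fn resolve(state: State) -> State {
        match state {
            State::Next(name) => call(name),
            terminal => terminal,
        }
    }

    fn main() {
        // Naming `State::Ok` directly skips the hop through `call`.
        assert_eq!(resolve(State::Next(Name::FurtherEnd)), resolve(State::Ok));
    }
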
diff --git a/src/construct/hard_break_escape.rs b/src/construct/hard_break_escape.rs
index cec34d5..79d8150 100644
--- a/src/construct/hard_break_escape.rs
+++ b/src/construct/hard_break_escape.rs
@@ -51,13 +51,12 @@ use crate::tokenizer::Tokenizer;
/// | b
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- Some(b'\\') if tokenizer.parse_state.constructs.hard_break_escape => {
- tokenizer.enter(Name::HardBreakEscape);
- tokenizer.consume();
- State::Next(StateName::HardBreakEscapeAfter)
- }
- _ => State::Nok,
+ if tokenizer.parse_state.constructs.hard_break_escape && tokenizer.current == Some(b'\\') {
+ tokenizer.enter(Name::HardBreakEscape);
+ tokenizer.consume();
+ State::Next(StateName::HardBreakEscapeAfter)
+ } else {
+ State::Nok
}
}
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index 974158f..f75805a 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -107,27 +107,29 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
pub fn sequence_open(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- None | Some(b'\n') if tokenizer.tokenize_state.size > 0 => {
+ if tokenizer.current == Some(b'#')
+ && tokenizer.tokenize_state.size < HEADING_ATX_OPENING_FENCE_SIZE_MAX
+ {
+ tokenizer.tokenize_state.size += 1;
+ tokenizer.consume();
+ State::Next(StateName::HeadingAtxSequenceOpen)
+ } else if tokenizer.tokenize_state.size > 0 {
+ if matches!(tokenizer.current, None | Some(b'\n')) {
tokenizer.tokenize_state.size = 0;
tokenizer.exit(Name::HeadingAtxSequence);
State::Retry(StateName::HeadingAtxAtBreak)
- }
- Some(b'#') if tokenizer.tokenize_state.size < HEADING_ATX_OPENING_FENCE_SIZE_MAX => {
- tokenizer.tokenize_state.size += 1;
- tokenizer.consume();
- State::Next(StateName::HeadingAtxSequenceOpen)
- }
- _ if tokenizer.tokenize_state.size > 0 => {
+ } else if matches!(tokenizer.current, Some(b'\t' | b' ')) {
tokenizer.tokenize_state.size = 0;
tokenizer.exit(Name::HeadingAtxSequence);
tokenizer.attempt(State::Next(StateName::HeadingAtxAtBreak), State::Nok);
State::Retry(space_or_tab(tokenizer))
- }
- _ => {
+ } else {
tokenizer.tokenize_state.size = 0;
State::Nok
}
+ } else {
+ tokenizer.tokenize_state.size = 0;
+ State::Nok
}
}
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index 8485f5a..4e6345a 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -126,20 +126,17 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
pub fn inside(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- Some(b'-' | b'=') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
- tokenizer.consume();
- State::Next(StateName::HeadingSetextInside)
- }
- _ => {
- tokenizer.tokenize_state.marker = 0;
- tokenizer.exit(Name::HeadingSetextUnderline);
- tokenizer.attempt(
- State::Next(StateName::HeadingSetextAfter),
- State::Next(StateName::HeadingSetextAfter),
- );
- State::Retry(space_or_tab(tokenizer))
- }
+ if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
+ tokenizer.consume();
+ State::Next(StateName::HeadingSetextInside)
+ } else {
+ tokenizer.tokenize_state.marker = 0;
+ tokenizer.exit(Name::HeadingSetextUnderline);
+ tokenizer.attempt(
+ State::Next(StateName::HeadingSetextAfter),
+ State::Next(StateName::HeadingSetextAfter),
+ );
+ State::Retry(space_or_tab(tokenizer))
}
}
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index 2da4f47..2d685b6 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -525,21 +525,17 @@ pub fn complete_attribute_value_before(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
pub fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- None | Some(b'\n') => {
- tokenizer.tokenize_state.marker = 0;
- tokenizer.tokenize_state.marker_b = 0;
- State::Nok
- }
- Some(b'"' | b'\'') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker_b => {
- tokenizer.tokenize_state.marker_b = 0;
- tokenizer.consume();
- State::Next(StateName::HtmlFlowCompleteAttributeValueQuotedAfter)
- }
- _ => {
- tokenizer.consume();
- State::Next(StateName::HtmlFlowCompleteAttributeValueQuoted)
- }
+ if tokenizer.current == Some(tokenizer.tokenize_state.marker_b) {
+ tokenizer.tokenize_state.marker_b = 0;
+ tokenizer.consume();
+ State::Next(StateName::HtmlFlowCompleteAttributeValueQuotedAfter)
+ } else if matches!(tokenizer.current, None | Some(b'\n')) {
+ tokenizer.tokenize_state.marker = 0;
+ tokenizer.tokenize_state.marker_b = 0;
+ State::Nok
+ } else {
+ tokenizer.consume();
+ State::Next(StateName::HtmlFlowCompleteAttributeValueQuoted)
}
}
@@ -624,47 +620,37 @@ pub fn complete_after(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
pub fn continuation(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- Some(b'\n')
- if tokenizer.tokenize_state.marker == BASIC
- || tokenizer.tokenize_state.marker == COMPLETE =>
- {
- tokenizer.exit(Name::HtmlFlowData);
- tokenizer.check(
- State::Next(StateName::HtmlFlowContinuationAfter),
- State::Next(StateName::HtmlFlowContinuationStart),
- );
- State::Retry(StateName::HtmlFlowBlankLineBefore)
- }
- // Note: important that this is after the basic/complete case.
- None | Some(b'\n') => {
- tokenizer.exit(Name::HtmlFlowData);
- State::Retry(StateName::HtmlFlowContinuationStart)
- }
- Some(b'-') if tokenizer.tokenize_state.marker == COMMENT => {
- tokenizer.consume();
- State::Next(StateName::HtmlFlowContinuationCommentInside)
- }
- Some(b'<') if tokenizer.tokenize_state.marker == RAW => {
- tokenizer.consume();
- State::Next(StateName::HtmlFlowContinuationRawTagOpen)
- }
- Some(b'>') if tokenizer.tokenize_state.marker == DECLARATION => {
- tokenizer.consume();
- State::Next(StateName::HtmlFlowContinuationClose)
- }
- Some(b'?') if tokenizer.tokenize_state.marker == INSTRUCTION => {
- tokenizer.consume();
- State::Next(StateName::HtmlFlowContinuationDeclarationInside)
- }
- Some(b']') if tokenizer.tokenize_state.marker == CDATA => {
- tokenizer.consume();
- State::Next(StateName::HtmlFlowContinuationCdataInside)
- }
- _ => {
- tokenizer.consume();
- State::Next(StateName::HtmlFlowContinuation)
- }
+ if tokenizer.tokenize_state.marker == COMMENT && tokenizer.current == Some(b'-') {
+ tokenizer.consume();
+ State::Next(StateName::HtmlFlowContinuationCommentInside)
+ } else if tokenizer.tokenize_state.marker == RAW && tokenizer.current == Some(b'<') {
+ tokenizer.consume();
+ State::Next(StateName::HtmlFlowContinuationRawTagOpen)
+ } else if tokenizer.tokenize_state.marker == DECLARATION && tokenizer.current == Some(b'>') {
+ tokenizer.consume();
+ State::Next(StateName::HtmlFlowContinuationClose)
+ } else if tokenizer.tokenize_state.marker == INSTRUCTION && tokenizer.current == Some(b'?') {
+ tokenizer.consume();
+ State::Next(StateName::HtmlFlowContinuationDeclarationInside)
+ } else if tokenizer.tokenize_state.marker == CDATA && tokenizer.current == Some(b']') {
+ tokenizer.consume();
+ State::Next(StateName::HtmlFlowContinuationCdataInside)
+ } else if (tokenizer.tokenize_state.marker == BASIC
+ || tokenizer.tokenize_state.marker == COMPLETE)
+ && tokenizer.current == Some(b'\n')
+ {
+ tokenizer.exit(Name::HtmlFlowData);
+ tokenizer.check(
+ State::Next(StateName::HtmlFlowContinuationAfter),
+ State::Next(StateName::HtmlFlowContinuationStart),
+ );
+ State::Retry(StateName::HtmlFlowBlankLineBefore)
+ } else if matches!(tokenizer.current, None | Some(b'\n')) {
+ tokenizer.exit(Name::HtmlFlowData);
+ State::Retry(StateName::HtmlFlowContinuationStart)
+ } else {
+ tokenizer.consume();
+ State::Next(StateName::HtmlFlowContinuation)
}
}
@@ -822,16 +808,14 @@ pub fn continuation_cdata_inside(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
pub fn continuation_declaration_inside(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- Some(b'>') => {
- tokenizer.consume();
- State::Next(StateName::HtmlFlowContinuationClose)
- }
- Some(b'-') if tokenizer.tokenize_state.marker == COMMENT => {
- tokenizer.consume();
- State::Next(StateName::HtmlFlowContinuationDeclarationInside)
- }
- _ => State::Retry(StateName::HtmlFlowContinuation),
+ if tokenizer.tokenize_state.marker == COMMENT && tokenizer.current == Some(b'-') {
+ tokenizer.consume();
+ State::Next(StateName::HtmlFlowContinuationDeclarationInside)
+ } else if tokenizer.current == Some(b'>') {
+ tokenizer.consume();
+ State::Next(StateName::HtmlFlowContinuationClose)
+ } else {
+ State::Retry(StateName::HtmlFlowContinuation)
}
}
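
The comment deleted above ("important that this is after the basic/complete case") survives as an ordering constraint in the new chain: the BASIC/COMPLETE test for `\n` still sits before the catch-all `None | Some(b'\n')` branch, which would otherwise shadow it. A reduced sketch of why that order matters (marker values are stand-ins, COMPLETE omitted for brevity):

    const BASIC: u8 = 6;
    const RAW: u8 = 1;

    #[derive(Debug, PartialEq)]
    enum Branch {
        BlankLineCheck,
        GenericEol,
        Consume,
    }

    fn continuation(marker: u8, current: Option<u8>) -> Branch {
        // This arm must come first: the next test also matches `\n`.
        if marker == BASIC && current == Some(b'\n') {
            Branch::BlankLineCheck
        } else if current.is_none() || current == Some(b'\n') {
            Branch::GenericEol
        } else {
            Branch::Consume
        }
    }

    fn main() {
        assert_eq!(continuation(BASIC, Some(b'\n')), Branch::BlankLineCheck);
        assert_eq!(continuation(RAW, Some(b'\n')), Branch::GenericEol);
        assert_eq!(continuation(RAW, Some(b'x')), Branch::Consume);
    }
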
diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs
index fde78de..ffbc768 100644
--- a/src/construct/html_text.rs
+++ b/src/construct/html_text.rs
@@ -558,26 +558,27 @@ pub fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
pub fn tag_open_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- None => {
- tokenizer.tokenize_state.marker = 0;
- State::Nok
- }
- Some(b'\n') => {
- tokenizer.attempt(
- State::Next(StateName::HtmlTextTagOpenAttributeValueQuoted),
- State::Nok,
- );
- State::Retry(StateName::HtmlTextLineEndingBefore)
- }
- Some(b'"' | b'\'') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
- tokenizer.tokenize_state.marker = 0;
- tokenizer.consume();
- State::Next(StateName::HtmlTextTagOpenAttributeValueQuotedAfter)
- }
- _ => {
- tokenizer.consume();
- State::Next(StateName::HtmlTextTagOpenAttributeValueQuoted)
+ if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
+ tokenizer.tokenize_state.marker = 0;
+ tokenizer.consume();
+ State::Next(StateName::HtmlTextTagOpenAttributeValueQuotedAfter)
+ } else {
+ match tokenizer.current {
+ None => {
+ tokenizer.tokenize_state.marker = 0;
+ State::Nok
+ }
+ Some(b'\n') => {
+ tokenizer.attempt(
+ State::Next(StateName::HtmlTextTagOpenAttributeValueQuoted),
+ State::Nok,
+ );
+ State::Retry(StateName::HtmlTextLineEndingBefore)
+ }
+ _ => {
+ tokenizer.consume();
+ State::Next(StateName::HtmlTextTagOpenAttributeValueQuoted)
+ }
}
}
}
diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs
index ce09f5b..b2890e6 100644
--- a/src/construct/label_start_image.rs
+++ b/src/construct/label_start_image.rs
@@ -40,15 +40,14 @@ use crate::tokenizer::{LabelStart, Tokenizer};
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- Some(b'!') if tokenizer.parse_state.constructs.label_start_image => {
- tokenizer.enter(Name::LabelImage);
- tokenizer.enter(Name::LabelImageMarker);
- tokenizer.consume();
- tokenizer.exit(Name::LabelImageMarker);
- State::Next(StateName::LabelStartImageOpen)
- }
- _ => State::Nok,
+ if tokenizer.parse_state.constructs.label_start_image && tokenizer.current == Some(b'!') {
+ tokenizer.enter(Name::LabelImage);
+ tokenizer.enter(Name::LabelImageMarker);
+ tokenizer.consume();
+ tokenizer.exit(Name::LabelImageMarker);
+ State::Next(StateName::LabelStartImageOpen)
+ } else {
+ State::Nok
}
}
diff --git a/src/construct/label_start_link.rs b/src/construct/label_start_link.rs
index 7288d5d..8089e80 100644
--- a/src/construct/label_start_link.rs
+++ b/src/construct/label_start_link.rs
@@ -39,21 +39,20 @@ use crate::tokenizer::{LabelStart, Tokenizer};
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- Some(b'[') if tokenizer.parse_state.constructs.label_start_link => {
- let start = tokenizer.events.len();
- tokenizer.enter(Name::LabelLink);
- tokenizer.enter(Name::LabelMarker);
- tokenizer.consume();
- tokenizer.exit(Name::LabelMarker);
- tokenizer.exit(Name::LabelLink);
- tokenizer.tokenize_state.label_start_stack.push(LabelStart {
- start: (start, tokenizer.events.len() - 1),
- inactive: false,
- });
- tokenizer.register_resolver_before(ResolveName::Label);
- State::Ok
- }
- _ => State::Nok,
+ if tokenizer.parse_state.constructs.label_start_link && tokenizer.current == Some(b'[') {
+ let start = tokenizer.events.len();
+ tokenizer.enter(Name::LabelLink);
+ tokenizer.enter(Name::LabelMarker);
+ tokenizer.consume();
+ tokenizer.exit(Name::LabelMarker);
+ tokenizer.exit(Name::LabelLink);
+ tokenizer.tokenize_state.label_start_stack.push(LabelStart {
+ start: (start, tokenizer.events.len() - 1),
+ inactive: false,
+ });
+ tokenizer.register_resolver_before(ResolveName::Label);
+ State::Ok
+ } else {
+ State::Nok
}
}
diff --git a/src/construct/list.rs b/src/construct/list.rs
index 206f823..076ff58 100644
--- a/src/construct/list.rs
+++ b/src/construct/list.rs
@@ -86,17 +86,20 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
pub fn before(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- // Unordered.
- Some(b'*' | b'-') => {
- tokenizer.check(State::Nok, State::Next(StateName::ListBeforeUnordered));
- State::Retry(StateName::ThematicBreakStart)
- }
- Some(b'+') => State::Retry(StateName::ListBeforeUnordered),
- // Ordered.
- Some(b'0'..=b'9') if !tokenizer.interrupt => State::Retry(StateName::ListBeforeOrdered),
- Some(b'1') => State::Retry(StateName::ListBeforeOrdered),
- _ => State::Nok,
+ // Unordered.
+ if matches!(tokenizer.current, Some(b'*' | b'-')) {
+ tokenizer.check(State::Nok, State::Next(StateName::ListBeforeUnordered));
+ State::Retry(StateName::ThematicBreakStart)
+ } else if tokenizer.current == Some(b'+') {
+ State::Retry(StateName::ListBeforeUnordered)
+ }
+ // Ordered.
+ else if tokenizer.current == Some(b'1')
+ || (matches!(tokenizer.current, Some(b'0'..=b'9')) && !tokenizer.interrupt)
+ {
+ State::Retry(StateName::ListBeforeOrdered)
+ } else {
+ State::Nok
}
}
@@ -132,20 +135,20 @@ pub fn before_ordered(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
pub fn value(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- Some(b'.' | b')') if !tokenizer.interrupt || tokenizer.tokenize_state.size < 2 => {
- tokenizer.exit(Name::ListItemValue);
- State::Retry(StateName::ListMarker)
- }
- Some(b'0'..=b'9') if tokenizer.tokenize_state.size + 1 < LIST_ITEM_VALUE_SIZE_MAX => {
- tokenizer.tokenize_state.size += 1;
- tokenizer.consume();
- State::Next(StateName::ListValue)
- }
- _ => {
- tokenizer.tokenize_state.size = 0;
- State::Nok
- }
+ if matches!(tokenizer.current, Some(b'.' | b')'))
+ && (!tokenizer.interrupt || tokenizer.tokenize_state.size < 2)
+ {
+ tokenizer.exit(Name::ListItemValue);
+ State::Retry(StateName::ListMarker)
+ } else if matches!(tokenizer.current, Some(b'0'..=b'9'))
+ && tokenizer.tokenize_state.size + 1 < LIST_ITEM_VALUE_SIZE_MAX
+ {
+ tokenizer.tokenize_state.size += 1;
+ tokenizer.consume();
+ State::Next(StateName::ListValue)
+ } else {
+ tokenizer.tokenize_state.size = 0;
+ State::Nok
}
}
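
The merged ordered-list condition in `before` folds two arms that depended on `match` order: any digit starts a list when not interrupting, and `1` does so even when interrupting (the old `Some(b'1')` arm was only reachable in the interrupt case). A brute-force check that the single boolean preserves that:

    fn ordered_old(current: u8, interrupt: bool) -> bool {
        match current {
            b'0'..=b'9' if !interrupt => true,
            b'1' => true,
            _ => false,
        }
    }

    fn ordered_new(current: u8, interrupt: bool) -> bool {
        current == b'1' || (current.is_ascii_digit() && !interrupt)
    }

    fn main() {
        for current in 0u8..=255 {
            for interrupt in [false, true] {
                assert_eq!(
                    ordered_old(current, interrupt),
                    ordered_new(current, interrupt)
                );
            }
        }
    }
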
diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs
index b6b0f59..8c8ecbb 100644
--- a/src/construct/partial_data.rs
+++ b/src/construct/partial_data.rs
@@ -18,15 +18,16 @@ use crate::tokenizer::Tokenizer;
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- // Make sure to eat the first `markers`.
- Some(byte) if tokenizer.tokenize_state.markers.contains(&byte) => {
+ // Make sure to eat the first `markers`.
+ if let Some(byte) = tokenizer.current {
+ if tokenizer.tokenize_state.markers.contains(&byte) {
tokenizer.enter(Name::Data);
tokenizer.consume();
- State::Next(StateName::DataInside)
+ return State::Next(StateName::DataInside);
}
- _ => State::Retry(StateName::DataAtBreak),
}
+
+ State::Retry(StateName::DataAtBreak)
}
/// Before something.
@@ -36,23 +37,21 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
pub fn at_break(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- None => State::Ok,
- Some(b'\n') => {
- tokenizer.enter(Name::LineEnding);
- tokenizer.consume();
- tokenizer.exit(Name::LineEnding);
- State::Next(StateName::DataAtBreak)
- }
- Some(byte) if tokenizer.tokenize_state.markers.contains(&byte) => {
- tokenizer.register_resolver_before(ResolveName::Data);
- State::Ok
- }
- _ => {
+ if let Some(byte) = tokenizer.current {
+ if !tokenizer.tokenize_state.markers.contains(&byte) {
+ if byte == b'\n' {
+ tokenizer.enter(Name::LineEnding);
+ tokenizer.consume();
+ tokenizer.exit(Name::LineEnding);
+ return State::Next(StateName::DataAtBreak);
+ }
tokenizer.enter(Name::Data);
- State::Retry(StateName::DataInside)
+ return State::Retry(StateName::DataInside);
}
}
+
+ tokenizer.register_resolver_before(ResolveName::Data);
+ State::Ok
}
/// In data.
@@ -62,19 +61,15 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State {
/// ^^^
/// ```
pub fn inside(tokenizer: &mut Tokenizer) -> State {
- let done = match tokenizer.current {
- None | Some(b'\n') => true,
- Some(byte) if tokenizer.tokenize_state.markers.contains(&byte) => true,
- _ => false,
- };
-
- if done {
- tokenizer.exit(Name::Data);
- State::Retry(StateName::DataAtBreak)
- } else {
- tokenizer.consume();
- State::Next(StateName::DataInside)
+ if let Some(byte) = tokenizer.current {
+ if byte != b'\n' && !tokenizer.tokenize_state.markers.contains(&byte) {
+ tokenizer.consume();
+ return State::Next(StateName::DataInside);
+ }
}
+
+ tokenizer.exit(Name::Data);
+ State::Retry(StateName::DataAtBreak)
}
/// Merge adjacent data events.
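
partial_data.rs shows the other recurring move: guard-heavy arms become an `if let` over `tokenizer.current` with early returns, so EOF and marker bytes fall through to one shared exit. A standalone sketch of the new `at_break` control flow, with `markers` standing in for `tokenizer.tokenize_state.markers`:

    fn at_break(current: Option<u8>, markers: &[u8]) -> &'static str {
        if let Some(byte) = current {
            if !markers.contains(&byte) {
                if byte == b'\n' {
                    return "line ending";
                }
                return "data";
            }
        }
        // EOF and marker bytes share the success tail.
        "done"
    }

    fn main() {
        let markers = [b'*', b'_'];
        assert_eq!(at_break(None, &markers), "done");
        assert_eq!(at_break(Some(b'*'), &markers), "done");
        assert_eq!(at_break(Some(b'\n'), &markers), "line ending");
        assert_eq!(at_break(Some(b'a'), &markers), "data");
    }
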
diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs
index dc5c904..29cb5c4 100644
--- a/src/construct/partial_destination.rs
+++ b/src/construct/partial_destination.rs
@@ -174,38 +174,39 @@ pub fn enclosed_escape(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
pub fn raw(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- None | Some(b'\t' | b'\n' | b' ' | b')') if tokenizer.tokenize_state.size == 0 => {
- tokenizer.exit(Name::Data);
- tokenizer.exit(tokenizer.tokenize_state.token_5.clone());
- tokenizer.exit(tokenizer.tokenize_state.token_4.clone());
- tokenizer.exit(tokenizer.tokenize_state.token_1.clone());
- tokenizer.tokenize_state.size = 0;
- State::Ok
- }
- Some(b'(') if tokenizer.tokenize_state.size < tokenizer.tokenize_state.size_b => {
- tokenizer.consume();
- tokenizer.tokenize_state.size += 1;
- State::Next(StateName::DestinationRaw)
- }
- // ASCII control (but *not* `\0`) and space and `(`.
- None | Some(0x01..=0x1F | b' ' | b'(' | 0x7F) => {
- tokenizer.tokenize_state.size = 0;
- State::Nok
- }
- Some(b')') => {
- tokenizer.consume();
- tokenizer.tokenize_state.size -= 1;
- State::Next(StateName::DestinationRaw)
- }
- Some(b'\\') => {
- tokenizer.consume();
- State::Next(StateName::DestinationRawEscape)
- }
- Some(_) => {
- tokenizer.consume();
- State::Next(StateName::DestinationRaw)
- }
+ if tokenizer.tokenize_state.size == 0
+ && matches!(tokenizer.current, None | Some(b'\t' | b'\n' | b' ' | b')'))
+ {
+ tokenizer.exit(Name::Data);
+ tokenizer.exit(tokenizer.tokenize_state.token_5.clone());
+ tokenizer.exit(tokenizer.tokenize_state.token_4.clone());
+ tokenizer.exit(tokenizer.tokenize_state.token_1.clone());
+ tokenizer.tokenize_state.size = 0;
+ State::Ok
+ } else if tokenizer.tokenize_state.size < tokenizer.tokenize_state.size_b
+ && tokenizer.current == Some(b'(')
+ {
+ tokenizer.consume();
+ tokenizer.tokenize_state.size += 1;
+ State::Next(StateName::DestinationRaw)
+ } else if tokenizer.current == Some(b')') {
+ tokenizer.consume();
+ tokenizer.tokenize_state.size -= 1;
+ State::Next(StateName::DestinationRaw)
+ }
+ // ASCII control (but *not* `\0`) and space and `(`.
+ else if matches!(
+ tokenizer.current,
+ None | Some(0x01..=0x1F | b' ' | b'(' | 0x7F)
+ ) {
+ tokenizer.tokenize_state.size = 0;
+ State::Nok
+ } else if tokenizer.current == Some(b'\\') {
+ tokenizer.consume();
+ State::Next(StateName::DestinationRawEscape)
+ } else {
+ tokenizer.consume();
+ State::Next(StateName::DestinationRaw)
}
}
diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs
index 5f1b4cf..9637373 100644
--- a/src/construct/partial_space_or_tab.rs
+++ b/src/construct/partial_space_or_tab.rs
@@ -68,23 +68,24 @@ pub fn space_or_tab_with_options(tokenizer: &mut Tokenizer, options: Options) ->
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- Some(b'\t' | b' ') if tokenizer.tokenize_state.space_or_tab_max > 0 => {
- tokenizer.enter_with_content(
- tokenizer.tokenize_state.space_or_tab_token.clone(),
- tokenizer.tokenize_state.space_or_tab_content_type.clone(),
- );
-
- if tokenizer.tokenize_state.space_or_tab_connect {
- let index = tokenizer.events.len() - 1;
- link(&mut tokenizer.events, index);
- } else if tokenizer.tokenize_state.space_or_tab_content_type.is_some() {
- tokenizer.tokenize_state.space_or_tab_connect = true;
- }
+ if tokenizer.tokenize_state.space_or_tab_max > 0
+ && matches!(tokenizer.current, Some(b'\t' | b' '))
+ {
+ tokenizer.enter_with_content(
+ tokenizer.tokenize_state.space_or_tab_token.clone(),
+ tokenizer.tokenize_state.space_or_tab_content_type.clone(),
+ );
- State::Retry(StateName::SpaceOrTabInside)
+ if tokenizer.tokenize_state.space_or_tab_connect {
+ let index = tokenizer.events.len() - 1;
+ link(&mut tokenizer.events, index);
+ } else if tokenizer.tokenize_state.space_or_tab_content_type.is_some() {
+ tokenizer.tokenize_state.space_or_tab_connect = true;
}
- _ => State::Retry(StateName::SpaceOrTabAfter),
+
+ State::Retry(StateName::SpaceOrTabInside)
+ } else {
+ State::Retry(StateName::SpaceOrTabAfter)
}
}
diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs
index b2989cb..af8206e 100644
--- a/src/construct/thematic_break.rs
+++ b/src/construct/thematic_break.rs
@@ -101,26 +101,22 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
pub fn at_break(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- None | Some(b'\n') if tokenizer.tokenize_state.size >= THEMATIC_BREAK_MARKER_COUNT_MIN => {
- tokenizer.tokenize_state.marker = 0;
- tokenizer.tokenize_state.size = 0;
- tokenizer.exit(Name::ThematicBreak);
- // Feel free to interrupt.
- tokenizer.interrupt = false;
- State::Ok
- }
- Some(b'*' | b'-' | b'_')
- if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker =>
- {
- tokenizer.enter(Name::ThematicBreakSequence);
- State::Retry(StateName::ThematicBreakSequence)
- }
- _ => {
- tokenizer.tokenize_state.marker = 0;
- tokenizer.tokenize_state.size = 0;
- State::Nok
- }
+ if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
+ tokenizer.enter(Name::ThematicBreakSequence);
+ State::Retry(StateName::ThematicBreakSequence)
+ } else if tokenizer.tokenize_state.size >= THEMATIC_BREAK_MARKER_COUNT_MIN
+ && matches!(tokenizer.current, None | Some(b'\n'))
+ {
+ tokenizer.tokenize_state.marker = 0;
+ tokenizer.tokenize_state.size = 0;
+ tokenizer.exit(Name::ThematicBreak);
+ // Feel free to interrupt.
+ tokenizer.interrupt = false;
+ State::Ok
+ } else {
+ tokenizer.tokenize_state.marker = 0;
+ tokenizer.tokenize_state.size = 0;
+ State::Nok
}
}
diff --git a/src/state.rs b/src/state.rs
index 190683c..01530c7 100644
--- a/src/state.rs
+++ b/src/state.rs
@@ -75,7 +75,6 @@ pub enum Name {
CodeIndentedAfter,
CodeIndentedFurtherStart,
CodeIndentedInside,
- CodeIndentedFurtherEnd,
CodeIndentedFurtherBegin,
CodeIndentedFurtherAfter,
@@ -358,7 +357,6 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
Name::CodeIndentedAfter => construct::code_indented::after,
Name::CodeIndentedFurtherStart => construct::code_indented::further_start,
Name::CodeIndentedInside => construct::code_indented::inside,
- Name::CodeIndentedFurtherEnd => construct::code_indented::further_end,
Name::CodeIndentedFurtherBegin => construct::code_indented::further_begin,
Name::CodeIndentedFurtherAfter => construct::code_indented::further_after,