Diffstat
-rw-r--r-- | src/construct/attention.rs            |  53
-rw-r--r-- | src/construct/autolink.rs             |  19
-rw-r--r-- | src/construct/character_escape.rs     |  17
-rw-r--r-- | src/construct/character_reference.rs  |  17
-rw-r--r-- | src/construct/code_fenced.rs          | 127
-rw-r--r-- | src/construct/code_indented.rs        |  36
-rw-r--r-- | src/construct/hard_break_escape.rs    |  13
-rw-r--r-- | src/construct/heading_atx.rs          |  24
-rw-r--r-- | src/construct/heading_setext.rs       |  25
-rw-r--r-- | src/construct/html_flow.rs            | 116
-rw-r--r-- | src/construct/html_text.rs            |  41
-rw-r--r-- | src/construct/label_start_image.rs    |  17
-rw-r--r-- | src/construct/label_start_link.rs     |  31
-rw-r--r-- | src/construct/list.rs                 |  53
-rw-r--r-- | src/construct/partial_data.rs         |  57
-rw-r--r-- | src/construct/partial_destination.rs  |  65
-rw-r--r-- | src/construct/partial_space_or_tab.rs |  31
-rw-r--r-- | src/construct/thematic_break.rs       |  36
18 files changed, 368 insertions, 410 deletions
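
Every hunk below applies the same refactor: `match` statements on `tokenizer.current` whose arms rely on guard clauses are rewritten as `if`/`else if` chains, so the cheap guard (such as whether a construct is enabled) is tested before the byte. A minimal, self-contained sketch of the pattern; the `enabled` and `current` parameters here are hypothetical stand-ins for the `tokenizer.parse_state.constructs.*` and `tokenizer.current` fields, not the crate's API:

#[derive(Debug, PartialEq)]
enum State {
    Ok,
    Nok,
}

// Before: a guarded `match` arm, as removed throughout this diff.
fn start_match(enabled: bool, current: Option<u8>) -> State {
    match current {
        Some(b'*' | b'_') if enabled => State::Ok,
        _ => State::Nok,
    }
}

// After: the guard becomes part of a plain `if`, with `matches!` testing the byte.
fn start_if(enabled: bool, current: Option<u8>) -> State {
    if enabled && matches!(current, Some(b'*' | b'_')) {
        State::Ok
    } else {
        State::Nok
    }
}

fn main() {
    // The two forms agree on every input.
    for enabled in [false, true] {
        for current in [None, Some(b'*'), Some(b'_'), Some(b'x')] {
            assert_eq!(start_match(enabled, current), start_if(enabled, current));
        }
    }
}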
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
index 3a29d06..ae8da81 100644
--- a/src/construct/attention.rs
+++ b/src/construct/attention.rs
@@ -118,13 +118,13 @@ struct Sequence {
 ///     ^
 /// ```
 pub fn start(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        Some(b'*' | b'_') if tokenizer.parse_state.constructs.attention => {
-            tokenizer.tokenize_state.marker = tokenizer.current.unwrap();
-            tokenizer.enter(Name::AttentionSequence);
-            State::Retry(StateName::AttentionInside)
-        }
-        _ => State::Nok,
+    if tokenizer.parse_state.constructs.attention && matches!(tokenizer.current, Some(b'*' | b'_'))
+    {
+        tokenizer.tokenize_state.marker = tokenizer.current.unwrap();
+        tokenizer.enter(Name::AttentionSequence);
+        State::Retry(StateName::AttentionInside)
+    } else {
+        State::Nok
     }
 }
 
@@ -135,17 +135,14 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 ///     ^^
 /// ```
 pub fn inside(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        Some(b'*' | b'_') if tokenizer.current == Some(tokenizer.tokenize_state.marker) => {
-            tokenizer.consume();
-            State::Next(StateName::AttentionInside)
-        }
-        _ => {
-            tokenizer.exit(Name::AttentionSequence);
-            tokenizer.register_resolver(ResolveName::Attention);
-            tokenizer.tokenize_state.marker = b'\0';
-            State::Ok
-        }
+    if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
+        tokenizer.consume();
+        State::Next(StateName::AttentionInside)
+    } else {
+        tokenizer.exit(Name::AttentionSequence);
+        tokenizer.register_resolver(ResolveName::Attention);
+        tokenizer.tokenize_state.marker = b'\0';
+        State::Ok
     }
 }
 
@@ -437,14 +434,22 @@ fn match_sequences(
 ///
 /// * [`micromark-util-classify-character` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-util-classify-character/dev/index.js)
 fn classify_character(char: Option<char>) -> CharacterKind {
-    match char {
-        // EOF.
-        None => CharacterKind::Whitespace,
+    if let Some(char) = char {
         // Unicode whitespace.
-        Some(char) if char.is_whitespace() => CharacterKind::Whitespace,
+        if char.is_whitespace() {
+            CharacterKind::Whitespace
+        }
         // Unicode punctuation.
-        Some(char) if PUNCTUATION.contains(&char) => CharacterKind::Punctuation,
+        else if PUNCTUATION.contains(&char) {
+            CharacterKind::Punctuation
+        }
         // Everything else.
-        Some(_) => CharacterKind::Other,
+        else {
+            CharacterKind::Other
+        }
+    }
+    // EOF.
+    else {
+        CharacterKind::Whitespace
     }
 }
diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs
index 1bb8004..5c826a3 100644
--- a/src/construct/autolink.rs
+++ b/src/construct/autolink.rs
@@ -115,16 +115,15 @@ use crate::tokenizer::Tokenizer;
 ///     ^
 /// ```
 pub fn start(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        Some(b'<') if tokenizer.parse_state.constructs.autolink => {
-            tokenizer.enter(Name::Autolink);
-            tokenizer.enter(Name::AutolinkMarker);
-            tokenizer.consume();
-            tokenizer.exit(Name::AutolinkMarker);
-            tokenizer.enter(Name::AutolinkProtocol);
-            State::Next(StateName::AutolinkOpen)
-        }
-        _ => State::Nok,
+    if tokenizer.parse_state.constructs.autolink && tokenizer.current == Some(b'<') {
+        tokenizer.enter(Name::Autolink);
+        tokenizer.enter(Name::AutolinkMarker);
+        tokenizer.consume();
+        tokenizer.exit(Name::AutolinkMarker);
+        tokenizer.enter(Name::AutolinkProtocol);
+        State::Next(StateName::AutolinkOpen)
+    } else {
+        State::Nok
     }
 }
diff --git a/src/construct/character_escape.rs b/src/construct/character_escape.rs
index 494f1d2..ac91c29 100644
--- a/src/construct/character_escape.rs
+++ b/src/construct/character_escape.rs
@@ -44,15 +44,14 @@ use crate::tokenizer::Tokenizer;
 ///     ^
 /// ```
 pub fn start(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        Some(b'\\') if tokenizer.parse_state.constructs.character_escape => {
-            tokenizer.enter(Name::CharacterEscape);
-            tokenizer.enter(Name::CharacterEscapeMarker);
-            tokenizer.consume();
-            tokenizer.exit(Name::CharacterEscapeMarker);
-            State::Next(StateName::CharacterEscapeInside)
-        }
-        _ => State::Nok,
+    if tokenizer.parse_state.constructs.character_escape && tokenizer.current == Some(b'\\') {
+        tokenizer.enter(Name::CharacterEscape);
+        tokenizer.enter(Name::CharacterEscapeMarker);
+        tokenizer.consume();
+        tokenizer.exit(Name::CharacterEscapeMarker);
+        State::Next(StateName::CharacterEscapeInside)
+    } else {
+        State::Nok
     }
 }
diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs
index 6171927..7d7b6f9 100644
--- a/src/construct/character_reference.rs
+++ b/src/construct/character_reference.rs
@@ -81,15 +81,14 @@ use crate::util::slice::Slice;
 ///     ^
 /// ```
 pub fn start(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        Some(b'&') if tokenizer.parse_state.constructs.character_reference => {
-            tokenizer.enter(Name::CharacterReference);
-            tokenizer.enter(Name::CharacterReferenceMarker);
-            tokenizer.consume();
-            tokenizer.exit(Name::CharacterReferenceMarker);
-            State::Next(StateName::CharacterReferenceOpen)
-        }
-        _ => State::Nok,
+    if tokenizer.parse_state.constructs.character_reference && tokenizer.current == Some(b'&') {
+        tokenizer.enter(Name::CharacterReference);
+        tokenizer.enter(Name::CharacterReferenceMarker);
+        tokenizer.consume();
+        tokenizer.exit(Name::CharacterReferenceMarker);
+        State::Next(StateName::CharacterReferenceOpen)
+    } else {
+        State::Nok
     }
 }
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index ac9a63f..e69f33c 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -180,28 +180,24 @@ pub fn before_sequence_open(tokenizer: &mut Tokenizer) -> State {
 ///   | ~~~
 /// ```
 pub fn sequence_open(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        Some(b'`' | b'~') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
-            tokenizer.tokenize_state.size += 1;
-            tokenizer.consume();
-            State::Next(StateName::CodeFencedSequenceOpen)
-        }
-        _ if tokenizer.tokenize_state.size >= CODE_FENCED_SEQUENCE_SIZE_MIN => {
-            tokenizer.exit(Name::CodeFencedFenceSequence);
+    if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
+        tokenizer.tokenize_state.size += 1;
+        tokenizer.consume();
+        State::Next(StateName::CodeFencedSequenceOpen)
+    } else if tokenizer.tokenize_state.size >= CODE_FENCED_SEQUENCE_SIZE_MIN {
+        tokenizer.exit(Name::CodeFencedFenceSequence);
 
-            tokenizer.attempt(
-                State::Next(StateName::CodeFencedInfoBefore),
-                State::Next(StateName::CodeFencedInfoBefore),
-            );
+        tokenizer.attempt(
+            State::Next(StateName::CodeFencedInfoBefore),
+            State::Next(StateName::CodeFencedInfoBefore),
+        );
 
-            State::Retry(space_or_tab(tokenizer))
-        }
-        _ => {
-            tokenizer.tokenize_state.marker = 0;
-            tokenizer.tokenize_state.size_c = 0;
-            tokenizer.tokenize_state.size = 0;
-            State::Nok
-        }
+        State::Retry(space_or_tab(tokenizer))
+    } else {
+        tokenizer.tokenize_state.marker = 0;
+        tokenizer.tokenize_state.size_c = 0;
+        tokenizer.tokenize_state.size = 0;
+        State::Nok
     }
 }
 
@@ -257,16 +253,17 @@ pub fn info(tokenizer: &mut Tokenizer) -> State {
             );
             State::Retry(space_or_tab(tokenizer))
         }
-        Some(b'`') if tokenizer.tokenize_state.marker == b'`' => {
-            tokenizer.concrete = false;
-            tokenizer.tokenize_state.marker = 0;
-            tokenizer.tokenize_state.size_c = 0;
-            tokenizer.tokenize_state.size = 0;
-            State::Nok
-        }
-        Some(_) => {
-            tokenizer.consume();
-            State::Next(StateName::CodeFencedInfo)
+        Some(byte) => {
+            if tokenizer.tokenize_state.marker == byte && byte == b'`' {
+                tokenizer.concrete = false;
+                tokenizer.tokenize_state.marker = 0;
+                tokenizer.tokenize_state.size_c = 0;
+                tokenizer.tokenize_state.size = 0;
+                State::Nok
+            } else {
+                tokenizer.consume();
+                State::Next(StateName::CodeFencedInfo)
+            }
         }
     }
 }
@@ -305,16 +302,17 @@ pub fn meta(tokenizer: &mut Tokenizer) -> State {
             tokenizer.exit(Name::CodeFencedFenceMeta);
             State::Retry(StateName::CodeFencedInfoBefore)
        }
-        Some(b'`') if tokenizer.tokenize_state.marker == b'`' => {
-            tokenizer.concrete = false;
-            tokenizer.tokenize_state.marker = 0;
-            tokenizer.tokenize_state.size_c = 0;
-            tokenizer.tokenize_state.size = 0;
-            State::Nok
-        }
-        _ => {
-            tokenizer.consume();
-            State::Next(StateName::CodeFencedMeta)
+        Some(byte) => {
+            if tokenizer.tokenize_state.marker == byte && byte == b'`' {
+                tokenizer.concrete = false;
+                tokenizer.tokenize_state.marker = 0;
+                tokenizer.tokenize_state.size_c = 0;
+                tokenizer.tokenize_state.size = 0;
+                State::Nok
+            } else {
+                tokenizer.consume();
+                State::Next(StateName::CodeFencedMeta)
+            }
         }
     }
 }
@@ -392,12 +390,11 @@ pub fn close_start(tokenizer: &mut Tokenizer) -> State {
 ///     ^
 /// ```
 pub fn before_sequence_close(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        Some(b'`' | b'~') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
-            tokenizer.enter(Name::CodeFencedFenceSequence);
-            State::Retry(StateName::CodeFencedSequenceClose)
-        }
-        _ => State::Nok,
+    if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
+        tokenizer.enter(Name::CodeFencedFenceSequence);
+        State::Retry(StateName::CodeFencedSequenceClose)
+    } else {
+        State::Nok
     }
 }
 
@@ -410,27 +407,23 @@ pub fn before_sequence_close(tokenizer: &mut Tokenizer) -> State {
 ///     ^
 /// ```
 pub fn sequence_close(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        Some(b'`' | b'~') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
-            tokenizer.tokenize_state.size_b += 1;
-            tokenizer.consume();
-            State::Next(StateName::CodeFencedSequenceClose)
-        }
-        _ if tokenizer.tokenize_state.size_b >= CODE_FENCED_SEQUENCE_SIZE_MIN
-            && tokenizer.tokenize_state.size_b >= tokenizer.tokenize_state.size =>
-        {
-            tokenizer.tokenize_state.size_b = 0;
-            tokenizer.exit(Name::CodeFencedFenceSequence);
-            tokenizer.attempt(
-                State::Next(StateName::CodeFencedAfterSequenceClose),
-                State::Next(StateName::CodeFencedAfterSequenceClose),
-            );
-            State::Retry(space_or_tab(tokenizer))
-        }
-        _ => {
-            tokenizer.tokenize_state.size_b = 0;
-            State::Nok
-        }
+    if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
+        tokenizer.tokenize_state.size_b += 1;
+        tokenizer.consume();
+        State::Next(StateName::CodeFencedSequenceClose)
+    } else if tokenizer.tokenize_state.size_b >= CODE_FENCED_SEQUENCE_SIZE_MIN
+        && tokenizer.tokenize_state.size_b >= tokenizer.tokenize_state.size
+    {
+        tokenizer.tokenize_state.size_b = 0;
+        tokenizer.exit(Name::CodeFencedFenceSequence);
+        tokenizer.attempt(
+            State::Next(StateName::CodeFencedAfterSequenceClose),
+            State::Next(StateName::CodeFencedAfterSequenceClose),
+        );
+        State::Retry(space_or_tab(tokenizer))
+    } else {
+        tokenizer.tokenize_state.size_b = 0;
+        State::Nok
    }
 }
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs
index 3a82dc4..5805346 100644
--- a/src/construct/code_indented.rs
+++ b/src/construct/code_indented.rs
@@ -135,33 +135,19 @@ pub fn after(tokenizer: &mut Tokenizer) -> State {
 ///   | bbb
 /// ```
 pub fn further_start(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        Some(b'\n') if !tokenizer.lazy => {
-            tokenizer.enter(Name::LineEnding);
-            tokenizer.consume();
-            tokenizer.exit(Name::LineEnding);
-            State::Next(StateName::CodeIndentedFurtherStart)
-        }
-        _ if !tokenizer.lazy => {
-            tokenizer.attempt(
-                State::Next(StateName::CodeIndentedFurtherEnd),
-                State::Next(StateName::CodeIndentedFurtherBegin),
-            );
-            State::Retry(space_or_tab_min_max(tokenizer, TAB_SIZE, TAB_SIZE))
-        }
-        _ => State::Nok,
+    if tokenizer.lazy {
+        return State::Nok;
     }
-}
 
-/// At eol, followed by an indented line.
-///
-/// ```markdown
-/// > | aaa
-///     ^
-/// | bbb
-/// ```
-pub fn further_end(_tokenizer: &mut Tokenizer) -> State {
-    State::Ok
+    if tokenizer.current == Some(b'\n') {
+        tokenizer.enter(Name::LineEnding);
+        tokenizer.consume();
+        tokenizer.exit(Name::LineEnding);
+        State::Next(StateName::CodeIndentedFurtherStart)
+    } else {
+        tokenizer.attempt(State::Ok, State::Next(StateName::CodeIndentedFurtherBegin));
+        State::Retry(space_or_tab_min_max(tokenizer, TAB_SIZE, TAB_SIZE))
+    }
 }
 
 /// At the beginning of a line that is not indented enough.
diff --git a/src/construct/hard_break_escape.rs b/src/construct/hard_break_escape.rs
index cec34d5..79d8150 100644
--- a/src/construct/hard_break_escape.rs
+++ b/src/construct/hard_break_escape.rs
@@ -51,13 +51,12 @@ use crate::tokenizer::Tokenizer;
 ///   | b
 /// ```
 pub fn start(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        Some(b'\\') if tokenizer.parse_state.constructs.hard_break_escape => {
-            tokenizer.enter(Name::HardBreakEscape);
-            tokenizer.consume();
-            State::Next(StateName::HardBreakEscapeAfter)
-        }
-        _ => State::Nok,
+    if tokenizer.parse_state.constructs.hard_break_escape && tokenizer.current == Some(b'\\') {
+        tokenizer.enter(Name::HardBreakEscape);
+        tokenizer.consume();
+        State::Next(StateName::HardBreakEscapeAfter)
+    } else {
+        State::Nok
     }
 }
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index 974158f..f75805a 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -107,27 +107,29 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
 ///     ^
 /// ```
 pub fn sequence_open(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        None | Some(b'\n') if tokenizer.tokenize_state.size > 0 => {
+    if tokenizer.current == Some(b'#')
+        && tokenizer.tokenize_state.size < HEADING_ATX_OPENING_FENCE_SIZE_MAX
+    {
+        tokenizer.tokenize_state.size += 1;
+        tokenizer.consume();
+        State::Next(StateName::HeadingAtxSequenceOpen)
+    } else if tokenizer.tokenize_state.size > 0 {
+        if matches!(tokenizer.current, None | Some(b'\n')) {
             tokenizer.tokenize_state.size = 0;
             tokenizer.exit(Name::HeadingAtxSequence);
             State::Retry(StateName::HeadingAtxAtBreak)
-        }
-        Some(b'#') if tokenizer.tokenize_state.size < HEADING_ATX_OPENING_FENCE_SIZE_MAX => {
-            tokenizer.tokenize_state.size += 1;
-            tokenizer.consume();
-            State::Next(StateName::HeadingAtxSequenceOpen)
-        }
-        _ if tokenizer.tokenize_state.size > 0 => {
+        } else if matches!(tokenizer.current, Some(b'\t' | b' ')) {
             tokenizer.tokenize_state.size = 0;
             tokenizer.exit(Name::HeadingAtxSequence);
             tokenizer.attempt(State::Next(StateName::HeadingAtxAtBreak), State::Nok);
             State::Retry(space_or_tab(tokenizer))
-        }
-        _ => {
+        } else {
             tokenizer.tokenize_state.size = 0;
             State::Nok
         }
+    } else {
+        tokenizer.tokenize_state.size = 0;
+        State::Nok
     }
 }
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index 8485f5a..4e6345a 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -126,20 +126,17 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
 ///     ^
 /// ```
 pub fn inside(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        Some(b'-' | b'=') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
-            tokenizer.consume();
-            State::Next(StateName::HeadingSetextInside)
-        }
-        _ => {
-            tokenizer.tokenize_state.marker = 0;
-            tokenizer.exit(Name::HeadingSetextUnderline);
-            tokenizer.attempt(
-                State::Next(StateName::HeadingSetextAfter),
-                State::Next(StateName::HeadingSetextAfter),
-            );
-            State::Retry(space_or_tab(tokenizer))
-        }
+    if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
+        tokenizer.consume();
+        State::Next(StateName::HeadingSetextInside)
+    } else {
+        tokenizer.tokenize_state.marker = 0;
+        tokenizer.exit(Name::HeadingSetextUnderline);
+        tokenizer.attempt(
+            State::Next(StateName::HeadingSetextAfter),
+            State::Next(StateName::HeadingSetextAfter),
+        );
+        State::Retry(space_or_tab(tokenizer))
     }
 }
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index 2da4f47..2d685b6 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -525,21 +525,17 @@ pub fn complete_attribute_value_before(tokenizer: &mut Tokenizer) -> State {
 ///     ^
 /// ```
 pub fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        None | Some(b'\n') => {
-            tokenizer.tokenize_state.marker = 0;
-            tokenizer.tokenize_state.marker_b = 0;
-            State::Nok
-        }
-        Some(b'"' | b'\'') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker_b => {
-            tokenizer.tokenize_state.marker_b = 0;
-            tokenizer.consume();
-            State::Next(StateName::HtmlFlowCompleteAttributeValueQuotedAfter)
-        }
-        _ => {
-            tokenizer.consume();
-            State::Next(StateName::HtmlFlowCompleteAttributeValueQuoted)
-        }
+    if tokenizer.current == Some(tokenizer.tokenize_state.marker_b) {
+        tokenizer.tokenize_state.marker_b = 0;
+        tokenizer.consume();
+        State::Next(StateName::HtmlFlowCompleteAttributeValueQuotedAfter)
+    } else if matches!(tokenizer.current, None | Some(b'\n')) {
+        tokenizer.tokenize_state.marker = 0;
+        tokenizer.tokenize_state.marker_b = 0;
+        State::Nok
+    } else {
+        tokenizer.consume();
+        State::Next(StateName::HtmlFlowCompleteAttributeValueQuoted)
     }
 }
 
@@ -624,47 +620,37 @@ pub fn complete_after(tokenizer: &mut Tokenizer) -> State {
 ///     ^
 /// ```
 pub fn continuation(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        Some(b'\n')
-            if tokenizer.tokenize_state.marker == BASIC
-                || tokenizer.tokenize_state.marker == COMPLETE =>
-        {
-            tokenizer.exit(Name::HtmlFlowData);
-            tokenizer.check(
-                State::Next(StateName::HtmlFlowContinuationAfter),
-                State::Next(StateName::HtmlFlowContinuationStart),
-            );
-            State::Retry(StateName::HtmlFlowBlankLineBefore)
-        }
-        // Note: important that this is after the basic/complete case.
-        None | Some(b'\n') => {
-            tokenizer.exit(Name::HtmlFlowData);
-            State::Retry(StateName::HtmlFlowContinuationStart)
-        }
-        Some(b'-') if tokenizer.tokenize_state.marker == COMMENT => {
-            tokenizer.consume();
-            State::Next(StateName::HtmlFlowContinuationCommentInside)
-        }
-        Some(b'<') if tokenizer.tokenize_state.marker == RAW => {
-            tokenizer.consume();
-            State::Next(StateName::HtmlFlowContinuationRawTagOpen)
-        }
-        Some(b'>') if tokenizer.tokenize_state.marker == DECLARATION => {
-            tokenizer.consume();
-            State::Next(StateName::HtmlFlowContinuationClose)
-        }
-        Some(b'?') if tokenizer.tokenize_state.marker == INSTRUCTION => {
-            tokenizer.consume();
-            State::Next(StateName::HtmlFlowContinuationDeclarationInside)
-        }
-        Some(b']') if tokenizer.tokenize_state.marker == CDATA => {
-            tokenizer.consume();
-            State::Next(StateName::HtmlFlowContinuationCdataInside)
-        }
-        _ => {
-            tokenizer.consume();
-            State::Next(StateName::HtmlFlowContinuation)
-        }
+    if tokenizer.tokenize_state.marker == COMMENT && tokenizer.current == Some(b'-') {
+        tokenizer.consume();
+        State::Next(StateName::HtmlFlowContinuationCommentInside)
+    } else if tokenizer.tokenize_state.marker == RAW && tokenizer.current == Some(b'<') {
+        tokenizer.consume();
+        State::Next(StateName::HtmlFlowContinuationRawTagOpen)
+    } else if tokenizer.tokenize_state.marker == DECLARATION && tokenizer.current == Some(b'>') {
+        tokenizer.consume();
+        State::Next(StateName::HtmlFlowContinuationClose)
+    } else if tokenizer.tokenize_state.marker == INSTRUCTION && tokenizer.current == Some(b'?') {
+        tokenizer.consume();
+        State::Next(StateName::HtmlFlowContinuationDeclarationInside)
+    } else if tokenizer.tokenize_state.marker == CDATA && tokenizer.current == Some(b']') {
+        tokenizer.consume();
+        State::Next(StateName::HtmlFlowContinuationCdataInside)
+    } else if (tokenizer.tokenize_state.marker == BASIC
+        || tokenizer.tokenize_state.marker == COMPLETE)
+        && tokenizer.current == Some(b'\n')
+    {
+        tokenizer.exit(Name::HtmlFlowData);
+        tokenizer.check(
+            State::Next(StateName::HtmlFlowContinuationAfter),
+            State::Next(StateName::HtmlFlowContinuationStart),
+        );
+        State::Retry(StateName::HtmlFlowBlankLineBefore)
+    } else if matches!(tokenizer.current, None | Some(b'\n')) {
+        tokenizer.exit(Name::HtmlFlowData);
+        State::Retry(StateName::HtmlFlowContinuationStart)
+    } else {
+        tokenizer.consume();
+        State::Next(StateName::HtmlFlowContinuation)
     }
 }
 
@@ -822,16 +808,14 @@ pub fn continuation_cdata_inside(tokenizer: &mut Tokenizer) -> State {
 ///     ^
 /// ```
 pub fn continuation_declaration_inside(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        Some(b'>') => {
-            tokenizer.consume();
-            State::Next(StateName::HtmlFlowContinuationClose)
-        }
-        Some(b'-') if tokenizer.tokenize_state.marker == COMMENT => {
-            tokenizer.consume();
-            State::Next(StateName::HtmlFlowContinuationDeclarationInside)
-        }
-        _ => State::Retry(StateName::HtmlFlowContinuation),
+    if tokenizer.tokenize_state.marker == COMMENT && tokenizer.current == Some(b'-') {
+        tokenizer.consume();
+        State::Next(StateName::HtmlFlowContinuationDeclarationInside)
+    } else if tokenizer.current == Some(b'>') {
+        tokenizer.consume();
+        State::Next(StateName::HtmlFlowContinuationClose)
+    } else {
+        State::Retry(StateName::HtmlFlowContinuation)
     }
 }
diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs
index fde78de..ffbc768 100644
--- a/src/construct/html_text.rs
+++ b/src/construct/html_text.rs
@@ -558,26 +558,27 @@ pub fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer) -> State {
 ///     ^
 /// ```
 pub fn tag_open_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        None => {
-            tokenizer.tokenize_state.marker = 0;
-            State::Nok
-        }
-        Some(b'\n') => {
-            tokenizer.attempt(
-                State::Next(StateName::HtmlTextTagOpenAttributeValueQuoted),
-                State::Nok,
-            );
-            State::Retry(StateName::HtmlTextLineEndingBefore)
-        }
-        Some(b'"' | b'\'') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
-            tokenizer.tokenize_state.marker = 0;
-            tokenizer.consume();
-            State::Next(StateName::HtmlTextTagOpenAttributeValueQuotedAfter)
-        }
-        _ => {
-            tokenizer.consume();
-            State::Next(StateName::HtmlTextTagOpenAttributeValueQuoted)
+    if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
+        tokenizer.tokenize_state.marker = 0;
+        tokenizer.consume();
+        State::Next(StateName::HtmlTextTagOpenAttributeValueQuotedAfter)
+    } else {
+        match tokenizer.current {
+            None => {
+                tokenizer.tokenize_state.marker = 0;
+                State::Nok
+            }
+            Some(b'\n') => {
+                tokenizer.attempt(
+                    State::Next(StateName::HtmlTextTagOpenAttributeValueQuoted),
+                    State::Nok,
+                );
+                State::Retry(StateName::HtmlTextLineEndingBefore)
+            }
+            _ => {
+                tokenizer.consume();
+                State::Next(StateName::HtmlTextTagOpenAttributeValueQuoted)
+            }
         }
     }
 }
diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs
index ce09f5b..b2890e6 100644
--- a/src/construct/label_start_image.rs
+++ b/src/construct/label_start_image.rs
@@ -40,15 +40,14 @@ use crate::tokenizer::{LabelStart, Tokenizer};
 ///     ^
 /// ```
 pub fn start(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        Some(b'!') if tokenizer.parse_state.constructs.label_start_image => {
-            tokenizer.enter(Name::LabelImage);
-            tokenizer.enter(Name::LabelImageMarker);
-            tokenizer.consume();
-            tokenizer.exit(Name::LabelImageMarker);
-            State::Next(StateName::LabelStartImageOpen)
-        }
-        _ => State::Nok,
+    if tokenizer.parse_state.constructs.label_start_image && tokenizer.current == Some(b'!') {
+        tokenizer.enter(Name::LabelImage);
+        tokenizer.enter(Name::LabelImageMarker);
+        tokenizer.consume();
+        tokenizer.exit(Name::LabelImageMarker);
+        State::Next(StateName::LabelStartImageOpen)
+    } else {
+        State::Nok
     }
 }
diff --git a/src/construct/label_start_link.rs b/src/construct/label_start_link.rs
index 7288d5d..8089e80 100644
--- a/src/construct/label_start_link.rs
+++ b/src/construct/label_start_link.rs
@@ -39,21 +39,20 @@ use crate::tokenizer::{LabelStart, Tokenizer};
 ///     ^
 /// ```
 pub fn start(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        Some(b'[') if tokenizer.parse_state.constructs.label_start_link => {
-            let start = tokenizer.events.len();
-            tokenizer.enter(Name::LabelLink);
-            tokenizer.enter(Name::LabelMarker);
-            tokenizer.consume();
-            tokenizer.exit(Name::LabelMarker);
-            tokenizer.exit(Name::LabelLink);
-            tokenizer.tokenize_state.label_start_stack.push(LabelStart {
-                start: (start, tokenizer.events.len() - 1),
-                inactive: false,
-            });
-            tokenizer.register_resolver_before(ResolveName::Label);
-            State::Ok
-        }
-        _ => State::Nok,
+    if tokenizer.parse_state.constructs.label_start_link && tokenizer.current == Some(b'[') {
+        let start = tokenizer.events.len();
+        tokenizer.enter(Name::LabelLink);
+        tokenizer.enter(Name::LabelMarker);
+        tokenizer.consume();
+        tokenizer.exit(Name::LabelMarker);
+        tokenizer.exit(Name::LabelLink);
+        tokenizer.tokenize_state.label_start_stack.push(LabelStart {
+            start: (start, tokenizer.events.len() - 1),
+            inactive: false,
+        });
+        tokenizer.register_resolver_before(ResolveName::Label);
+        State::Ok
+    } else {
+        State::Nok
     }
 }
diff --git a/src/construct/list.rs b/src/construct/list.rs
index 206f823..076ff58 100644
--- a/src/construct/list.rs
+++ b/src/construct/list.rs
@@ -86,17 +86,20 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 ///     ^
 /// ```
 pub fn before(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        // Unordered.
-        Some(b'*' | b'-') => {
-            tokenizer.check(State::Nok, State::Next(StateName::ListBeforeUnordered));
-            State::Retry(StateName::ThematicBreakStart)
-        }
-        Some(b'+') => State::Retry(StateName::ListBeforeUnordered),
-        // Ordered.
-        Some(b'0'..=b'9') if !tokenizer.interrupt => State::Retry(StateName::ListBeforeOrdered),
-        Some(b'1') => State::Retry(StateName::ListBeforeOrdered),
-        _ => State::Nok,
+    // Unordered.
+    if matches!(tokenizer.current, Some(b'*' | b'-')) {
+        tokenizer.check(State::Nok, State::Next(StateName::ListBeforeUnordered));
+        State::Retry(StateName::ThematicBreakStart)
+    } else if tokenizer.current == Some(b'+') {
+        State::Retry(StateName::ListBeforeUnordered)
+    }
+    // Ordered.
+    else if tokenizer.current == Some(b'1')
+        || (matches!(tokenizer.current, Some(b'0'..=b'9')) && !tokenizer.interrupt)
+    {
+        State::Retry(StateName::ListBeforeOrdered)
+    } else {
+        State::Nok
     }
 }
 
@@ -132,20 +135,20 @@ pub fn before_ordered(tokenizer: &mut Tokenizer) -> State {
 ///     ^
 /// ```
 pub fn value(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        Some(b'.' | b')') if !tokenizer.interrupt || tokenizer.tokenize_state.size < 2 => {
-            tokenizer.exit(Name::ListItemValue);
-            State::Retry(StateName::ListMarker)
-        }
-        Some(b'0'..=b'9') if tokenizer.tokenize_state.size + 1 < LIST_ITEM_VALUE_SIZE_MAX => {
-            tokenizer.tokenize_state.size += 1;
-            tokenizer.consume();
-            State::Next(StateName::ListValue)
-        }
-        _ => {
-            tokenizer.tokenize_state.size = 0;
-            State::Nok
-        }
+    if matches!(tokenizer.current, Some(b'.' | b')'))
+        && (!tokenizer.interrupt || tokenizer.tokenize_state.size < 2)
+    {
+        tokenizer.exit(Name::ListItemValue);
+        State::Retry(StateName::ListMarker)
+    } else if matches!(tokenizer.current, Some(b'0'..=b'9'))
+        && tokenizer.tokenize_state.size + 1 < LIST_ITEM_VALUE_SIZE_MAX
+    {
+        tokenizer.tokenize_state.size += 1;
+        tokenizer.consume();
+        State::Next(StateName::ListValue)
+    } else {
+        tokenizer.tokenize_state.size = 0;
+        State::Nok
     }
 }
diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs
index b6b0f59..8c8ecbb 100644
--- a/src/construct/partial_data.rs
+++ b/src/construct/partial_data.rs
@@ -18,15 +18,16 @@ use crate::tokenizer::Tokenizer;
 ///     ^
 /// ```
 pub fn start(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        // Make sure to eat the first `markers`.
-        Some(byte) if tokenizer.tokenize_state.markers.contains(&byte) => {
+    // Make sure to eat the first `markers`.
+    if let Some(byte) = tokenizer.current {
+        if tokenizer.tokenize_state.markers.contains(&byte) {
             tokenizer.enter(Name::Data);
             tokenizer.consume();
-            State::Next(StateName::DataInside)
+            return State::Next(StateName::DataInside);
         }
-        _ => State::Retry(StateName::DataAtBreak),
     }
+
+    State::Retry(StateName::DataAtBreak)
 }
 
@@ -36,23 +37,21 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 ///     ^
 /// ```
 pub fn at_break(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        None => State::Ok,
-        Some(b'\n') => {
-            tokenizer.enter(Name::LineEnding);
-            tokenizer.consume();
-            tokenizer.exit(Name::LineEnding);
-            State::Next(StateName::DataAtBreak)
-        }
-        Some(byte) if tokenizer.tokenize_state.markers.contains(&byte) => {
-            tokenizer.register_resolver_before(ResolveName::Data);
-            State::Ok
-        }
-        _ => {
+    if let Some(byte) = tokenizer.current {
+        if !tokenizer.tokenize_state.markers.contains(&byte) {
+            if byte == b'\n' {
+                tokenizer.enter(Name::LineEnding);
+                tokenizer.consume();
+                tokenizer.exit(Name::LineEnding);
+                return State::Next(StateName::DataAtBreak);
+            }
             tokenizer.enter(Name::Data);
-            State::Retry(StateName::DataInside)
+            return State::Retry(StateName::DataInside);
         }
     }
+
+    tokenizer.register_resolver_before(ResolveName::Data);
+    State::Ok
 }
 
@@ -62,19 +61,15 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State {
 ///     ^^^
 /// ```
 pub fn inside(tokenizer: &mut Tokenizer) -> State {
-    let done = match tokenizer.current {
-        None | Some(b'\n') => true,
-        Some(byte) if tokenizer.tokenize_state.markers.contains(&byte) => true,
-        _ => false,
-    };
-
-    if done {
-        tokenizer.exit(Name::Data);
-        State::Retry(StateName::DataAtBreak)
-    } else {
-        tokenizer.consume();
-        State::Next(StateName::DataInside)
+    if let Some(byte) = tokenizer.current {
+        if byte != b'\n' && !tokenizer.tokenize_state.markers.contains(&byte) {
+            tokenizer.consume();
+            return State::Next(StateName::DataInside);
+        }
     }
+
+    tokenizer.exit(Name::Data);
+    State::Retry(StateName::DataAtBreak)
 }
 
 /// Merge adjacent data events.
diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs
index dc5c904..29cb5c4 100644
--- a/src/construct/partial_destination.rs
+++ b/src/construct/partial_destination.rs
@@ -174,38 +174,39 @@ pub fn enclosed_escape(tokenizer: &mut Tokenizer) -> State {
 ///     ^
 /// ```
 pub fn raw(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        None | Some(b'\t' | b'\n' | b' ' | b')') if tokenizer.tokenize_state.size == 0 => {
-            tokenizer.exit(Name::Data);
-            tokenizer.exit(tokenizer.tokenize_state.token_5.clone());
-            tokenizer.exit(tokenizer.tokenize_state.token_4.clone());
-            tokenizer.exit(tokenizer.tokenize_state.token_1.clone());
-            tokenizer.tokenize_state.size = 0;
-            State::Ok
-        }
-        Some(b'(') if tokenizer.tokenize_state.size < tokenizer.tokenize_state.size_b => {
-            tokenizer.consume();
-            tokenizer.tokenize_state.size += 1;
-            State::Next(StateName::DestinationRaw)
-        }
-        // ASCII control (but *not* `\0`) and space and `(`.
-        None | Some(0x01..=0x1F | b' ' | b'(' | 0x7F) => {
-            tokenizer.tokenize_state.size = 0;
-            State::Nok
-        }
-        Some(b')') => {
-            tokenizer.consume();
-            tokenizer.tokenize_state.size -= 1;
-            State::Next(StateName::DestinationRaw)
-        }
-        Some(b'\\') => {
-            tokenizer.consume();
-            State::Next(StateName::DestinationRawEscape)
-        }
-        Some(_) => {
-            tokenizer.consume();
-            State::Next(StateName::DestinationRaw)
-        }
+    if tokenizer.tokenize_state.size == 0
+        && matches!(tokenizer.current, None | Some(b'\t' | b'\n' | b' ' | b')'))
+    {
+        tokenizer.exit(Name::Data);
+        tokenizer.exit(tokenizer.tokenize_state.token_5.clone());
+        tokenizer.exit(tokenizer.tokenize_state.token_4.clone());
+        tokenizer.exit(tokenizer.tokenize_state.token_1.clone());
+        tokenizer.tokenize_state.size = 0;
+        State::Ok
+    } else if tokenizer.tokenize_state.size < tokenizer.tokenize_state.size_b
+        && tokenizer.current == Some(b'(')
+    {
+        tokenizer.consume();
+        tokenizer.tokenize_state.size += 1;
+        State::Next(StateName::DestinationRaw)
+    } else if tokenizer.current == Some(b')') {
+        tokenizer.consume();
+        tokenizer.tokenize_state.size -= 1;
+        State::Next(StateName::DestinationRaw)
+    }
+    // ASCII control (but *not* `\0`) and space and `(`.
+    else if matches!(
+        tokenizer.current,
+        None | Some(0x01..=0x1F | b' ' | b'(' | 0x7F)
+    ) {
+        tokenizer.tokenize_state.size = 0;
+        State::Nok
+    } else if tokenizer.current == Some(b'\\') {
+        tokenizer.consume();
+        State::Next(StateName::DestinationRawEscape)
+    } else {
+        tokenizer.consume();
+        State::Next(StateName::DestinationRaw)
     }
 }
diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs
index 5f1b4cf..9637373 100644
--- a/src/construct/partial_space_or_tab.rs
+++ b/src/construct/partial_space_or_tab.rs
@@ -68,23 +68,24 @@ pub fn space_or_tab_with_options(tokenizer: &mut Tokenizer, options: Options) ->
 ///     ^
 /// ```
 pub fn start(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        Some(b'\t' | b' ') if tokenizer.tokenize_state.space_or_tab_max > 0 => {
-            tokenizer.enter_with_content(
-                tokenizer.tokenize_state.space_or_tab_token.clone(),
-                tokenizer.tokenize_state.space_or_tab_content_type.clone(),
-            );
-
-            if tokenizer.tokenize_state.space_or_tab_connect {
-                let index = tokenizer.events.len() - 1;
-                link(&mut tokenizer.events, index);
-            } else if tokenizer.tokenize_state.space_or_tab_content_type.is_some() {
-                tokenizer.tokenize_state.space_or_tab_connect = true;
-            }
+    if tokenizer.tokenize_state.space_or_tab_max > 0
+        && matches!(tokenizer.current, Some(b'\t' | b' '))
+    {
+        tokenizer.enter_with_content(
+            tokenizer.tokenize_state.space_or_tab_token.clone(),
+            tokenizer.tokenize_state.space_or_tab_content_type.clone(),
+        );
 
-            State::Retry(StateName::SpaceOrTabInside)
+        if tokenizer.tokenize_state.space_or_tab_connect {
+            let index = tokenizer.events.len() - 1;
+            link(&mut tokenizer.events, index);
+        } else if tokenizer.tokenize_state.space_or_tab_content_type.is_some() {
+            tokenizer.tokenize_state.space_or_tab_connect = true;
         }
-        _ => State::Retry(StateName::SpaceOrTabAfter),
+
+        State::Retry(StateName::SpaceOrTabInside)
+    } else {
+        State::Retry(StateName::SpaceOrTabAfter)
     }
 }
diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs
index b2989cb..af8206e 100644
--- a/src/construct/thematic_break.rs
+++ b/src/construct/thematic_break.rs
@@ -101,26 +101,22 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
 ///     ^
 /// ```
 pub fn at_break(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        None | Some(b'\n') if tokenizer.tokenize_state.size >= THEMATIC_BREAK_MARKER_COUNT_MIN => {
-            tokenizer.tokenize_state.marker = 0;
-            tokenizer.tokenize_state.size = 0;
-            tokenizer.exit(Name::ThematicBreak);
-            // Feel free to interrupt.
-            tokenizer.interrupt = false;
-            State::Ok
-        }
-        Some(b'*' | b'-' | b'_')
-            if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker =>
-        {
-            tokenizer.enter(Name::ThematicBreakSequence);
-            State::Retry(StateName::ThematicBreakSequence)
-        }
-        _ => {
-            tokenizer.tokenize_state.marker = 0;
-            tokenizer.tokenize_state.size = 0;
-            State::Nok
-        }
+    if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
+        tokenizer.enter(Name::ThematicBreakSequence);
+        State::Retry(StateName::ThematicBreakSequence)
+    } else if tokenizer.tokenize_state.size >= THEMATIC_BREAK_MARKER_COUNT_MIN
+        && matches!(tokenizer.current, None | Some(b'\n'))
+    {
+        tokenizer.tokenize_state.marker = 0;
+        tokenizer.tokenize_state.size = 0;
+        tokenizer.exit(Name::ThematicBreak);
+        // Feel free to interrupt.
+        tokenizer.interrupt = false;
+        State::Ok
+    } else {
+        tokenizer.tokenize_state.marker = 0;
+        tokenizer.tokenize_state.size = 0;
+        State::Nok
    }
 }
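
One ordering subtlety the rewrite preserves: in `continuation` in `html_flow.rs`, the old `match` carried the note "important that this is after the basic/complete case", and the new chain likewise tests the `BASIC`/`COMPLETE` marker with `\n` before the generic `None | Some(b'\n')` branch. A toy illustration (the marker values here are hypothetical, not the crate's constants) of why swapping those branches would change behavior:

const BASIC: u8 = 1;
const RAW: u8 = 2;

// The first matching branch wins, so the specific blank-line check for
// basic/complete HTML must come before the generic eol/EOF branch.
fn continuation(marker: u8, current: Option<u8>) -> &'static str {
    if marker == BASIC && current == Some(b'\n') {
        "check for blank line"
    } else if matches!(current, None | Some(b'\n')) {
        "continue on the next line"
    } else {
        "consume data"
    }
}

fn main() {
    assert_eq!(continuation(BASIC, Some(b'\n')), "check for blank line");
    // With the branches swapped, this case would also hit the generic branch
    // for BASIC, skipping the blank-line check entirely.
    assert_eq!(continuation(RAW, Some(b'\n')), "continue on the next line");
    assert_eq!(continuation(RAW, Some(b'x')), "consume data");
}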