diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-07-22 15:03:15 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-07-22 15:03:15 +0200 |
commit | 41fc406af206e21014eaaba94bcf6b1854f892b3 (patch) | |
tree | 510f6e1d763643da9072f9cf7e097e777fdbd5b8 /src | |
parent | 37fad739ba73d488d4c3652caee01f1ec5d0aaaa (diff) | |
download | markdown-rs-41fc406af206e21014eaaba94bcf6b1854f892b3.tar.gz markdown-rs-41fc406af206e21014eaaba94bcf6b1854f892b3.tar.bz2 markdown-rs-41fc406af206e21014eaaba94bcf6b1854f892b3.zip |
Refactor to pass ints instead of vecs around
Diffstat (limited to 'src')
35 files changed, 393 insertions, 464 deletions
diff --git a/src/construct/attention.rs b/src/construct/attention.rs index 38164a8..42c0965 100644 --- a/src/construct/attention.rs +++ b/src/construct/attention.rs @@ -175,7 +175,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.enter(Token::AttentionSequence); inside(tokenizer, code, MarkerKind::from_code(code)) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -189,12 +189,12 @@ fn inside(tokenizer: &mut Tokenizer, code: Code, marker: MarkerKind) -> StateFnR match code { Code::Char(char) if char == marker.as_char() => { tokenizer.consume(code); - (State::Fn(Box::new(move |t, c| inside(t, c, marker))), None) + (State::Fn(Box::new(move |t, c| inside(t, c, marker))), 0) } _ => { tokenizer.exit(Token::AttentionSequence); tokenizer.register_resolver("attention".to_string(), Box::new(resolve_attention)); - (State::Ok, Some(vec![code])) + (State::Ok, if matches!(code, Code::None) { 0 } else { 1 }) } } } diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs index 0ef4607..e8caf3b 100644 --- a/src/construct/autolink.rs +++ b/src/construct/autolink.rs @@ -121,9 +121,9 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.consume(code); tokenizer.exit(Token::AutolinkMarker); tokenizer.enter(Token::AutolinkProtocol); - (State::Fn(Box::new(open)), None) + (State::Fn(Box::new(open)), 0) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -139,10 +139,10 @@ fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::Char(char) if char.is_ascii_alphabetic() => { tokenizer.consume(code); - (State::Fn(Box::new(scheme_or_email_atext)), None) + (State::Fn(Box::new(scheme_or_email_atext)), 0) } Code::Char(char) if is_ascii_atext(char) => email_atext(tokenizer, code), - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -179,7 +179,7 @@ fn scheme_inside_or_email_atext( match code { Code::Char(':') => { tokenizer.consume(code); - (State::Fn(Box::new(url_inside)), None) + (State::Fn(Box::new(url_inside)), 0) } Code::Char('+' | '-' | '.' | '0'..='9' | 'A'..='Z' | 'a'..='z') if size < AUTOLINK_SCHEME_SIZE_MAX => @@ -189,7 +189,7 @@ fn scheme_inside_or_email_atext( State::Fn(Box::new(move |t, c| { scheme_inside_or_email_atext(t, c, size + 1) })), - None, + 0, ) } _ => email_atext(tokenizer, code), @@ -208,13 +208,13 @@ fn url_inside(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.exit(Token::AutolinkProtocol); end(tokenizer, code) } - Code::Char(char) if char.is_ascii_control() => (State::Nok, None), + Code::Char(char) if char.is_ascii_control() => (State::Nok, 0), Code::None | Code::CarriageReturnLineFeed | Code::VirtualSpace | Code::Char(' ') => { - (State::Nok, None) + (State::Nok, 0) } Code::Char(_) => { tokenizer.consume(code); - (State::Fn(Box::new(url_inside)), None) + (State::Fn(Box::new(url_inside)), 0) } } } @@ -229,16 +229,13 @@ fn email_atext(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::Char('@') => { tokenizer.consume(code); - ( - State::Fn(Box::new(|t, c| email_at_sign_or_dot(t, c, 0))), - None, - ) + (State::Fn(Box::new(|t, c| email_at_sign_or_dot(t, c, 0))), 0) } Code::Char(char) if is_ascii_atext(char) => { tokenizer.consume(code); - (State::Fn(Box::new(email_atext)), None) + (State::Fn(Box::new(email_atext)), 0) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -251,7 +248,7 @@ fn email_atext(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { fn email_at_sign_or_dot(tokenizer: &mut Tokenizer, code: Code, size: usize) -> StateFnResult { match code { Code::Char(char) if char.is_ascii_alphanumeric() => email_value(tokenizer, code, size), - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -265,10 +262,7 @@ fn email_label(tokenizer: &mut Tokenizer, code: Code, size: usize) -> StateFnRes match code { Code::Char('.') => { tokenizer.consume(code); - ( - State::Fn(Box::new(|t, c| email_at_sign_or_dot(t, c, 0))), - None, - ) + (State::Fn(Box::new(|t, c| email_at_sign_or_dot(t, c, 0))), 0) } Code::Char('>') => { let index = tokenizer.events.len(); @@ -296,17 +290,17 @@ fn email_value(tokenizer: &mut Tokenizer, code: Code, size: usize) -> StateFnRes tokenizer.consume(code); ( State::Fn(Box::new(move |t, c| email_value(t, c, size + 1))), - None, + 0, ) } Code::Char(char) if char.is_ascii_alphanumeric() && size < AUTOLINK_DOMAIN_SIZE_MAX => { tokenizer.consume(code); ( State::Fn(Box::new(move |t, c| email_label(t, c, size + 1))), - None, + 0, ) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -325,7 +319,7 @@ fn end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.consume(code); tokenizer.exit(Token::AutolinkMarker); tokenizer.exit(Token::Autolink); - (State::Ok, None) + (State::Ok, 0) } _ => unreachable!("expected `>`"), } diff --git a/src/construct/blank_line.rs b/src/construct/blank_line.rs index cf51aec..5b6513d 100644 --- a/src/construct/blank_line.rs +++ b/src/construct/blank_line.rs @@ -60,8 +60,8 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { fn after(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { - (State::Ok, Some(vec![code])) + (State::Ok, if matches!(code, Code::None) { 0 } else { 1 }) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } diff --git a/src/construct/block_quote.rs b/src/construct/block_quote.rs index a8a8fa8..d1b4005 100644 --- a/src/construct/block_quote.rs +++ b/src/construct/block_quote.rs @@ -53,7 +53,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { if tokenizer.parse_state.constructs.block_quote { tokenizer.go(space_or_tab_min_max(0, max), before)(tokenizer, code) } else { - (State::Nok, None) + (State::Nok, 0) } } @@ -103,9 +103,9 @@ fn cont_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.enter(Token::BlockQuoteMarker); tokenizer.consume(code); tokenizer.exit(Token::BlockQuoteMarker); - (State::Fn(Box::new(cont_after)), None) + (State::Fn(Box::new(cont_after)), 0) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -124,11 +124,11 @@ fn cont_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.consume(code); tokenizer.exit(Token::SpaceOrTab); tokenizer.exit(Token::BlockQuotePrefix); - (State::Ok, None) + (State::Ok, 0) } _ => { tokenizer.exit(Token::BlockQuotePrefix); - (State::Ok, Some(vec![code])) + (State::Ok, if matches!(code, Code::None) { 0 } else { 1 }) } } } diff --git a/src/construct/character_escape.rs b/src/construct/character_escape.rs index f171d38..eb79486 100644 --- a/src/construct/character_escape.rs +++ b/src/construct/character_escape.rs @@ -49,9 +49,9 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.enter(Token::CharacterEscapeMarker); tokenizer.consume(code); tokenizer.exit(Token::CharacterEscapeMarker); - (State::Fn(Box::new(inside)), None) + (State::Fn(Box::new(inside)), 0) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -68,8 +68,8 @@ fn inside(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.consume(code); tokenizer.exit(Token::CharacterEscapeValue); tokenizer.exit(Token::CharacterEscape); - (State::Ok, None) + (State::Ok, 0) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs index a4cbec1..2d9d524 100644 --- a/src/construct/character_reference.rs +++ b/src/construct/character_reference.rs @@ -143,9 +143,9 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.enter(Token::CharacterReferenceMarker); tokenizer.consume(code); tokenizer.exit(Token::CharacterReferenceMarker); - (State::Fn(Box::new(open)), None) + (State::Fn(Box::new(open)), 0) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -169,7 +169,7 @@ fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.enter(Token::CharacterReferenceMarkerNumeric); tokenizer.consume(code); tokenizer.exit(Token::CharacterReferenceMarkerNumeric); - (State::Fn(Box::new(|t, c| numeric(t, c, info))), None) + (State::Fn(Box::new(|t, c| numeric(t, c, info))), 0) } else { tokenizer.enter(Token::CharacterReferenceValue); value(tokenizer, code, info) @@ -192,7 +192,7 @@ fn numeric(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResu tokenizer.exit(Token::CharacterReferenceMarkerHexadecimal); tokenizer.enter(Token::CharacterReferenceValue); info.kind = Kind::Hexadecimal; - (State::Fn(Box::new(|t, c| value(t, c, info))), None) + (State::Fn(Box::new(|t, c| value(t, c, info))), 0) } else { tokenizer.enter(Token::CharacterReferenceValue); info.kind = Kind::Decimal; @@ -220,25 +220,25 @@ fn value(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult && !CHARACTER_REFERENCES.iter().any(|d| d.0 == info.buffer); if unknown_named { - (State::Nok, None) + (State::Nok, 0) } else { tokenizer.exit(Token::CharacterReferenceValue); tokenizer.enter(Token::CharacterReferenceMarkerSemi); tokenizer.consume(code); tokenizer.exit(Token::CharacterReferenceMarkerSemi); tokenizer.exit(Token::CharacterReference); - (State::Ok, None) + (State::Ok, 0) } } Code::Char(char) => { if info.buffer.len() < info.kind.max() && info.kind.allowed(char) { info.buffer.push(char); tokenizer.consume(code); - (State::Fn(Box::new(|t, c| value(t, c, info))), None) + (State::Fn(Box::new(|t, c| value(t, c, info))), 0) } else { - (State::Nok, None) + (State::Nok, 0) } } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs index 49bcae3..332d93c 100644 --- a/src/construct/code_fenced.rs +++ b/src/construct/code_fenced.rs @@ -199,7 +199,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.enter(Token::CodeFencedFence); tokenizer.go(space_or_tab_min_max(0, max), before_sequence_open)(tokenizer, code) } else { - (State::Nok, None) + (State::Nok, 0) } } @@ -235,7 +235,7 @@ fn before_sequence_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult }, ) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -256,14 +256,14 @@ fn sequence_open(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> State info.size += 1; sequence_open(t, c, info) })), - None, + 0, ) } _ if info.size >= CODE_FENCED_SEQUENCE_SIZE_MIN => { tokenizer.exit(Token::CodeFencedFenceSequence); tokenizer.attempt_opt(space_or_tab(), |t, c| info_before(t, c, info))(tokenizer, code) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -319,13 +319,13 @@ fn info_inside( tokenizer.exit(Token::CodeFencedFenceInfo); tokenizer.attempt_opt(space_or_tab(), |t, c| meta_before(t, c, info))(tokenizer, code) } - Code::Char('`') if info.kind == Kind::GraveAccent => (State::Nok, None), + Code::Char('`') if info.kind == Kind::GraveAccent => (State::Nok, 0), Code::Char(_) => { codes.push(code); tokenizer.consume(code); ( State::Fn(Box::new(|t, c| info_inside(t, c, info, codes))), - None, + 0, ) } } @@ -373,10 +373,10 @@ fn meta(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { tokenizer.concrete = true; at_break(tokenizer, code, info) } - Code::Char('`') if info.kind == Kind::GraveAccent => (State::Nok, None), + Code::Char('`') if info.kind == Kind::GraveAccent => (State::Nok, 0), _ => { tokenizer.consume(code); - (State::Fn(Box::new(|t, c| meta(t, c, info))), None) + (State::Fn(Box::new(|t, c| meta(t, c, info))), 0) } } } @@ -438,7 +438,7 @@ fn close_begin(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResu tokenizer.enter(Token::LineEnding); tokenizer.consume(code); tokenizer.exit(Token::LineEnding); - (State::Fn(Box::new(|t, c| close_start(t, c, info))), None) + (State::Fn(Box::new(|t, c| close_start(t, c, info))), 0) } _ => unreachable!("expected eol"), } @@ -459,14 +459,10 @@ fn close_start(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResu usize::MAX }; - if tokenizer.parse_state.constructs.code_fenced { - tokenizer.enter(Token::CodeFencedFence); - tokenizer.go(space_or_tab_min_max(0, max), |t, c| { - close_before(t, c, info) - })(tokenizer, code) - } else { - (State::Nok, None) - } + tokenizer.enter(Token::CodeFencedFence); + tokenizer.go(space_or_tab_min_max(0, max), |t, c| { + close_before(t, c, info) + })(tokenizer, code) } /// In a closing fence, after optional whitespace, before sequence. @@ -483,7 +479,7 @@ fn close_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnRes tokenizer.enter(Token::CodeFencedFenceSequence); close_sequence(tokenizer, code, info, 0) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -501,14 +497,14 @@ fn close_sequence(tokenizer: &mut Tokenizer, code: Code, info: Info, size: usize tokenizer.consume(code); ( State::Fn(Box::new(move |t, c| close_sequence(t, c, info, size + 1))), - None, + 0, ) } _ if size >= CODE_FENCED_SEQUENCE_SIZE_MIN && size >= info.size => { tokenizer.exit(Token::CodeFencedFenceSequence); tokenizer.attempt_opt(space_or_tab(), close_sequence_after)(tokenizer, code) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -524,9 +520,9 @@ fn close_sequence_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult match code { Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { tokenizer.exit(Token::CodeFencedFence); - (State::Ok, Some(vec![code])) + (State::Ok, if matches!(code, Code::None) { 0 } else { 1 }) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -542,7 +538,7 @@ fn content_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnR tokenizer.enter(Token::LineEnding); tokenizer.consume(code); tokenizer.exit(Token::LineEnding); - (State::Fn(Box::new(|t, c| content_start(t, c, info))), None) + (State::Fn(Box::new(|t, c| content_start(t, c, info))), 0) } /// Before code content, definitely not before a closing fence. /// @@ -594,10 +590,7 @@ fn content_continue(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateF } _ => { tokenizer.consume(code); - ( - State::Fn(Box::new(|t, c| content_continue(t, c, info))), - None, - ) + (State::Fn(Box::new(|t, c| content_continue(t, c, info))), 0) } } } @@ -616,5 +609,5 @@ fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.interrupt = false; // No longer concrete. tokenizer.concrete = false; - (State::Ok, Some(vec![code])) + (State::Ok, if matches!(code, Code::None) { 0 } else { 1 }) } diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs index eb0811b..2a8b92f 100644 --- a/src/construct/code_indented.rs +++ b/src/construct/code_indented.rs @@ -63,7 +63,7 @@ use crate::tokenizer::{Code, State, StateFnResult, Tokenizer}; pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { // Do not interrupt paragraphs. if tokenizer.interrupt || !tokenizer.parse_state.constructs.code_indented { - (State::Nok, None) + (State::Nok, 0) } else { tokenizer.enter(Token::CodeIndented); tokenizer.go(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), at_break)(tokenizer, code) @@ -104,7 +104,7 @@ fn content(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { } _ => { tokenizer.consume(code); - (State::Fn(Box::new(content)), None) + (State::Fn(Box::new(content)), 0) } } } @@ -119,7 +119,7 @@ fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.exit(Token::CodeIndented); // Feel free to interrupt. tokenizer.interrupt = false; - (State::Ok, Some(vec![code])) + (State::Ok, if matches!(code, Code::None) { 0 } else { 1 }) } /// Right at a line ending, trying to parse another indent. @@ -131,14 +131,14 @@ fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// ``` fn further_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { if tokenizer.lazy { - (State::Nok, None) + (State::Nok, 0) } else { match code { Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { tokenizer.enter(Token::LineEnding); tokenizer.consume(code); tokenizer.exit(Token::LineEnding); - (State::Fn(Box::new(further_start)), None) + (State::Fn(Box::new(further_start)), 0) } _ => tokenizer.attempt(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), |ok| { Box::new(if ok { further_end } else { further_begin }) @@ -155,7 +155,7 @@ fn further_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// ^ /// ``` fn further_end(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - (State::Ok, Some(vec![code])) + (State::Ok, if matches!(code, Code::None) { 0 } else { 1 }) } /// At the beginning of a line that is not indented enough. @@ -179,6 +179,6 @@ fn further_begin(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { fn further_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => further_start(tokenizer, code), - _ => (State::Nok, None), + _ => (State::Nok, 0), } } diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs index eb143ba..03ff881 100644 --- a/src/construct/code_text.rs +++ b/src/construct/code_text.rs @@ -108,7 +108,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.enter(Token::CodeTextSequence); sequence_open(tokenizer, code, 0) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -123,7 +123,7 @@ fn sequence_open(tokenizer: &mut Tokenizer, code: Code, size: usize) -> StateFnR tokenizer.consume(code); ( State::Fn(Box::new(move |t, c| sequence_open(t, c, size + 1))), - None, + 0, ) } else { tokenizer.exit(Token::CodeTextSequence); @@ -139,15 +139,12 @@ fn sequence_open(tokenizer: &mut Tokenizer, code: Code, size: usize) -> StateFnR /// ``` fn between(tokenizer: &mut Tokenizer, code: Code, size_open: usize) -> StateFnResult { match code { - Code::None => (State::Nok, None), + Code::None => (State::Nok, 0), Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { tokenizer.enter(Token::LineEnding); tokenizer.consume(code); tokenizer.exit(Token::LineEnding); - ( - State::Fn(Box::new(move |t, c| between(t, c, size_open))), - None, - ) + (State::Fn(Box::new(move |t, c| between(t, c, size_open))), 0) } Code::Char('`') => { tokenizer.enter(Token::CodeTextSequence); @@ -174,7 +171,7 @@ fn data(tokenizer: &mut Tokenizer, code: Code, size_open: usize) -> StateFnResul } _ => { tokenizer.consume(code); - (State::Fn(Box::new(move |t, c| data(t, c, size_open))), None) + (State::Fn(Box::new(move |t, c| data(t, c, size_open))), 0) } } } @@ -198,13 +195,13 @@ fn sequence_close( State::Fn(Box::new(move |t, c| { sequence_close(t, c, size_open, size + 1) })), - None, + 0, ) } _ if size_open == size => { tokenizer.exit(Token::CodeTextSequence); tokenizer.exit(Token::CodeText); - (State::Ok, Some(vec![code])) + (State::Ok, if matches!(code, Code::None) { 0 } else { 1 }) } _ => { let index = tokenizer.events.len(); diff --git a/src/construct/definition.rs b/src/construct/definition.rs index 231011f..2016cc4 100644 --- a/src/construct/definition.rs +++ b/src/construct/definition.rs @@ -125,7 +125,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { // Note: arbitrary whitespace allowed even if code (indented) is on. tokenizer.attempt_opt(space_or_tab(), before)(tokenizer, code) } else { - (State::Nok, None) + (State::Nok, 0) } } @@ -151,7 +151,7 @@ fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { }, label_after, )(tokenizer, code), - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -171,10 +171,10 @@ fn label_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { State::Fn(Box::new( tokenizer.attempt_opt(space_or_tab_eol(), destination_before), )), - None, + 0, ) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -240,9 +240,9 @@ fn after_whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.exit(Token::Definition); // You’d be interrupting. tokenizer.interrupt = true; - (State::Ok, Some(vec![code])) + (State::Ok, if matches!(code, Code::None) { 0 } else { 1 }) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -301,8 +301,8 @@ fn title_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { fn title_after_after_optional_whitespace(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { - (State::Ok, Some(vec![code])) + (State::Ok, if matches!(code, Code::None) { 0 } else { 1 }) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } diff --git a/src/construct/hard_break_escape.rs b/src/construct/hard_break_escape.rs index 191ef67..19d430d 100644 --- a/src/construct/hard_break_escape.rs +++ b/src/construct/hard_break_escape.rs @@ -57,9 +57,9 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.enter(Token::HardBreakEscapeMarker); tokenizer.consume(code); tokenizer.exit(Token::HardBreakEscapeMarker); - (State::Fn(Box::new(inside)), None) + (State::Fn(Box::new(inside)), 0) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -74,8 +74,8 @@ fn inside(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { tokenizer.exit(Token::HardBreakEscape); - (State::Ok, Some(vec![code])) + (State::Ok, if matches!(code, Code::None) { 0 } else { 1 }) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } diff --git a/src/construct/hard_break_trailing.rs b/src/construct/hard_break_trailing.rs index 88c668a..c5861f7 100644 --- a/src/construct/hard_break_trailing.rs +++ b/src/construct/hard_break_trailing.rs @@ -57,9 +57,9 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.enter(Token::HardBreakTrailing); tokenizer.enter(Token::HardBreakTrailingSpace); tokenizer.consume(code); - (State::Fn(Box::new(|t, c| inside(t, c, 1))), None) + (State::Fn(Box::new(|t, c| inside(t, c, 1))), 0) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -74,18 +74,15 @@ fn inside(tokenizer: &mut Tokenizer, code: Code, size: usize) -> StateFnResult { match code { Code::Char(' ') => { tokenizer.consume(code); - ( - State::Fn(Box::new(move |t, c| inside(t, c, size + 1))), - None, - ) + (State::Fn(Box::new(move |t, c| inside(t, c, size + 1))), 0) } Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') if size >= HARD_BREAK_PREFIX_SIZE_MIN => { tokenizer.exit(Token::HardBreakTrailingSpace); tokenizer.exit(Token::HardBreakTrailing); - (State::Ok, Some(vec![code])) + (State::Ok, if matches!(code, Code::None) { 0 } else { 1 }) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs index e544064..ac36e29 100644 --- a/src/construct/heading_atx.rs +++ b/src/construct/heading_atx.rs @@ -77,7 +77,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.enter(Token::HeadingAtx); tokenizer.go(space_or_tab_min_max(0, max), before)(tokenizer, code) } else { - (State::Nok, None) + (State::Nok, 0) } } @@ -92,7 +92,7 @@ fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.enter(Token::HeadingAtxSequence); sequence_open(tokenizer, code, 0) } else { - (State::Nok, None) + (State::Nok, 0) } } @@ -114,14 +114,14 @@ fn sequence_open(tokenizer: &mut Tokenizer, code: Code, rank: usize) -> StateFnR State::Fn(Box::new(move |tokenizer, code| { sequence_open(tokenizer, code, rank + 1) })), - None, + 0, ) } _ if rank > 0 => { tokenizer.exit(Token::HeadingAtxSequence); tokenizer.go(space_or_tab(), at_break)(tokenizer, code) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -138,7 +138,7 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.register_resolver("heading_atx".to_string(), Box::new(resolve)); // Feel free to interrupt. tokenizer.interrupt = false; - (State::Ok, Some(vec![code])) + (State::Ok, if matches!(code, Code::None) { 0 } else { 1 }) } Code::VirtualSpace | Code::Char('\t' | ' ') => { tokenizer.go(space_or_tab(), at_break)(tokenizer, code) @@ -165,7 +165,7 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { fn further_sequence(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { if let Code::Char('#') = code { tokenizer.consume(code); - (State::Fn(Box::new(further_sequence)), None) + (State::Fn(Box::new(further_sequence)), 0) } else { tokenizer.exit(Token::HeadingAtxSequence); at_break(tokenizer, code) @@ -187,7 +187,7 @@ fn data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { } _ => { tokenizer.consume(code); - (State::Fn(Box::new(data)), None) + (State::Fn(Box::new(data)), 0) } } } diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs index ee6c23c..49d9cd2 100644 --- a/src/construct/heading_setext.rs +++ b/src/construct/heading_setext.rs @@ -135,7 +135,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { if paragraph_before && !tokenizer.lazy && tokenizer.parse_state.constructs.heading_setext { tokenizer.go(space_or_tab_min_max(0, max), before)(tokenizer, code) } else { - (State::Nok, None) + (State::Nok, 0) } } @@ -152,7 +152,7 @@ fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.enter(Token::HeadingSetextUnderline); inside(tokenizer, code, Kind::from_char(char)) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -167,7 +167,7 @@ fn inside(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult { match code { Code::Char(char) if char == kind.as_char() => { tokenizer.consume(code); - (State::Fn(Box::new(move |t, c| inside(t, c, kind))), None) + (State::Fn(Box::new(move |t, c| inside(t, c, kind))), 0) } _ => { tokenizer.exit(Token::HeadingSetextUnderline); @@ -189,9 +189,9 @@ fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { // Feel free to interrupt. tokenizer.interrupt = false; tokenizer.register_resolver("heading_setext".to_string(), Box::new(resolve)); - (State::Ok, Some(vec![code])) + (State::Ok, if matches!(code, Code::None) { 0 } else { 1 }) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs index 1255081..c77a626 100644 --- a/src/construct/html_flow.rs +++ b/src/construct/html_flow.rs @@ -223,7 +223,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { before, )(tokenizer, code) } else { - (State::Nok, None) + (State::Nok, 0) } } @@ -237,9 +237,9 @@ fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { if Code::Char('<') == code { tokenizer.enter(Token::HtmlFlowData); tokenizer.consume(code); - (State::Fn(Box::new(open)), None) + (State::Fn(Box::new(open)), 0) } else { - (State::Nok, None) + (State::Nok, 0) } } @@ -267,17 +267,11 @@ fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::Char('!') => { tokenizer.consume(code); - ( - State::Fn(Box::new(|t, c| declaration_open(t, c, info))), - None, - ) + (State::Fn(Box::new(|t, c| declaration_open(t, c, info))), 0) } Code::Char('/') => { tokenizer.consume(code); - ( - State::Fn(Box::new(|t, c| tag_close_start(t, c, info))), - None, - ) + (State::Fn(Box::new(|t, c| tag_close_start(t, c, info))), 0) } Code::Char('?') => { info.kind = Kind::Instruction; @@ -288,14 +282,14 @@ fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { // right now, so we do need to search for `>`, similar to declarations. ( State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))), - None, + 0, ) } Code::Char('A'..='Z' | 'a'..='z') => { info.start_tag = true; tag_name(tokenizer, code, info) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -316,7 +310,7 @@ fn declaration_open(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> St info.kind = Kind::Comment; ( State::Fn(Box::new(|t, c| comment_open_inside(t, c, info))), - None, + 0, ) } Code::Char('[') => { @@ -324,10 +318,7 @@ fn declaration_open(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> St info.kind = Kind::Cdata; info.buffer = parse("CDATA["); info.index = 0; - ( - State::Fn(Box::new(|t, c| cdata_open_inside(t, c, info))), - None, - ) + (State::Fn(Box::new(|t, c| cdata_open_inside(t, c, info))), 0) } Code::Char('A'..='Z' | 'a'..='z') => { tokenizer.consume(code); @@ -336,10 +327,10 @@ fn declaration_open(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> St tokenizer.concrete = true; ( State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))), - None, + 0, ) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -357,10 +348,10 @@ fn comment_open_inside(tokenizer: &mut Tokenizer, code: Code, info: Info) -> Sta tokenizer.concrete = true; ( State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))), - None, + 0, ) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -379,15 +370,12 @@ fn cdata_open_inside(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> S info.buffer.clear(); // Do not form containers. tokenizer.concrete = true; - (State::Fn(Box::new(|t, c| continuation(t, c, info))), None) + (State::Fn(Box::new(|t, c| continuation(t, c, info))), 0) } else { - ( - State::Fn(Box::new(|t, c| cdata_open_inside(t, c, info))), - None, - ) + (State::Fn(Box::new(|t, c| cdata_open_inside(t, c, info))), 0) } } else { - (State::Nok, None) + (State::Nok, 0) } } @@ -402,9 +390,9 @@ fn tag_close_start(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> Sta Code::Char('A'..='Z' | 'a'..='z') => { tokenizer.consume(code); info.buffer.push(code); - (State::Fn(Box::new(|t, c| tag_name(t, c, info))), None) + (State::Fn(Box::new(|t, c| tag_name(t, c, info))), 0) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -439,7 +427,7 @@ fn tag_name(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes tokenizer.consume(code); ( State::Fn(Box::new(|t, c| basic_self_closing(t, c, info))), - None, + 0, ) } else { // Do not form containers. @@ -451,7 +439,7 @@ fn tag_name(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes // Do not support complete HTML when interrupting. if tokenizer.interrupt && !tokenizer.lazy { - (State::Nok, None) + (State::Nok, 0) } else if info.start_tag { complete_attribute_name_before(tokenizer, code, info) } else { @@ -462,9 +450,9 @@ fn tag_name(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes Code::Char('-' | '0'..='9' | 'A'..='Z' | 'a'..='z') => { tokenizer.consume(code); info.buffer.push(code); - (State::Fn(Box::new(|t, c| tag_name(t, c, info))), None) + (State::Fn(Box::new(|t, c| tag_name(t, c, info))), 0) } - Code::Char(_) => (State::Nok, None), + Code::Char(_) => (State::Nok, 0), } } @@ -480,9 +468,9 @@ fn basic_self_closing(tokenizer: &mut Tokenizer, code: Code, info: Info) -> Stat tokenizer.consume(code); // Do not form containers. tokenizer.concrete = true; - (State::Fn(Box::new(|t, c| continuation(t, c, info))), None) + (State::Fn(Box::new(|t, c| continuation(t, c, info))), 0) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -498,7 +486,7 @@ fn complete_closing_tag_after(tokenizer: &mut Tokenizer, code: Code, info: Info) tokenizer.consume(code); ( State::Fn(Box::new(|t, c| complete_closing_tag_after(t, c, info))), - None, + 0, ) } _ => complete_end(tokenizer, code, info), @@ -532,20 +520,20 @@ fn complete_attribute_name_before( match code { Code::Char('/') => { tokenizer.consume(code); - (State::Fn(Box::new(|t, c| complete_end(t, c, info))), None) + (State::Fn(Box::new(|t, c| complete_end(t, c, info))), 0) } Code::Char('0'..='9' | ':' | 'A'..='Z' | '_' | 'a'..='z') => { tokenizer.consume(code); ( State::Fn(Box::new(|t, c| complete_attribute_name(t, c, info))), - None, + 0, ) } Code::VirtualSpace | Code::Char('\t' | ' ') => { tokenizer.consume(code); ( State::Fn(Box::new(|t, c| complete_attribute_name_before(t, c, info))), - None, + 0, ) } _ => complete_end(tokenizer, code, info), @@ -568,7 +556,7 @@ fn complete_attribute_name(tokenizer: &mut Tokenizer, code: Code, info: Info) -> tokenizer.consume(code); ( State::Fn(Box::new(|t, c| complete_attribute_name(t, c, info))), - None, + 0, ) } _ => complete_attribute_name_after(tokenizer, code, info), @@ -594,14 +582,14 @@ fn complete_attribute_name_after( tokenizer.consume(code); ( State::Fn(Box::new(|t, c| complete_attribute_value_before(t, c, info))), - None, + 0, ) } Code::VirtualSpace | Code::Char('\t' | ' ') => { tokenizer.consume(code); ( State::Fn(Box::new(|t, c| complete_attribute_name_after(t, c, info))), - None, + 0, ) } _ => complete_attribute_name_before(tokenizer, code, info), @@ -623,20 +611,20 @@ fn complete_attribute_value_before( mut info: Info, ) -> StateFnResult { match code { - Code::None | Code::Char('<' | '=' | '>' | '`') => (State::Nok, None), + Code::None | Code::Char('<' | '=' | '>' | '`') => (State::Nok, 0), Code::Char('"' | '\'') => { tokenizer.consume(code); info.quote = Some(QuoteKind::from_code(code)); ( State::Fn(Box::new(|t, c| complete_attribute_value_quoted(t, c, info))), - None, + 0, ) } Code::VirtualSpace | Code::Char('\t' | ' ') => { tokenizer.consume(code); ( State::Fn(Box::new(|t, c| complete_attribute_value_before(t, c, info))), - None, + 0, ) } _ => complete_attribute_value_unquoted(tokenizer, code, info), @@ -657,21 +645,21 @@ fn complete_attribute_value_quoted( info: Info, ) -> StateFnResult { match code { - Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => (State::Nok, None), + Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => (State::Nok, 0), Code::Char(char) if char == info.quote.as_ref().unwrap().as_char() => { tokenizer.consume(code); ( State::Fn(Box::new(|t, c| { complete_attribute_value_quoted_after(t, c, info) })), - None, + 0, ) } _ => { tokenizer.consume(code); ( State::Fn(Box::new(|t, c| complete_attribute_value_quoted(t, c, info))), - None, + 0, ) } } @@ -701,7 +689,7 @@ fn complete_attribute_value_unquoted( State::Fn(Box::new(|t, c| { complete_attribute_value_unquoted(t, c, info) })), - None, + 0, ) } } @@ -723,7 +711,7 @@ fn complete_attribute_value_quoted_after( Code::VirtualSpace | Code::Char('\t' | ' ' | '/' | '>') => { complete_attribute_name_before(tokenizer, code, info) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -737,9 +725,9 @@ fn complete_end(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnRes match code { Code::Char('>') => { tokenizer.consume(code); - (State::Fn(Box::new(|t, c| complete_after(t, c, info))), None) + (State::Fn(Box::new(|t, c| complete_after(t, c, info))), 0) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -758,9 +746,9 @@ fn complete_after(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnR } Code::VirtualSpace | Code::Char('\t' | ' ') => { tokenizer.consume(code); - (State::Fn(Box::new(|t, c| complete_after(t, c, info))), None) + (State::Fn(Box::new(|t, c| complete_after(t, c, info))), 0) } - Code::Char(_) => (State::Nok, None), + Code::Char(_) => (State::Nok, 0), } } @@ -776,28 +764,28 @@ fn continuation(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnRes tokenizer.consume(code); ( State::Fn(Box::new(|t, c| continuation_comment_inside(t, c, info))), - None, + 0, ) } Code::Char('<') if info.kind == Kind::Raw => { tokenizer.consume(code); ( State::Fn(Box::new(|t, c| continuation_raw_tag_open(t, c, info))), - None, + 0, ) } Code::Char('>') if info.kind == Kind::Declaration => { tokenizer.consume(code); ( State::Fn(Box::new(|t, c| continuation_close(t, c, info))), - None, + 0, ) } Code::Char('?') if info.kind == Kind::Instruction => { tokenizer.consume(code); ( State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))), - None, + 0, ) } Code::Char(']') if info.kind == Kind::Cdata => { @@ -806,7 +794,7 @@ fn continuation(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnRes State::Fn(Box::new(|t, c| { continuation_character_data_inside(t, c, info) })), - None, + 0, ) } Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') @@ -827,7 +815,7 @@ fn continuation(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnRes } _ => { tokenizer.consume(code); - (State::Fn(Box::new(|t, c| continuation(t, c, info))), None) + (State::Fn(Box::new(|t, c| continuation(t, c, info))), 0) } } } @@ -864,7 +852,7 @@ fn continuation_start_non_lazy(tokenizer: &mut Tokenizer, code: Code, info: Info tokenizer.exit(Token::LineEnding); ( State::Fn(Box::new(|t, c| continuation_before(t, c, info))), - None, + 0, ) } _ => unreachable!("expected eol"), @@ -902,7 +890,7 @@ fn continuation_comment_inside(tokenizer: &mut Tokenizer, code: Code, info: Info tokenizer.consume(code); ( State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))), - None, + 0, ) } _ => continuation(tokenizer, code, info), @@ -921,7 +909,7 @@ fn continuation_raw_tag_open(tokenizer: &mut Tokenizer, code: Code, info: Info) tokenizer.consume(code); ( State::Fn(Box::new(|t, c| continuation_raw_end_tag(t, c, info))), - None, + 0, ) } _ => continuation(tokenizer, code, info), @@ -948,7 +936,7 @@ fn continuation_raw_end_tag( tokenizer.consume(code); ( State::Fn(Box::new(|t, c| continuation_close(t, c, info))), - None, + 0, ) } else { continuation(tokenizer, code, info) @@ -959,7 +947,7 @@ fn continuation_raw_end_tag( info.buffer.push(code); ( State::Fn(Box::new(|t, c| continuation_raw_end_tag(t, c, info))), - None, + 0, ) } _ => { @@ -985,7 +973,7 @@ fn continuation_character_data_inside( tokenizer.consume(code); ( State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))), - None, + 0, ) } _ => continuation(tokenizer, code, info), @@ -1016,14 +1004,14 @@ fn continuation_declaration_inside( tokenizer.consume(code); ( State::Fn(Box::new(|t, c| continuation_close(t, c, info))), - None, + 0, ) } Code::Char('-') if info.kind == Kind::Comment => { tokenizer.consume(code); ( State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))), - None, + 0, ) } _ => continuation(tokenizer, code, info), @@ -1046,7 +1034,7 @@ fn continuation_close(tokenizer: &mut Tokenizer, code: Code, info: Info) -> Stat tokenizer.consume(code); ( State::Fn(Box::new(|t, c| continuation_close(t, c, info))), - None, + 0, ) } } @@ -1064,7 +1052,7 @@ fn continuation_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.interrupt = false; // No longer concrete. tokenizer.concrete = false; - (State::Ok, Some(vec![code])) + (State::Ok, if matches!(code, Code::None) { 0 } else { 1 }) } /// Before a line ending, expecting a blank line. @@ -1078,5 +1066,5 @@ fn blank_line_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.enter(Token::LineEnding); tokenizer.consume(code); tokenizer.exit(Token::LineEnding); - (State::Fn(Box::new(blank_line)), None) + (State::Fn(Box::new(blank_line)), 0) } diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs index db00551..1696f68 100644 --- a/src/construct/html_text.rs +++ b/src/construct/html_text.rs @@ -70,9 +70,9 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.enter(Token::HtmlText); tokenizer.enter(Token::HtmlTextData); tokenizer.consume(code); - (State::Fn(Box::new(open)), None) + (State::Fn(Box::new(open)), 0) } else { - (State::Nok, None) + (State::Nok, 0) } } @@ -90,21 +90,21 @@ fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::Char('!') => { tokenizer.consume(code); - (State::Fn(Box::new(declaration_open)), None) + (State::Fn(Box::new(declaration_open)), 0) } Code::Char('/') => { tokenizer.consume(code); - (State::Fn(Box::new(tag_close_start)), None) + (State::Fn(Box::new(tag_close_start)), 0) } Code::Char('?') => { tokenizer.consume(code); - (State::Fn(Box::new(instruction)), None) + (State::Fn(Box::new(instruction)), 0) } Code::Char('A'..='Z' | 'a'..='z') => { tokenizer.consume(code); - (State::Fn(Box::new(tag_open)), None) + (State::Fn(Box::new(tag_open)), 0) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -122,21 +122,21 @@ fn declaration_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::Char('-') => { tokenizer.consume(code); - (State::Fn(Box::new(comment_open_inside)), None) + (State::Fn(Box::new(comment_open_inside)), 0) } Code::Char('[') => { tokenizer.consume(code); let buffer = parse("CDATA["); ( State::Fn(Box::new(|t, c| cdata_open_inside(t, c, buffer, 0))), - None, + 0, ) } Code::Char('A'..='Z' | 'a'..='z') => { tokenizer.consume(code); - (State::Fn(Box::new(declaration)), None) + (State::Fn(Box::new(declaration)), 0) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -150,9 +150,9 @@ fn comment_open_inside(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::Char('-') => { tokenizer.consume(code); - (State::Fn(Box::new(comment_start)), None) + (State::Fn(Box::new(comment_start)), 0) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -171,10 +171,10 @@ fn comment_open_inside(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// [html_flow]: crate::construct::html_flow fn comment_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { - Code::None | Code::Char('>') => (State::Nok, None), + Code::None | Code::Char('>') => (State::Nok, 0), Code::Char('-') => { tokenizer.consume(code); - (State::Fn(Box::new(comment_start_dash)), None) + (State::Fn(Box::new(comment_start_dash)), 0) } _ => comment(tokenizer, code), } @@ -195,7 +195,7 @@ fn comment_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// [html_flow]: crate::construct::html_flow fn comment_start_dash(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { - Code::None | Code::Char('>') => (State::Nok, None), + Code::None | Code::Char('>') => (State::Nok, 0), _ => comment(tokenizer, code), } } @@ -208,17 +208,17 @@ fn comment_start_dash(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// ``` fn comment(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { - Code::None => (State::Nok, None), + Code::None => (State::Nok, 0), Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { at_line_ending(tokenizer, code, Box::new(comment)) } Code::Char('-') => { tokenizer.consume(code); - (State::Fn(Box::new(comment_close)), None) + (State::Fn(Box::new(comment_close)), 0) } _ => { tokenizer.consume(code); - (State::Fn(Box::new(comment)), None) + (State::Fn(Box::new(comment)), 0) } } } @@ -233,7 +233,7 @@ fn comment_close(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::Char('-') => { tokenizer.consume(code); - (State::Fn(Box::new(end)), None) + (State::Fn(Box::new(end)), 0) } _ => comment(tokenizer, code), } @@ -255,17 +255,17 @@ fn cdata_open_inside( tokenizer.consume(code); if index + 1 == buffer.len() { - (State::Fn(Box::new(cdata)), None) + (State::Fn(Box::new(cdata)), 0) } else { ( State::Fn(Box::new(move |t, c| { cdata_open_inside(t, c, buffer, index + 1) })), - None, + 0, ) } } else { - (State::Nok, None) + (State::Nok, 0) } } @@ -277,17 +277,17 @@ fn cdata_open_inside( /// ``` fn cdata(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { - Code::None => (State::Nok, None), + Code::None => (State::Nok, 0), Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { at_line_ending(tokenizer, code, Box::new(cdata)) } Code::Char(']') => { tokenizer.consume(code); - (State::Fn(Box::new(cdata_close)), None) + (State::Fn(Box::new(cdata_close)), 0) } _ => { tokenizer.consume(code); - (State::Fn(Box::new(cdata)), None) + (State::Fn(Box::new(cdata)), 0) } } } @@ -302,7 +302,7 @@ fn cdata_close(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::Char(']') => { tokenizer.consume(code); - (State::Fn(Box::new(cdata_end)), None) + (State::Fn(Box::new(cdata_end)), 0) } _ => cdata(tokenizer, code), } @@ -336,7 +336,7 @@ fn declaration(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { } _ => { tokenizer.consume(code); - (State::Fn(Box::new(declaration)), None) + (State::Fn(Box::new(declaration)), 0) } } } @@ -349,17 +349,17 @@ fn declaration(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// ``` fn instruction(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { - Code::None => (State::Nok, None), + Code::None => (State::Nok, 0), Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { at_line_ending(tokenizer, code, Box::new(instruction)) } Code::Char('?') => { tokenizer.consume(code); - (State::Fn(Box::new(instruction_close)), None) + (State::Fn(Box::new(instruction_close)), 0) } _ => { tokenizer.consume(code); - (State::Fn(Box::new(instruction)), None) + (State::Fn(Box::new(instruction)), 0) } } } @@ -387,9 +387,9 @@ fn tag_close_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::Char('A'..='Z' | 'a'..='z') => { tokenizer.consume(code); - (State::Fn(Box::new(tag_close)), None) + (State::Fn(Box::new(tag_close)), 0) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -403,7 +403,7 @@ fn tag_close(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::Char('-' | '0'..='9' | 'A'..='Z' | 'a'..='z') => { tokenizer.consume(code); - (State::Fn(Box::new(tag_close)), None) + (State::Fn(Box::new(tag_close)), 0) } _ => tag_close_between(tokenizer, code), } @@ -422,7 +422,7 @@ fn tag_close_between(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { } Code::VirtualSpace | Code::Char('\t' | ' ') => { tokenizer.consume(code); - (State::Fn(Box::new(tag_close_between)), None) + (State::Fn(Box::new(tag_close_between)), 0) } _ => end(tokenizer, code), } @@ -438,12 +438,12 @@ fn tag_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::Char('-' | '0'..='9' | 'A'..='Z' | 'a'..='z') => { tokenizer.consume(code); - (State::Fn(Box::new(tag_open)), None) + (State::Fn(Box::new(tag_open)), 0) } Code::CarriageReturnLineFeed | Code::VirtualSpace | Code::Char('\t' | '\n' | '\r' | ' ' | '/' | '>') => tag_open_between(tokenizer, code), - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -460,15 +460,15 @@ fn tag_open_between(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { } Code::VirtualSpace | Code::Char('\t' | ' ') => { tokenizer.consume(code); - (State::Fn(Box::new(tag_open_between)), None) + (State::Fn(Box::new(tag_open_between)), 0) } Code::Char('/') => { tokenizer.consume(code); - (State::Fn(Box::new(end)), None) + (State::Fn(Box::new(end)), 0) } Code::Char(':' | 'A'..='Z' | '_' | 'a'..='z') => { tokenizer.consume(code); - (State::Fn(Box::new(tag_open_attribute_name)), None) + (State::Fn(Box::new(tag_open_attribute_name)), 0) } _ => end(tokenizer, code), } @@ -484,7 +484,7 @@ fn tag_open_attribute_name(tokenizer: &mut Tokenizer, code: Code) -> StateFnResu match code { Code::Char('-' | '.' | '0'..='9' | ':' | 'A'..='Z' | '_' | 'a'..='z') => { tokenizer.consume(code); - (State::Fn(Box::new(tag_open_attribute_name)), None) + (State::Fn(Box::new(tag_open_attribute_name)), 0) } _ => tag_open_attribute_name_after(tokenizer, code), } @@ -504,11 +504,11 @@ fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer, code: Code) -> State } Code::VirtualSpace | Code::Char('\t' | ' ') => { tokenizer.consume(code); - (State::Fn(Box::new(tag_open_attribute_name_after)), None) + (State::Fn(Box::new(tag_open_attribute_name_after)), 0) } Code::Char('=') => { tokenizer.consume(code); - (State::Fn(Box::new(tag_open_attribute_value_before)), None) + (State::Fn(Box::new(tag_open_attribute_value_before)), 0) } _ => tag_open_between(tokenizer, code), } @@ -523,13 +523,13 @@ fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer, code: Code) -> State /// ``` fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { - Code::None | Code::Char('<' | '=' | '>' | '`') => (State::Nok, None), + Code::None | Code::Char('<' | '=' | '>' | '`') => (State::Nok, 0), Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { at_line_ending(tokenizer, code, Box::new(tag_open_attribute_value_before)) } Code::VirtualSpace | Code::Char('\t' | ' ') => { tokenizer.consume(code); - (State::Fn(Box::new(tag_open_attribute_value_before)), None) + (State::Fn(Box::new(tag_open_attribute_value_before)), 0) } Code::Char(char) if char == '"' || char == '\'' => { tokenizer.consume(code); @@ -537,12 +537,12 @@ fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer, code: Code) -> Sta State::Fn(Box::new(move |t, c| { tag_open_attribute_value_quoted(t, c, char) })), - None, + 0, ) } Code::Char(_) => { tokenizer.consume(code); - (State::Fn(Box::new(tag_open_attribute_value_unquoted)), None) + (State::Fn(Box::new(tag_open_attribute_value_unquoted)), 0) } } } @@ -559,7 +559,7 @@ fn tag_open_attribute_value_quoted( marker: char, ) -> StateFnResult { match code { - Code::None => (State::Nok, None), + Code::None => (State::Nok, 0), Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => at_line_ending( tokenizer, code, @@ -569,7 +569,7 @@ fn tag_open_attribute_value_quoted( tokenizer.consume(code); ( State::Fn(Box::new(tag_open_attribute_value_quoted_after)), - None, + 0, ) } _ => { @@ -578,7 +578,7 @@ fn tag_open_attribute_value_quoted( State::Fn(Box::new(move |t, c| { tag_open_attribute_value_quoted(t, c, marker) })), - None, + 0, ) } } @@ -592,13 +592,13 @@ fn tag_open_attribute_value_quoted( /// ``` fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { - Code::None | Code::Char('"' | '\'' | '<' | '=' | '`') => (State::Nok, None), + Code::None | Code::Char('"' | '\'' | '<' | '=' | '`') => (State::Nok, 0), Code::CarriageReturnLineFeed | Code::VirtualSpace | Code::Char('\t' | '\n' | '\r' | ' ' | '/' | '>') => tag_open_between(tokenizer, code), Code::Char(_) => { tokenizer.consume(code); - (State::Fn(Box::new(tag_open_attribute_value_unquoted)), None) + (State::Fn(Box::new(tag_open_attribute_value_unquoted)), 0) } } } @@ -615,7 +615,7 @@ fn tag_open_attribute_value_quoted_after(tokenizer: &mut Tokenizer, code: Code) Code::CarriageReturnLineFeed | Code::VirtualSpace | Code::Char('\t' | '\n' | '\r' | ' ' | '>' | '/') => tag_open_between(tokenizer, code), - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -631,9 +631,9 @@ fn end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.consume(code); tokenizer.exit(Token::HtmlTextData); tokenizer.exit(Token::HtmlText); - (State::Ok, None) + (State::Ok, 0) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -660,7 +660,7 @@ fn at_line_ending( tokenizer.exit(Token::LineEnding); ( State::Fn(Box::new(|t, c| after_line_ending(t, c, return_state))), - None, + 0, ) } _ => unreachable!("expected eol"), diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs index 2dd8222..504571d 100644 --- a/src/construct/label_end.rs +++ b/src/construct/label_end.rs @@ -230,11 +230,11 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.exit(Token::LabelMarker); tokenizer.exit(Token::LabelEnd); - return (State::Fn(Box::new(move |t, c| after(t, c, info))), None); + return (State::Fn(Box::new(move |t, c| after(t, c, info))), 0); } } - (State::Nok, None) + (State::Nok, 0) } /// After `]`. @@ -346,7 +346,7 @@ fn ok(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult { info.media.end.1 = tokenizer.events.len() - 1; tokenizer.media_list.push(info.media); tokenizer.register_resolver_before("media".to_string(), Box::new(resolve_media)); - (State::Ok, Some(vec![code])) + (State::Ok, if matches!(code, Code::None) { 0 } else { 1 }) } /// Done, it’s nothing. @@ -367,7 +367,7 @@ fn nok(tokenizer: &mut Tokenizer, _code: Code, label_start_index: usize) -> Stat .get_mut(label_start_index) .unwrap(); label_start.balanced = true; - (State::Nok, None) + (State::Nok, 0) } /// Before a resource, at `(`. @@ -383,7 +383,7 @@ fn resource(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.enter(Token::ResourceMarker); tokenizer.consume(code); tokenizer.exit(Token::ResourceMarker); - (State::Fn(Box::new(resource_start)), None) + (State::Fn(Box::new(resource_start)), 0) } _ => unreachable!("expected `(`"), } @@ -489,9 +489,9 @@ fn resource_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.consume(code); tokenizer.exit(Token::ResourceMarker); tokenizer.exit(Token::Resource); - (State::Ok, None) + (State::Ok, 0) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -559,9 +559,9 @@ fn full_reference_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult false, ))) { - (State::Ok, Some(vec![code])) + (State::Ok, if matches!(code, Code::None) { 0 } else { 1 }) } else { - (State::Nok, None) + (State::Nok, 0) } } @@ -580,9 +580,9 @@ fn collapsed_reference(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.enter(Token::ReferenceMarker); tokenizer.consume(code); tokenizer.exit(Token::ReferenceMarker); - (State::Fn(Box::new(collapsed_reference_open)), None) + (State::Fn(Box::new(collapsed_reference_open)), 0) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -601,9 +601,9 @@ fn collapsed_reference_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnRes tokenizer.consume(code); tokenizer.exit(Token::ReferenceMarker); tokenizer.exit(Token::Reference); - (State::Ok, None) + (State::Ok, 0) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs index fd7a42d..3764f20 100644 --- a/src/construct/label_start_image.rs +++ b/src/construct/label_start_image.rs @@ -45,9 +45,9 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.enter(Token::LabelImageMarker); tokenizer.consume(code); tokenizer.exit(Token::LabelImageMarker); - (State::Fn(Box::new(open)), None) + (State::Fn(Box::new(open)), 0) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -71,8 +71,8 @@ pub fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { inactive: false, }); tokenizer.register_resolver_before("media".to_string(), Box::new(resolve_media)); - (State::Ok, None) + (State::Ok, 0) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } diff --git a/src/construct/label_start_link.rs b/src/construct/label_start_link.rs index aeaa4eb..404b91a 100644 --- a/src/construct/label_start_link.rs +++ b/src/construct/label_start_link.rs @@ -52,8 +52,8 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { inactive: false, }); tokenizer.register_resolver_before("media".to_string(), Box::new(resolve_media)); - (State::Ok, None) + (State::Ok, 0) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } diff --git a/src/construct/list.rs b/src/construct/list.rs index 06be7ec..289398a 100644 --- a/src/construct/list.rs +++ b/src/construct/list.rs @@ -148,7 +148,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.enter(Token::ListItem); tokenizer.go(space_or_tab_min_max(0, max), before)(tokenizer, code) } else { - (State::Nok, None) + (State::Nok, 0) } } @@ -170,7 +170,7 @@ fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.enter(Token::ListItemValue); inside(tokenizer, code, 0) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -197,16 +197,13 @@ fn inside(tokenizer: &mut Tokenizer, code: Code, size: usize) -> StateFnResult { match code { Code::Char(char) if char.is_ascii_digit() && size + 1 < LIST_ITEM_VALUE_SIZE_MAX => { tokenizer.consume(code); - ( - State::Fn(Box::new(move |t, c| inside(t, c, size + 1))), - None, - ) + (State::Fn(Box::new(move |t, c| inside(t, c, size + 1))), 0) } Code::Char('.' | ')') if !tokenizer.interrupt || size < 2 => { tokenizer.exit(Token::ListItemValue); marker(tokenizer, code) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -222,7 +219,7 @@ fn marker(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.enter(Token::ListItemMarker); tokenizer.consume(code); tokenizer.exit(Token::ListItemMarker); - (State::Fn(Box::new(marker_after)), None) + (State::Fn(Box::new(marker_after)), 0) } /// After a list item marker. @@ -278,9 +275,9 @@ fn whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// ``` fn whitespace_after(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { if matches!(code, Code::VirtualSpace | Code::Char('\t' | ' ')) { - (State::Nok, None) + (State::Nok, 0) } else { - (State::Ok, Some(vec![code])) + (State::Ok, if matches!(code, Code::None) { 0 } else { 1 }) } } @@ -296,9 +293,9 @@ fn prefix_other(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.enter(Token::SpaceOrTab); tokenizer.consume(code); tokenizer.exit(Token::SpaceOrTab); - (State::Fn(Box::new(|t, c| after(t, c, false))), None) + (State::Fn(Box::new(|t, c| after(t, c, false))), 0) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -310,7 +307,7 @@ fn prefix_other(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// ``` fn after(tokenizer: &mut Tokenizer, code: Code, blank: bool) -> StateFnResult { if blank && tokenizer.interrupt { - (State::Nok, None) + (State::Nok, 0) } else { let start = skip::to_back( &tokenizer.events, @@ -326,7 +323,7 @@ fn after(tokenizer: &mut Tokenizer, code: Code, blank: bool) -> StateFnResult { tokenizer.exit(Token::ListItemPrefix); tokenizer.register_resolver_before("list_item".to_string(), Box::new(resolve_list_item)); - (State::Ok, Some(vec![code])) + (State::Ok, if matches!(code, Code::None) { 0 } else { 1 }) } } @@ -356,7 +353,7 @@ pub fn blank_cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { let size = container.size; if container.blank_initial { - (State::Nok, None) + (State::Nok, 0) } else { // Consume, optionally, at most `size`. tokenizer.go(space_or_tab_min_max(0, size), ok)(tokenizer, code) @@ -382,12 +379,12 @@ pub fn not_blank_cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// A state fn to yield [`State::Ok`]. pub fn ok(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - (State::Ok, Some(vec![code])) + (State::Ok, if matches!(code, Code::None) { 0 } else { 1 }) } /// A state fn to yield [`State::Nok`]. fn nok(_tokenizer: &mut Tokenizer, _code: Code) -> StateFnResult { - (State::Nok, None) + (State::Nok, 0) } /// Find adjacent list items with the same marker. diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs index a2eb6ff..e43ee43 100644 --- a/src/construct/paragraph.rs +++ b/src/construct/paragraph.rs @@ -69,11 +69,11 @@ fn inside(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.register_resolver_before("paragraph".to_string(), Box::new(resolve)); // You’d be interrupting. tokenizer.interrupt = true; - (State::Ok, Some(vec![code])) + (State::Ok, if matches!(code, Code::None) { 0 } else { 1 }) } _ => { tokenizer.consume(code); - (State::Fn(Box::new(inside)), None) + (State::Fn(Box::new(inside)), 0) } } } diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs index 98b1877..3701e40 100644 --- a/src/construct/partial_data.rs +++ b/src/construct/partial_data.rs @@ -20,7 +20,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code, stop: &'static [Code]) -> St if stop.contains(&code) { tokenizer.enter(Token::Data); tokenizer.consume(code); - (State::Fn(Box::new(move |t, c| data(t, c, stop))), None) + (State::Fn(Box::new(move |t, c| data(t, c, stop))), 0) } else { at_break(tokenizer, code, stop) } @@ -34,16 +34,16 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code, stop: &'static [Code]) -> St /// ``` fn at_break(tokenizer: &mut Tokenizer, code: Code, stop: &'static [Code]) -> StateFnResult { match code { - Code::None => (State::Ok, None), + Code::None => (State::Ok, 0), Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { tokenizer.enter(Token::LineEnding); tokenizer.consume(code); tokenizer.exit(Token::LineEnding); - (State::Fn(Box::new(move |t, c| at_break(t, c, stop))), None) + (State::Fn(Box::new(move |t, c| at_break(t, c, stop))), 0) } _ if stop.contains(&code) => { tokenizer.register_resolver("data".to_string(), Box::new(resolve_data)); - (State::Ok, Some(vec![code])) + (State::Ok, if matches!(code, Code::None) { 0 } else { 1 }) } _ => { tokenizer.enter(Token::Data); @@ -70,7 +70,7 @@ fn data(tokenizer: &mut Tokenizer, code: Code, stop: &'static [Code]) -> StateFn at_break(tokenizer, code, stop) } else { tokenizer.consume(code); - (State::Fn(Box::new(move |t, c| data(t, c, stop))), None) + (State::Fn(Box::new(move |t, c| data(t, c, stop))), 0) } } diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs index f5299d2..de2952c 100644 --- a/src/construct/partial_destination.rs +++ b/src/construct/partial_destination.rs @@ -123,15 +123,12 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code, options: Options) -> StateFn tokenizer.enter(info.options.marker.clone()); tokenizer.consume(code); tokenizer.exit(info.options.marker.clone()); - ( - State::Fn(Box::new(|t, c| enclosed_before(t, c, info))), - None, - ) + (State::Fn(Box::new(|t, c| enclosed_before(t, c, info))), 0) } Code::None | Code::CarriageReturnLineFeed | Code::VirtualSpace | Code::Char(' ' | ')') => { - (State::Nok, None) + (State::Nok, 0) } - Code::Char(char) if char.is_ascii_control() => (State::Nok, None), + Code::Char(char) if char.is_ascii_control() => (State::Nok, 0), Code::Char(_) => { tokenizer.enter(info.options.destination.clone()); tokenizer.enter(info.options.raw.clone()); @@ -155,7 +152,7 @@ fn enclosed_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFn tokenizer.exit(info.options.marker.clone()); tokenizer.exit(info.options.literal.clone()); tokenizer.exit(info.options.destination); - (State::Ok, None) + (State::Ok, 0) } else { tokenizer.enter(info.options.string.clone()); tokenizer.enter_with_content(Token::Data, Some(ContentType::String)); @@ -177,18 +174,15 @@ fn enclosed(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult enclosed_before(tokenizer, code, info) } Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r' | '<') => { - (State::Nok, None) + (State::Nok, 0) } Code::Char('\\') => { tokenizer.consume(code); - ( - State::Fn(Box::new(|t, c| enclosed_escape(t, c, info))), - None, - ) + (State::Fn(Box::new(|t, c| enclosed_escape(t, c, info))), 0) } _ => { tokenizer.consume(code); - (State::Fn(Box::new(|t, c| enclosed(t, c, info))), None) + (State::Fn(Box::new(|t, c| enclosed(t, c, info))), 0) } } } @@ -203,7 +197,7 @@ fn enclosed_escape(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFn match code { Code::Char('<' | '>' | '\\') => { tokenizer.consume(code); - (State::Fn(Box::new(|t, c| enclosed(t, c, info))), None) + (State::Fn(Box::new(|t, c| enclosed(t, c, info))), 0) } _ => enclosed(tokenizer, code, info), } @@ -219,11 +213,11 @@ fn raw(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult { match code { Code::Char('(') => { if info.balance >= info.options.limit { - (State::Nok, None) + (State::Nok, 0) } else { tokenizer.consume(code); info.balance += 1; - (State::Fn(Box::new(move |t, c| raw(t, c, info))), None) + (State::Fn(Box::new(move |t, c| raw(t, c, info))), 0) } } Code::Char(')') => { @@ -232,11 +226,11 @@ fn raw(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult { tokenizer.exit(info.options.string.clone()); tokenizer.exit(info.options.raw.clone()); tokenizer.exit(info.options.destination); - (State::Ok, Some(vec![code])) + (State::Ok, if matches!(code, Code::None) { 0 } else { 1 }) } else { tokenizer.consume(code); info.balance -= 1; - (State::Fn(Box::new(move |t, c| raw(t, c, info))), None) + (State::Fn(Box::new(move |t, c| raw(t, c, info))), 0) } } Code::None @@ -244,26 +238,23 @@ fn raw(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult { | Code::VirtualSpace | Code::Char('\t' | '\n' | '\r' | ' ') => { if info.balance > 0 { - (State::Nok, None) + (State::Nok, 0) } else { tokenizer.exit(Token::Data); tokenizer.exit(info.options.string.clone()); tokenizer.exit(info.options.raw.clone()); tokenizer.exit(info.options.destination); - (State::Ok, Some(vec![code])) + (State::Ok, if matches!(code, Code::None) { 0 } else { 1 }) } } - Code::Char(char) if char.is_ascii_control() => (State::Nok, None), + Code::Char(char) if char.is_ascii_control() => (State::Nok, 0), Code::Char('\\') => { tokenizer.consume(code); - ( - State::Fn(Box::new(move |t, c| raw_escape(t, c, info))), - None, - ) + (State::Fn(Box::new(move |t, c| raw_escape(t, c, info))), 0) } Code::Char(_) => { tokenizer.consume(code); - (State::Fn(Box::new(move |t, c| raw(t, c, info))), None) + (State::Fn(Box::new(move |t, c| raw(t, c, info))), 0) } } } @@ -278,7 +269,7 @@ fn raw_escape(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResul match code { Code::Char('(' | ')' | '\\') => { tokenizer.consume(code); - (State::Fn(Box::new(move |t, c| raw(t, c, info))), None) + (State::Fn(Box::new(move |t, c| raw(t, c, info))), 0) } _ => raw(tokenizer, code, info), } diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs index b1d02e8..0892dbd 100644 --- a/src/construct/partial_label.rs +++ b/src/construct/partial_label.rs @@ -110,9 +110,9 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code, options: Options) -> StateFn tokenizer.consume(code); tokenizer.exit(info.options.marker.clone()); tokenizer.enter(info.options.string.clone()); - (State::Fn(Box::new(|t, c| at_break(t, c, info))), None) + (State::Fn(Box::new(|t, c| at_break(t, c, info))), 0) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -124,16 +124,16 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code, options: Options) -> StateFn /// ``` fn at_break(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult { match code { - Code::None | Code::Char('[') => (State::Nok, None), - Code::Char(']') if !info.data => (State::Nok, None), - _ if info.size > LINK_REFERENCE_SIZE_MAX => (State::Nok, None), + Code::None | Code::Char('[') => (State::Nok, 0), + Code::Char(']') if !info.data => (State::Nok, 0), + _ if info.size > LINK_REFERENCE_SIZE_MAX => (State::Nok, 0), Code::Char(']') => { tokenizer.exit(info.options.string.clone()); tokenizer.enter(info.options.marker.clone()); tokenizer.consume(code); tokenizer.exit(info.options.marker.clone()); tokenizer.exit(info.options.label); - (State::Ok, None) + (State::Ok, 0) } Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => tokenizer.go( space_or_tab_eol_with_options(EolOptions { @@ -179,7 +179,7 @@ fn label(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult Code::VirtualSpace | Code::Char('\t' | ' ') => { tokenizer.consume(code); info.size += 1; - (State::Fn(Box::new(|t, c| label(t, c, info))), None) + (State::Fn(Box::new(|t, c| label(t, c, info))), 0) } Code::Char('\\') => { tokenizer.consume(code); @@ -187,7 +187,7 @@ fn label(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult if !info.data { info.data = true; } - (State::Fn(Box::new(|t, c| escape(t, c, info))), None) + (State::Fn(Box::new(|t, c| escape(t, c, info))), 0) } Code::Char(_) => { tokenizer.consume(code); @@ -195,7 +195,7 @@ fn label(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult if !info.data { info.data = true; } - (State::Fn(Box::new(|t, c| label(t, c, info))), None) + (State::Fn(Box::new(|t, c| label(t, c, info))), 0) } } } @@ -211,7 +211,7 @@ fn escape(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResul Code::Char('[' | '\\' | ']') => { tokenizer.consume(code); info.size += 1; - (State::Fn(Box::new(|t, c| label(t, c, info))), None) + (State::Fn(Box::new(|t, c| label(t, c, info))), 0) } _ => label(tokenizer, code, info), } diff --git a/src/construct/partial_non_lazy_continuation.rs b/src/construct/partial_non_lazy_continuation.rs index 37e9ce3..c3b82c7 100644 --- a/src/construct/partial_non_lazy_continuation.rs +++ b/src/construct/partial_non_lazy_continuation.rs @@ -26,9 +26,9 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.enter(Token::LineEnding); tokenizer.consume(code); tokenizer.exit(Token::LineEnding); - (State::Fn(Box::new(after)), None) + (State::Fn(Box::new(after)), 0) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -41,8 +41,8 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// ``` fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { if tokenizer.lazy { - (State::Nok, None) + (State::Nok, 0) } else { - (State::Ok, Some(vec![code])) + (State::Ok, if matches!(code, Code::None) { 0 } else { 1 }) } } diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs index 9ee8b6c..aacf28c 100644 --- a/src/construct/partial_space_or_tab.rs +++ b/src/construct/partial_space_or_tab.rs @@ -108,7 +108,7 @@ pub fn space_or_tab_eol_with_options(options: EolOptions) -> Box<StateFn> { kind: Token::SpaceOrTab, min: 1, max: usize::MAX, - content_type: info.options.content_type, + content_type: info.options.content_type.clone(), connect: info.options.connect, }), move |ok| { @@ -135,7 +135,8 @@ pub fn space_or_tab_eol_with_options(options: EolOptions) -> Box<StateFn> { fn start(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult { match code { Code::VirtualSpace | Code::Char('\t' | ' ') if info.options.max > 0 => { - tokenizer.enter_with_content(info.options.kind.clone(), info.options.content_type); + tokenizer + .enter_with_content(info.options.kind.clone(), info.options.content_type.clone()); if info.options.content_type.is_some() { let index = tokenizer.events.len() - 1; @@ -144,16 +145,15 @@ fn start(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult tokenizer.consume(code); info.size += 1; - (State::Fn(Box::new(|t, c| inside(t, c, info))), None) + (State::Fn(Box::new(|t, c| inside(t, c, info))), 0) } - _ => ( + _ => { if info.options.min == 0 { - State::Ok + (State::Ok, if matches!(code, Code::None) { 0 } else { 1 }) } else { - State::Nok - }, - Some(vec![code]), - ), + (State::Nok, 0) + } + } } } @@ -168,18 +168,15 @@ fn inside(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResul Code::VirtualSpace | Code::Char('\t' | ' ') if info.size < info.options.max => { tokenizer.consume(code); info.size += 1; - (State::Fn(Box::new(|t, c| inside(t, c, info))), None) + (State::Fn(Box::new(|t, c| inside(t, c, info))), 0) } _ => { tokenizer.exit(info.options.kind.clone()); - ( - if info.size >= info.options.min { - State::Ok - } else { - State::Nok - }, - Some(vec![code]), - ) + if info.size >= info.options.min { + (State::Ok, if matches!(code, Code::None) { 0 } else { 1 }) + } else { + (State::Nok, 0) + } } } } @@ -194,7 +191,7 @@ fn inside(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResul fn after_space_or_tab(tokenizer: &mut Tokenizer, code: Code, mut info: EolInfo) -> StateFnResult { match code { Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { - tokenizer.enter_with_content(Token::LineEnding, info.options.content_type); + tokenizer.enter_with_content(Token::LineEnding, info.options.content_type.clone()); if info.connect { let index = tokenizer.events.len() - 1; @@ -205,10 +202,10 @@ fn after_space_or_tab(tokenizer: &mut Tokenizer, code: Code, mut info: EolInfo) tokenizer.consume(code); tokenizer.exit(Token::LineEnding); - (State::Fn(Box::new(|t, c| after_eol(t, c, info))), None) + (State::Fn(Box::new(|t, c| after_eol(t, c, info))), 0) } - _ if info.ok => (State::Ok, Some(vec![code])), - _ => (State::Nok, None), + _ if info.ok => (State::Ok, if matches!(code, Code::None) { 0 } else { 1 }), + _ => (State::Nok, 0), } } @@ -246,8 +243,8 @@ fn after_more_space_or_tab(_tokenizer: &mut Tokenizer, code: Code) -> StateFnRes code, Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') ) { - (State::Nok, None) + (State::Nok, 0) } else { - (State::Ok, Some(vec![code])) + (State::Ok, if matches!(code, Code::None) { 0 } else { 1 }) } } diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs index 852693a..f69d609 100644 --- a/src/construct/partial_title.rs +++ b/src/construct/partial_title.rs @@ -147,9 +147,9 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code, options: Options) -> StateFn tokenizer.enter(info.options.marker.clone()); tokenizer.consume(code); tokenizer.exit(info.options.marker.clone()); - (State::Fn(Box::new(|t, c| begin(t, c, info))), None) + (State::Fn(Box::new(|t, c| begin(t, c, info))), 0) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -168,7 +168,7 @@ fn begin(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { tokenizer.consume(code); tokenizer.exit(info.options.marker.clone()); tokenizer.exit(info.options.title); - (State::Ok, None) + (State::Ok, 0) } _ => { tokenizer.enter(info.options.string.clone()); @@ -189,7 +189,7 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes tokenizer.exit(info.options.string.clone()); begin(tokenizer, code, info) } - Code::None => (State::Nok, None), + Code::None => (State::Nok, 0), Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => tokenizer.go( space_or_tab_eol_with_options(EolOptions { content_type: Some(ContentType::String), @@ -233,11 +233,11 @@ fn title(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { } Code::Char('\\') => { tokenizer.consume(code); - (State::Fn(Box::new(|t, c| escape(t, c, info))), None) + (State::Fn(Box::new(|t, c| escape(t, c, info))), 0) } _ => { tokenizer.consume(code); - (State::Fn(Box::new(|t, c| title(t, c, info))), None) + (State::Fn(Box::new(|t, c| title(t, c, info))), 0) } } } @@ -252,7 +252,7 @@ fn escape(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { match code { Code::Char(char) if char == info.kind.as_char() => { tokenizer.consume(code); - (State::Fn(Box::new(|t, c| title(t, c, info))), None) + (State::Fn(Box::new(|t, c| title(t, c, info))), 0) } _ => title(tokenizer, code, info), } diff --git a/src/construct/partial_whitespace.rs b/src/construct/partial_whitespace.rs index c9ec564..7624ec0 100644 --- a/src/construct/partial_whitespace.rs +++ b/src/construct/partial_whitespace.rs @@ -52,11 +52,11 @@ fn at_eol(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { ) { ok(tokenizer, code) } else { - (State::Nok, None) + (State::Nok, 0) } } /// Fine. fn ok(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - (State::Ok, Some(vec![code])) + (State::Ok, if matches!(code, Code::None) { 0 } else { 1 }) } diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs index 48fb838..d87778f 100644 --- a/src/construct/thematic_break.rs +++ b/src/construct/thematic_break.rs @@ -145,7 +145,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.enter(Token::ThematicBreak); tokenizer.go(space_or_tab_min_max(0, max), before)(tokenizer, code) } else { - (State::Nok, None) + (State::Nok, 0) } } @@ -165,7 +165,7 @@ fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { size: 0, }, ), - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -183,13 +183,13 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult tokenizer.exit(Token::ThematicBreak); // Feel free to interrupt. tokenizer.interrupt = false; - (State::Ok, Some(vec![code])) + (State::Ok, if matches!(code, Code::None) { 0 } else { 1 }) } Code::Char(char) if char == info.kind.as_char() => { tokenizer.enter(Token::ThematicBreakSequence); sequence(tokenizer, code, info) } - _ => (State::Nok, None), + _ => (State::Nok, 0), } } @@ -204,7 +204,7 @@ fn sequence(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes Code::Char(char) if char == info.kind.as_char() => { tokenizer.consume(code); info.size += 1; - (State::Fn(Box::new(|t, c| sequence(t, c, info))), None) + (State::Fn(Box::new(|t, c| sequence(t, c, info))), 0) } _ => { tokenizer.exit(Token::ThematicBreakSequence); diff --git a/src/content/document.rs b/src/content/document.rs index 1a0ffee..163dcda 100644 --- a/src/content/document.rs +++ b/src/content/document.rs @@ -363,10 +363,10 @@ fn containers_after( tokenizer.go_until( state, |code| matches!(code, Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')), - move |(state, remainder)| { + move |(state, back)| { ( State::Fn(Box::new(move |t, c| flow_end(t, c, info, state))), - remainder, + back, ) }, )(tokenizer, code) @@ -415,7 +415,7 @@ fn flow_end( resolve(tokenizer, &mut info); - (State::Ok, Some(vec![code])) + (State::Ok, if matches!(code, Code::None) { 0 } else { 1 }) } State::Nok => unreachable!("unexpected `nok` from flow"), State::Fn(func) => { @@ -441,7 +441,7 @@ fn exit_containers( info.next = Box::new(flow); // This is weird but Rust needs a function there. let result = tokenizer.flush(next); assert!(matches!(result.0, State::Ok)); - assert!(result.1.is_none()); + assert_eq!(result.1, 0); if *phase == Phase::Prefix { info.index = tokenizer.events.len(); diff --git a/src/content/flow.rs b/src/content/flow.rs index e52f113..722e2bb 100644 --- a/src/content/flow.rs +++ b/src/content/flow.rs @@ -41,7 +41,7 @@ use crate::tokenizer::{Code, State, StateFnResult, Tokenizer}; /// ``` pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { - Code::None => (State::Ok, None), + Code::None => (State::Ok, 0), _ => tokenizer.attempt(blank_line, |ok| { Box::new(if ok { blank_line_after } else { initial_before }) })(tokenizer, code), @@ -62,7 +62,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// ``` fn initial_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { - Code::None => (State::Ok, None), + Code::None => (State::Ok, 0), _ => tokenizer.attempt_n( vec![ Box::new(code_indented), @@ -87,14 +87,14 @@ fn initial_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// ``` fn blank_line_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { - Code::None => (State::Ok, None), + Code::None => (State::Ok, 0), Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { tokenizer.enter(Token::BlankLineEnding); tokenizer.consume(code); tokenizer.exit(Token::BlankLineEnding); // Feel free to interrupt. tokenizer.interrupt = false; - (State::Fn(Box::new(start)), None) + (State::Fn(Box::new(start)), 0) } _ => unreachable!("expected eol/eof"), } @@ -111,12 +111,12 @@ fn blank_line_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// ``` fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { - Code::None => (State::Ok, None), + Code::None => (State::Ok, 0), Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { tokenizer.enter(Token::LineEnding); tokenizer.consume(code); tokenizer.exit(Token::LineEnding); - (State::Fn(Box::new(start)), None) + (State::Fn(Box::new(start)), 0) } _ => unreachable!("expected eol/eof"), } diff --git a/src/content/string.rs b/src/content/string.rs index 609a788..f63b8be 100644 --- a/src/content/string.rs +++ b/src/content/string.rs @@ -29,7 +29,7 @@ const MARKERS: [Code; 5] = [ /// Before string. pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { - Code::None => (State::Ok, None), + Code::None => (State::Ok, 0), _ => tokenizer.attempt_n( vec![ Box::new(character_reference), diff --git a/src/content/text.rs b/src/content/text.rs index 73a798a..c339324 100644 --- a/src/content/text.rs +++ b/src/content/text.rs @@ -47,7 +47,7 @@ const MARKERS: [Code; 12] = [ /// Before text. pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { - Code::None => (State::Ok, None), + Code::None => (State::Ok, 0), _ => tokenizer.attempt_n( vec![ Box::new(attention), diff --git a/src/subtokenize.rs b/src/subtokenize.rs index 1f4c6e3..ad6f53f 100644 --- a/src/subtokenize.rs +++ b/src/subtokenize.rs @@ -47,12 +47,14 @@ pub fn link_to(events: &mut [Event], pevious: usize, next: usize) { .link .as_mut() .expect("expected `link` on previous"); - let conten_type_previous = link_previous.content_type; link_previous.next = Some(next); let link_next = events[next].link.as_mut().expect("expected `link` on next"); link_next.previous = Some(pevious); - assert_eq!(conten_type_previous, link_next.content_type); + assert_eq!( + events[pevious].link.as_ref().unwrap().content_type, + events[next].link.as_ref().unwrap().content_type + ); } /// Parse linked events. @@ -83,7 +85,7 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool { } else { text })), - None, + 0, ); // Loop through links to pass them in order to the subtokenizer. @@ -110,7 +112,7 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool { func, link_curr.next == None, ); - assert!(result.1.is_none(), "expected no remainder"); + assert_eq!(result.1, 0, "expected no remainder"); link_index = link_curr.next; } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index f5ac1af..544e8b0 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -16,7 +16,7 @@ use crate::token::{Token, VOID_TOKENS}; use crate::util::edit_map::EditMap; /// Embedded content type. -#[derive(Debug, Clone, Copy, PartialEq)] +#[derive(Debug, Clone, PartialEq)] pub enum ContentType { /// Represents [text content][crate::content::text]. Text, @@ -44,7 +44,7 @@ pub enum Code { /// /// The interface for the location in the document comes from unist `Point`: /// <https://github.com/syntax-tree/unist#point>. -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone)] pub struct Point { /// 1-indexed line number. pub line: usize, @@ -92,7 +92,7 @@ pub type StateFn = dyn FnOnce(&mut Tokenizer, Code) -> StateFnResult; /// Each [`StateFn`][] yields something back: primarily the state. /// In certain cases, it can also yield back up parsed codes that were passed down. -pub type StateFnResult = (State, Option<Vec<Code>>); +pub type StateFnResult = (State, usize); /// Callback that can be registered and is called when the tokenizer is done. /// @@ -479,11 +479,11 @@ impl<'a> Tokenizer<'a> { state_fn, |_code| false, vec![], - |result: (Vec<Code>, Vec<Code>), ok, tokenizer: &mut Tokenizer, _state| { - if ok { - feed_impl(tokenizer, &if ok { result.1 } else { result.0 }, after) + |result: (Vec<Code>, Vec<Code>), tokenizer: &mut Tokenizer, state| { + if matches!(state, State::Ok) { + feed_impl(tokenizer, &result.1, after) } else { - (State::Nok, None) + (State::Nok, 0) } }, ) @@ -502,9 +502,9 @@ impl<'a> Tokenizer<'a> { state_fn, until, vec![], - |result: (Vec<Code>, Vec<Code>), _ok, tokenizer: &mut Tokenizer, state| { + |result: (Vec<Code>, Vec<Code>), tokenizer: &mut Tokenizer, state| { tokenizer.consumed = true; - done(check_statefn_result((state, Some(result.1)))) + done((state, result.1.len())) }, ) } @@ -529,9 +529,10 @@ impl<'a> Tokenizer<'a> { state_fn, |_code| false, vec![], - |result: (Vec<Code>, Vec<Code>), ok, tokenizer: &mut Tokenizer, _state| { + |mut result: (Vec<Code>, Vec<Code>), tokenizer: &mut Tokenizer, state| { tokenizer.free(previous); - feed_impl(tokenizer, &result.0, done(ok)) + result.0.append(&mut result.1); + feed_impl(tokenizer, &result.0, done(matches!(state, State::Ok))) }, ) } @@ -558,12 +559,19 @@ impl<'a> Tokenizer<'a> { state_fn, |_code| false, vec![], - |result: (Vec<Code>, Vec<Code>), ok, tokenizer: &mut Tokenizer, _state| { + |mut result: (Vec<Code>, Vec<Code>), tokenizer: &mut Tokenizer, state| { + let ok = matches!(state, State::Ok); + if !ok { tokenizer.free(previous); } - let codes = if ok { result.1 } else { result.0 }; + let codes = if ok { + result.1 + } else { + result.0.append(&mut result.1); + result.0 + }; log::debug!( "attempt: {:?}, codes: {:?}, at {:?}", @@ -571,6 +579,7 @@ impl<'a> Tokenizer<'a> { codes, tokenizer.point ); + feed_impl(tokenizer, &codes, done(ok)) }, ) @@ -670,19 +679,19 @@ fn attempt_impl( state: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static, mut pause: impl FnMut(Code) -> bool + 'static, mut codes: Vec<Code>, - done: impl FnOnce((Vec<Code>, Vec<Code>), bool, &mut Tokenizer, State) -> StateFnResult + 'static, + done: impl FnOnce((Vec<Code>, Vec<Code>), &mut Tokenizer, State) -> StateFnResult + 'static, ) -> Box<StateFn> { Box::new(|tokenizer, code| { if !codes.is_empty() && pause(tokenizer.previous) { - return done( - (codes, vec![code]), - false, - tokenizer, - State::Fn(Box::new(state)), - ); + let after = if matches!(code, Code::None) { + vec![] + } else { + vec![code] + }; + return done((codes, after), tokenizer, State::Fn(Box::new(state))); } - let (next, remainder) = check_statefn_result(state(tokenizer, code)); + let (next, back) = state(tokenizer, code); match code { Code::None => {} @@ -691,22 +700,19 @@ fn attempt_impl( } } - if let Some(ref list) = remainder { - assert!( - list.len() <= codes.len(), - "`remainder` must be less than or equal to `codes`" - ); - } + assert!( + back <= codes.len(), + "`back` must be smaller than or equal to `codes.len()`" + ); match next { - State::Ok => { - let remaining = if let Some(x) = remainder { x } else { vec![] }; - check_statefn_result(done((codes, remaining), true, tokenizer, next)) + State::Ok | State::Nok => { + let remaining = codes.split_off(codes.len() - back); + done((codes, remaining), tokenizer, next) } - State::Nok => check_statefn_result(done((codes, vec![]), false, tokenizer, next)), State::Fn(func) => { - assert!(remainder.is_none(), "expected no remainder"); - check_statefn_result((State::Fn(attempt_impl(func, pause, codes, done)), None)) + assert_eq!(back, 0, "expected no remainder"); + (State::Fn(attempt_impl(func, pause, codes, done)), 0) } } }) @@ -727,27 +733,18 @@ fn feed_impl( let code = codes[index]; match state { - State::Nok | State::Ok => { - break; - } + State::Ok | State::Nok => break, State::Fn(func) => { - log::debug!("main: passing: `{:?}`", code); + log::debug!("main: passing: `{:?}` ({:?})", code, index); tokenizer.expect(code, false); - let (next, remainder) = check_statefn_result(func(tokenizer, code)); + let (next, back) = func(tokenizer, code); state = next; - index = index + 1 - - (if let Some(ref x) = remainder { - x.len() - } else { - 0 - }); + index = index + 1 - back; } } } - // Yield to a higher loop. - // To do: do not copy? - check_statefn_result((state, Some(codes[index..].to_vec()))) + (state, codes.len() - index) } /// Flush `start`: pass `eof`s to it until done. @@ -766,8 +763,8 @@ fn flush_impl( let code = Code::None; log::debug!("main: passing eof"); tokenizer.expect(code, false); - let (next, remainder) = check_statefn_result(func(tokenizer, code)); - assert!(remainder.is_none(), "expected no remainder"); + let (next, remainder) = func(tokenizer, code); + assert_eq!(remainder, 0, "expected no remainder"); state = next; } } @@ -778,7 +775,7 @@ fn flush_impl( _ => unreachable!("expected final state to be `State::Ok`"), } - check_statefn_result((state, None)) + (state, 0) } /// Define a jump between two places. @@ -798,27 +795,3 @@ fn define_skip_impl(tokenizer: &mut Tokenizer, line: usize, info: (usize, usize, tokenizer.account_for_potential_skip(); } - -/// Check a [`StateFnResult`][], make sure its valid (that there are no bugs), -/// and clean a final eof passed back in `remainder`. -fn check_statefn_result(result: StateFnResult) -> StateFnResult { - let (state, mut remainder) = result; - - // Remove an eof. - // For convencience, feeding back an eof is allowed, but cleaned here. - // Most states handle eof and eol in the same branch, and hence pass - // all back. - // This might not be needed, because if EOF is passed back, we’re at the EOF. - // But they’re not supposed to be in codes, so here we remove them. - if let Some(ref mut list) = remainder { - if Some(&Code::None) == list.last() { - list.pop(); - } - - if list.is_empty() { - return (state, None); - } - } - - (state, remainder) -} |