From b4aa82f0f1ea3143ab5f221b2c5f564158605c84 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Mon, 18 Jul 2022 11:30:49 +0200 Subject: Fix token that should be void --- src/construct/heading_setext.rs | 6 ++++-- src/token.rs | 44 +++++++++++++++++++++++++++++++++++++++++ src/tokenizer.rs | 11 ++++++++++- 3 files changed, 58 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs index 2078338..cb426a9 100644 --- a/src/construct/heading_setext.rs +++ b/src/construct/heading_setext.rs @@ -167,7 +167,10 @@ fn inside(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult { tokenizer.consume(code); (State::Fn(Box::new(move |t, c| inside(t, c, kind))), None) } - _ => tokenizer.attempt_opt(space_or_tab(), after)(tokenizer, code), + _ => { + tokenizer.exit(Token::HeadingSetextUnderline); + tokenizer.attempt_opt(space_or_tab(), after)(tokenizer, code) + } } } @@ -180,7 +183,6 @@ fn inside(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult { fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { - tokenizer.exit(Token::HeadingSetextUnderline); // Feel free to interrupt. tokenizer.interrupt = false; tokenizer.register_resolver("heading_setext".to_string(), Box::new(resolve)); diff --git a/src/token.rs b/src/token.rs index 8169657..2e5e2df 100644 --- a/src/token.rs +++ b/src/token.rs @@ -1877,3 +1877,47 @@ pub enum Token { /// ``` ThematicBreakSequence, } + +/// List of void tokens, used to make sure everything is working correctly. 
+pub const VOID_TOKENS: [Token; 40] = [ + Token::AttentionSequence, + Token::AutolinkEmail, + Token::AutolinkMarker, + Token::AutolinkProtocol, + Token::BlankLineEnding, + Token::BlockQuoteMarker, + Token::CharacterEscapeMarker, + Token::CharacterEscapeValue, + Token::CharacterReferenceMarker, + Token::CharacterReferenceMarkerHexadecimal, + Token::CharacterReferenceMarkerNumeric, + Token::CharacterReferenceMarkerSemi, + Token::CharacterReferenceValue, + Token::CodeFencedFenceSequence, + Token::CodeFlowChunk, + Token::CodeTextData, + Token::CodeTextLineEnding, + Token::CodeTextSequence, + Token::Data, + Token::DefinitionDestinationLiteralMarker, + Token::DefinitionLabelMarker, + Token::DefinitionMarker, + Token::DefinitionTitleMarker, + Token::EmphasisSequence, + Token::HardBreakEscapeMarker, + Token::HardBreakTrailingSpace, + Token::HeadingAtxSequence, + Token::HeadingSetextUnderline, + Token::HtmlFlowData, + Token::HtmlTextData, + Token::LabelImageMarker, + Token::LabelMarker, + Token::LineEnding, + Token::ListItemMarker, + Token::ListItemValue, + Token::ReferenceMarker, + Token::ResourceMarker, + Token::ResourceTitleMarker, + Token::StrongSequence, + Token::ThematicBreakSequence, +]; diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 10c877f..9b39e28 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -12,7 +12,7 @@ //! [`check`]: Tokenizer::check use crate::parser::ParseState; -use crate::token::Token; +use crate::token::{Token, VOID_TOKENS}; use std::collections::HashMap; /// Embedded content type. @@ -385,6 +385,15 @@ impl<'a> Tokenizer<'a> { "expected non-empty token" ); + if VOID_TOKENS.iter().any(|d| d == &token_type) { + assert!( + current_token == previous.token_type, + "expected token to be void (`{:?}`), instead of including `{:?}`", + current_token, + previous.token_type + ); + } + // A bit weird, but if we exit right after a line ending, we *don’t* want to consider // potential skips. if matches!( -- cgit