From bd0cb0d0395abb06941960938aacc3639148a96c Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Fri, 8 Jul 2022 10:51:45 +0200 Subject: Add support for concrete constructs --- src/construct/code_fenced.rs | 19 ++++++++++++++++--- src/construct/html_flow.rs | 25 +++++++++++++++++++++++-- 2 files changed, 39 insertions(+), 5 deletions(-) (limited to 'src/construct') diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs index 617979f..e2165a9 100644 --- a/src/construct/code_fenced.rs +++ b/src/construct/code_fenced.rs @@ -175,6 +175,8 @@ struct Info { prefix: usize, /// Kind of fences. kind: Kind, + /// To do. + concrete: bool, } /// Start of fenced code. @@ -218,6 +220,7 @@ fn before_sequence_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult Info { prefix, size: 0, + concrete: tokenizer.concrete, kind: Kind::from_code(code), }, ) @@ -264,6 +267,8 @@ fn info_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResu match code { Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { tokenizer.exit(Token::CodeFencedFence); + // Do not form containers. + tokenizer.concrete = true; at_break(tokenizer, code, info) } _ => { @@ -292,6 +297,8 @@ fn info_inside( tokenizer.exit(Token::Data); tokenizer.exit(Token::CodeFencedFenceInfo); tokenizer.exit(Token::CodeFencedFence); + // Do not form containers. + tokenizer.concrete = true; at_break(tokenizer, code, info) } Code::VirtualSpace | Code::Char('\t' | ' ') => { @@ -322,6 +329,8 @@ fn meta_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResu match code { Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { tokenizer.exit(Token::CodeFencedFence); + // Do not form containers. + tokenizer.concrete = true; at_break(tokenizer, code, info) } _ => { @@ -345,6 +354,8 @@ fn meta(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { tokenizer.exit(Token::Data); tokenizer.exit(Token::CodeFencedFenceMeta); tokenizer.exit(Token::CodeFencedFence); + // Do not form containers. + tokenizer.concrete = true; at_break(tokenizer, code, info) } Code::Char('`') if info.kind == Kind::GraveAccent => (State::Nok, None), @@ -366,12 +377,12 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult let clone = info.clone(); match code { - Code::None => after(tokenizer, code), + Code::None => after(tokenizer, code, info), Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => tokenizer.attempt( |t, c| close_begin(t, c, info), |ok| { if ok { - Box::new(after) + Box::new(|t, c| after(t, c, clone)) } else { Box::new(|t, c| content_before(t, c, clone)) } @@ -557,9 +568,11 @@ fn content_continue(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateF /// console.log('1') /// ~~~| /// ``` -fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +fn after(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { tokenizer.exit(Token::CodeFenced); // Feel free to interrupt. tokenizer.interrupt = false; + // Restore previous `concrete`. + tokenizer.concrete = info.concrete; (State::Ok, Some(vec![code])) } diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs index fde0a34..f30db3f 100644 --- a/src/construct/html_flow.rs +++ b/src/construct/html_flow.rs @@ -106,8 +106,6 @@ use crate::token::Token; use crate::tokenizer::{Code, State, StateFnResult, Tokenizer}; use crate::util::codes::{parse, serialize}; -// To do: mark as concrete (block quotes or lists can’t “pierce” into HTML). - /// Kind of HTML (flow). #[derive(Debug, PartialEq)] enum Kind { @@ -195,6 +193,8 @@ struct Info { index: usize, /// Current quote, when in a double or single quoted attribute value. quote: Option, + /// To do. + concrete: bool, } /// Start of HTML (flow), before optional whitespace. @@ -240,6 +240,7 @@ fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { buffer: vec![], index: 0, quote: None, + concrete: tokenizer.concrete, }; match code { @@ -260,6 +261,8 @@ fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { Code::Char('?') => { info.kind = Kind::Instruction; tokenizer.consume(code); + // Do not form containers. + tokenizer.concrete = true; // While we’re in an instruction instead of a declaration, we’re on a `?` // right now, so we do need to search for `>`, similar to declarations. ( @@ -305,6 +308,8 @@ fn declaration_open(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> St Code::Char('A'..='Z' | 'a'..='z') => { tokenizer.consume(code); info.kind = Kind::Declaration; + // Do not form containers. + tokenizer.concrete = true; ( State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))), None, @@ -323,6 +328,8 @@ fn comment_open_inside(tokenizer: &mut Tokenizer, code: Code, info: Info) -> Sta match code { Code::Char('-') => { tokenizer.consume(code); + // Do not form containers. + tokenizer.concrete = true; ( State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))), None, @@ -348,6 +355,8 @@ fn cdata_open_inside(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> S if info.index == info.buffer.len() { info.buffer.clear(); + // Do not form containers. + tokenizer.concrete = true; (State::Fn(Box::new(|t, c| continuation(t, c, info))), None) } else { ( @@ -396,6 +405,8 @@ fn tag_name(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes if !slash && info.start_tag && HTML_RAW_NAMES.contains(&name) { info.kind = Kind::Raw; + // Do not form containers. + tokenizer.concrete = true; continuation(tokenizer, code, info) } else if HTML_BLOCK_NAMES.contains(&name) { // Basic is assumed, no need to set `kind`. @@ -406,6 +417,8 @@ fn tag_name(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes None, ) } else { + // Do not form containers. + tokenizer.concrete = true; continuation(tokenizer, code, info) } } else { @@ -439,6 +452,8 @@ fn basic_self_closing(tokenizer: &mut Tokenizer, code: Code, info: Info) -> Stat match code { Code::Char('>') => { tokenizer.consume(code); + // Do not form containers. + tokenizer.concrete = true; (State::Fn(Box::new(|t, c| continuation(t, c, info))), None) } _ => (State::Nok, None), @@ -695,6 +710,8 @@ fn complete_end(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnRes fn complete_after(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { match code { Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { + // Do not form containers. + tokenizer.concrete = true; continuation(tokenizer, code, info) } Code::VirtualSpace | Code::Char('\t' | ' ') => { @@ -793,6 +810,8 @@ fn html_continue_start(tokenizer: &mut Tokenizer, code: Code, info: Info) -> Sta tokenizer.exit(Token::HtmlFlow); // Feel free to interrupt. tokenizer.interrupt = false; + // Restore previous `concrete`. + tokenizer.concrete = info.concrete; (State::Ok, Some(vec![code])) } // To do: do not allow lazy lines. @@ -960,6 +979,8 @@ fn continuation_close(tokenizer: &mut Tokenizer, code: Code, info: Info) -> Stat tokenizer.exit(Token::HtmlFlow); // Feel free to interrupt. tokenizer.interrupt = false; + // Restore previous `concrete`. + tokenizer.concrete = info.concrete; (State::Ok, Some(vec![code])) } _ => { -- cgit