From bd0cb0d0395abb06941960938aacc3639148a96c Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Fri, 8 Jul 2022 10:51:45 +0200 Subject: Add support for concrete constructs --- readme.md | 7 +++---- src/construct/code_fenced.rs | 19 ++++++++++++++++--- src/construct/html_flow.rs | 25 +++++++++++++++++++++++-- src/content/document.rs | 15 ++++++++------- src/tokenizer.rs | 4 ++++ tests/block_quote.rs | 2 +- tests/code_fenced.rs | 22 ++++++++++------------ tests/html_flow.rs | 13 ++++++------- 8 files changed, 71 insertions(+), 36 deletions(-) diff --git a/readme.md b/readme.md index 0634eec..ad8aec0 100644 --- a/readme.md +++ b/readme.md @@ -48,8 +48,6 @@ cargo doc --document-private-items - [ ] (5) Containers: this will be rather messy, and depends a lot on how subtokenization is solved -- [ ] (3) Concrete constructs: HTML or code (fenced) cannot be “pierced” into by - containers - [ ] (3) Lazy lines, in containers, in flow in a paragraph, a line does not need to be indented - [ ] (5) There’s a lot of rust-related choosing whether to pass (mutable) @@ -129,7 +127,6 @@ cargo doc --document-private-items - [ ] (3) Fix some block quote bugs - [ ] (3) Lazy lines (`code indented`, `html flow`) -- [ ] (3) Concrete (`html flow`) - [ ] (8) list\ test (`character_reference`, `code_indented`, `heading_setext`, `html_flow`, `thematic_break`)\ @@ -274,4 +271,6 @@ important. - [x] (2) Fix resizing attention bug - [x] (2) Fix interleaving of attention/label - [x] (8) Add basic support for block quotes -- [x] (1) use `char::REPLACEMENT_CHARACTER`? +- [x] (1) Use `char::REPLACEMENT_CHARACTER`? +- [x] (3) Add support for concrete constructs + (html (flow) or code (fenced) cannot be “pierced” into by containers) diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs index 617979f..e2165a9 100644 --- a/src/construct/code_fenced.rs +++ b/src/construct/code_fenced.rs @@ -175,6 +175,8 @@ struct Info { prefix: usize, /// Kind of fences. kind: Kind, + /// To do. + concrete: bool, } /// Start of fenced code. @@ -218,6 +220,7 @@ fn before_sequence_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult Info { prefix, size: 0, + concrete: tokenizer.concrete, kind: Kind::from_code(code), }, ) @@ -264,6 +267,8 @@ fn info_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResu match code { Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { tokenizer.exit(Token::CodeFencedFence); + // Do not form containers. + tokenizer.concrete = true; at_break(tokenizer, code, info) } _ => { @@ -292,6 +297,8 @@ fn info_inside( tokenizer.exit(Token::Data); tokenizer.exit(Token::CodeFencedFenceInfo); tokenizer.exit(Token::CodeFencedFence); + // Do not form containers. + tokenizer.concrete = true; at_break(tokenizer, code, info) } Code::VirtualSpace | Code::Char('\t' | ' ') => { @@ -322,6 +329,8 @@ fn meta_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResu match code { Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { tokenizer.exit(Token::CodeFencedFence); + // Do not form containers. + tokenizer.concrete = true; at_break(tokenizer, code, info) } _ => { @@ -345,6 +354,8 @@ fn meta(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { tokenizer.exit(Token::Data); tokenizer.exit(Token::CodeFencedFenceMeta); tokenizer.exit(Token::CodeFencedFence); + // Do not form containers. + tokenizer.concrete = true; at_break(tokenizer, code, info) } Code::Char('`') if info.kind == Kind::GraveAccent => (State::Nok, None), @@ -366,12 +377,12 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult let clone = info.clone(); match code { - Code::None => after(tokenizer, code), + Code::None => after(tokenizer, code, info), Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => tokenizer.attempt( |t, c| close_begin(t, c, info), |ok| { if ok { - Box::new(after) + Box::new(|t, c| after(t, c, clone)) } else { Box::new(|t, c| content_before(t, c, clone)) } @@ -557,9 +568,11 @@ fn content_continue(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateF /// console.log('1') /// ~~~| /// ``` -fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +fn after(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { tokenizer.exit(Token::CodeFenced); // Feel free to interrupt. tokenizer.interrupt = false; + // Restore previous `concrete`. + tokenizer.concrete = info.concrete; (State::Ok, Some(vec![code])) } diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs index fde0a34..f30db3f 100644 --- a/src/construct/html_flow.rs +++ b/src/construct/html_flow.rs @@ -106,8 +106,6 @@ use crate::token::Token; use crate::tokenizer::{Code, State, StateFnResult, Tokenizer}; use crate::util::codes::{parse, serialize}; -// To do: mark as concrete (block quotes or lists can’t “pierce” into HTML). - /// Kind of HTML (flow). #[derive(Debug, PartialEq)] enum Kind { @@ -195,6 +193,8 @@ struct Info { index: usize, /// Current quote, when in a double or single quoted attribute value. quote: Option, + /// To do. + concrete: bool, } /// Start of HTML (flow), before optional whitespace. @@ -240,6 +240,7 @@ fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { buffer: vec![], index: 0, quote: None, + concrete: tokenizer.concrete, }; match code { @@ -260,6 +261,8 @@ fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { Code::Char('?') => { info.kind = Kind::Instruction; tokenizer.consume(code); + // Do not form containers. + tokenizer.concrete = true; // While we’re in an instruction instead of a declaration, we’re on a `?` // right now, so we do need to search for `>`, similar to declarations. ( @@ -305,6 +308,8 @@ fn declaration_open(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> St Code::Char('A'..='Z' | 'a'..='z') => { tokenizer.consume(code); info.kind = Kind::Declaration; + // Do not form containers. + tokenizer.concrete = true; ( State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))), None, @@ -323,6 +328,8 @@ fn comment_open_inside(tokenizer: &mut Tokenizer, code: Code, info: Info) -> Sta match code { Code::Char('-') => { tokenizer.consume(code); + // Do not form containers. + tokenizer.concrete = true; ( State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))), None, @@ -348,6 +355,8 @@ fn cdata_open_inside(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> S if info.index == info.buffer.len() { info.buffer.clear(); + // Do not form containers. + tokenizer.concrete = true; (State::Fn(Box::new(|t, c| continuation(t, c, info))), None) } else { ( @@ -396,6 +405,8 @@ fn tag_name(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes if !slash && info.start_tag && HTML_RAW_NAMES.contains(&name) { info.kind = Kind::Raw; + // Do not form containers. + tokenizer.concrete = true; continuation(tokenizer, code, info) } else if HTML_BLOCK_NAMES.contains(&name) { // Basic is assumed, no need to set `kind`. @@ -406,6 +417,8 @@ fn tag_name(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes None, ) } else { + // Do not form containers. + tokenizer.concrete = true; continuation(tokenizer, code, info) } } else { @@ -439,6 +452,8 @@ fn basic_self_closing(tokenizer: &mut Tokenizer, code: Code, info: Info) -> Stat match code { Code::Char('>') => { tokenizer.consume(code); + // Do not form containers. + tokenizer.concrete = true; (State::Fn(Box::new(|t, c| continuation(t, c, info))), None) } _ => (State::Nok, None), @@ -695,6 +710,8 @@ fn complete_end(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnRes fn complete_after(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { match code { Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { + // Do not form containers. + tokenizer.concrete = true; continuation(tokenizer, code, info) } Code::VirtualSpace | Code::Char('\t' | ' ') => { @@ -793,6 +810,8 @@ fn html_continue_start(tokenizer: &mut Tokenizer, code: Code, info: Info) -> Sta tokenizer.exit(Token::HtmlFlow); // Feel free to interrupt. tokenizer.interrupt = false; + // Restore previous `concrete`. + tokenizer.concrete = info.concrete; (State::Ok, Some(vec![code])) } // To do: do not allow lazy lines. @@ -960,6 +979,8 @@ fn continuation_close(tokenizer: &mut Tokenizer, code: Code, info: Info) -> Stat tokenizer.exit(Token::HtmlFlow); // Feel free to interrupt. tokenizer.interrupt = false; + // Restore previous `concrete`. + tokenizer.concrete = info.concrete; (State::Ok, Some(vec![code])) } _ => { diff --git a/src/content/document.rs b/src/content/document.rs index b1f3083..feffb62 100644 --- a/src/content/document.rs +++ b/src/content/document.rs @@ -195,7 +195,7 @@ fn check_new_containers( // step 1 before creating the new block as a child of the last matched // block. if info.continued == info.stack.len() { - println!(" to do: concrete? interrupt?"); + println!(" to do: interrupt ({:?})?", tokenizer.interrupt); // // No need to `check` whether there’s a container, of `exitContainers` // // would be moot. // // We can instead immediately `attempt` to parse one. @@ -203,12 +203,13 @@ fn check_new_containers( // return documentContinued(code) // } - // // If we have concrete content, such as block HTML or fenced code, - // // we can’t have containers “pierce” into them, so we can immediately - // // start. - // if (childFlow.currentConstruct && childFlow.currentConstruct.concrete) { - // return flowStart(code) - // } + // If we have concrete content, such as block HTML or fenced code, + // we can’t have containers “pierce” into them, so we can immediately + // start. + if tokenizer.concrete { + println!(" concrete!"); + return flow_start(tokenizer, code, info); + } // // If we do have flow, it could still be a blank line, // // but we’d be interrupting it w/ a new container if there’s a current diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 64b66cc..efd8068 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -199,6 +199,9 @@ pub struct Tokenizer<'a> { /// /// Used when tokenizing [flow content][crate::content::flow]. pub interrupt: bool, + /// To do. + pub concrete: bool, + /// To do. pub lazy: bool, } @@ -220,6 +223,7 @@ impl<'a> Tokenizer<'a> { label_start_list_loose: vec![], media_list: vec![], interrupt: false, + concrete: false, lazy: false, resolvers: vec![], resolver_ids: vec![], diff --git a/tests/block_quote.rs b/tests/block_quote.rs index 908c724..02256e1 100644 --- a/tests/block_quote.rs +++ b/tests/block_quote.rs @@ -170,7 +170,7 @@ fn block_quote() { // "should not support interrupting many block quotes w/ paragraphs (2)" // ); - // To do: block quote (some bug). + // To do: block quote (exit flow if container exits). // assert_eq!( // micromark("> a\n\n> b"), // "
\n
a\n
\n
\n
\n

b

\n
", diff --git a/tests/code_fenced.rs b/tests/code_fenced.rs index d970c94..84d0d83 100644 --- a/tests/code_fenced.rs +++ b/tests/code_fenced.rs @@ -3,19 +3,17 @@ use micromark::micromark; #[test] fn code_fenced() { - // To do: concrete constructs (code fenced). - // assert_eq!( - // micromark("```\n<\n >\n```"), - // "
<\n >\n
", - // "should support fenced code w/ grave accents" - // ); + assert_eq!( + micromark("```\n<\n >\n```"), + "
<\n >\n
", + "should support fenced code w/ grave accents" + ); - // To do: concrete constructs (code fenced). - // assert_eq!( - // micromark("~~~\n<\n >\n~~~"), - // "
<\n >\n
", - // "should support fenced code w/ tildes" - // ); + assert_eq!( + micromark("~~~\n<\n >\n~~~"), + "
<\n >\n
", + "should support fenced code w/ tildes" + ); assert_eq!( micromark("``\nfoo\n``"), diff --git a/tests/html_flow.rs b/tests/html_flow.rs index e53b47e..796ced4 100644 --- a/tests/html_flow.rs +++ b/tests/html_flow.rs @@ -366,12 +366,11 @@ fn html_flow_4_declaration() { // Note about the lower letter: // - // To do: concrete constructs (html flow). - // assert_eq!( - // micromark_with_options("", DANGER), - // "", - // "should support blank lines in declarations" - // ); + assert_eq!( + micromark_with_options("", DANGER), + "", + "should support blank lines in declarations" + ); // To do: blockquote (lazy). // assert_eq!( @@ -1020,7 +1019,7 @@ fn html_flow_7_complete() { "should not support blank lines in complete" ); - // To do: blockquote (some bug). + // To do: containers: close flow when closing container. // assert_eq!( // micromark_with_options("> \n*bar*", DANGER), // "
\n\n
\n

bar

", -- cgit