diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-07-08 10:51:45 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-07-08 10:51:45 +0200 |
commit | bd0cb0d0395abb06941960938aacc3639148a96c (patch) | |
tree | 1c69873ccb947e7f81a652b653dc5d6d557d49e3 /src | |
parent | 92b42e06f943338ce8b54b7e22cbb116ff598fa6 (diff) | |
download | markdown-rs-bd0cb0d0395abb06941960938aacc3639148a96c.tar.gz markdown-rs-bd0cb0d0395abb06941960938aacc3639148a96c.tar.bz2 markdown-rs-bd0cb0d0395abb06941960938aacc3639148a96c.zip |
Add support for concrete constructs
Diffstat (limited to '')
-rw-r--r-- | src/construct/code_fenced.rs | 19 | ||||
-rw-r--r-- | src/construct/html_flow.rs | 25 | ||||
-rw-r--r-- | src/content/document.rs | 15 | ||||
-rw-r--r-- | src/tokenizer.rs | 4 |
4 files changed, 51 insertions, 12 deletions
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs index 617979f..e2165a9 100644 --- a/src/construct/code_fenced.rs +++ b/src/construct/code_fenced.rs @@ -175,6 +175,8 @@ struct Info { prefix: usize, /// Kind of fences. kind: Kind, + /// To do. + concrete: bool, } /// Start of fenced code. @@ -218,6 +220,7 @@ fn before_sequence_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult Info { prefix, size: 0, + concrete: tokenizer.concrete, kind: Kind::from_code(code), }, ) @@ -264,6 +267,8 @@ fn info_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResu match code { Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { tokenizer.exit(Token::CodeFencedFence); + // Do not form containers. + tokenizer.concrete = true; at_break(tokenizer, code, info) } _ => { @@ -292,6 +297,8 @@ fn info_inside( tokenizer.exit(Token::Data); tokenizer.exit(Token::CodeFencedFenceInfo); tokenizer.exit(Token::CodeFencedFence); + // Do not form containers. + tokenizer.concrete = true; at_break(tokenizer, code, info) } Code::VirtualSpace | Code::Char('\t' | ' ') => { @@ -322,6 +329,8 @@ fn meta_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResu match code { Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { tokenizer.exit(Token::CodeFencedFence); + // Do not form containers. + tokenizer.concrete = true; at_break(tokenizer, code, info) } _ => { @@ -345,6 +354,8 @@ fn meta(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { tokenizer.exit(Token::Data); tokenizer.exit(Token::CodeFencedFenceMeta); tokenizer.exit(Token::CodeFencedFence); + // Do not form containers. + tokenizer.concrete = true; at_break(tokenizer, code, info) } Code::Char('`') if info.kind == Kind::GraveAccent => (State::Nok, None), @@ -366,12 +377,12 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult let clone = info.clone(); match code { - Code::None => after(tokenizer, code), + Code::None => after(tokenizer, code, info), Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => tokenizer.attempt( |t, c| close_begin(t, c, info), |ok| { if ok { - Box::new(after) + Box::new(|t, c| after(t, c, clone)) } else { Box::new(|t, c| content_before(t, c, clone)) } @@ -557,9 +568,11 @@ fn content_continue(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateF /// console.log('1') /// ~~~| /// ``` -fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +fn after(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { tokenizer.exit(Token::CodeFenced); // Feel free to interrupt. tokenizer.interrupt = false; + // Restore previous `concrete`. + tokenizer.concrete = info.concrete; (State::Ok, Some(vec![code])) } diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs index fde0a34..f30db3f 100644 --- a/src/construct/html_flow.rs +++ b/src/construct/html_flow.rs @@ -106,8 +106,6 @@ use crate::token::Token; use crate::tokenizer::{Code, State, StateFnResult, Tokenizer}; use crate::util::codes::{parse, serialize}; -// To do: mark as concrete (block quotes or lists can’t “pierce” into HTML). - /// Kind of HTML (flow). #[derive(Debug, PartialEq)] enum Kind { @@ -195,6 +193,8 @@ struct Info { index: usize, /// Current quote, when in a double or single quoted attribute value. quote: Option<QuoteKind>, + /// To do. + concrete: bool, } /// Start of HTML (flow), before optional whitespace. @@ -240,6 +240,7 @@ fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { buffer: vec![], index: 0, quote: None, + concrete: tokenizer.concrete, }; match code { @@ -260,6 +261,8 @@ fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { Code::Char('?') => { info.kind = Kind::Instruction; tokenizer.consume(code); + // Do not form containers. + tokenizer.concrete = true; // While we’re in an instruction instead of a declaration, we’re on a `?` // right now, so we do need to search for `>`, similar to declarations. ( @@ -305,6 +308,8 @@ fn declaration_open(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> St Code::Char('A'..='Z' | 'a'..='z') => { tokenizer.consume(code); info.kind = Kind::Declaration; + // Do not form containers. + tokenizer.concrete = true; ( State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))), None, @@ -323,6 +328,8 @@ fn comment_open_inside(tokenizer: &mut Tokenizer, code: Code, info: Info) -> Sta match code { Code::Char('-') => { tokenizer.consume(code); + // Do not form containers. + tokenizer.concrete = true; ( State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))), None, @@ -348,6 +355,8 @@ fn cdata_open_inside(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> S if info.index == info.buffer.len() { info.buffer.clear(); + // Do not form containers. + tokenizer.concrete = true; (State::Fn(Box::new(|t, c| continuation(t, c, info))), None) } else { ( @@ -396,6 +405,8 @@ fn tag_name(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes if !slash && info.start_tag && HTML_RAW_NAMES.contains(&name) { info.kind = Kind::Raw; + // Do not form containers. + tokenizer.concrete = true; continuation(tokenizer, code, info) } else if HTML_BLOCK_NAMES.contains(&name) { // Basic is assumed, no need to set `kind`. @@ -406,6 +417,8 @@ fn tag_name(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes None, ) } else { + // Do not form containers. + tokenizer.concrete = true; continuation(tokenizer, code, info) } } else { @@ -439,6 +452,8 @@ fn basic_self_closing(tokenizer: &mut Tokenizer, code: Code, info: Info) -> Stat match code { Code::Char('>') => { tokenizer.consume(code); + // Do not form containers. + tokenizer.concrete = true; (State::Fn(Box::new(|t, c| continuation(t, c, info))), None) } _ => (State::Nok, None), @@ -695,6 +710,8 @@ fn complete_end(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnRes fn complete_after(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { match code { Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { + // Do not form containers. + tokenizer.concrete = true; continuation(tokenizer, code, info) } Code::VirtualSpace | Code::Char('\t' | ' ') => { @@ -793,6 +810,8 @@ fn html_continue_start(tokenizer: &mut Tokenizer, code: Code, info: Info) -> Sta tokenizer.exit(Token::HtmlFlow); // Feel free to interrupt. tokenizer.interrupt = false; + // Restore previous `concrete`. + tokenizer.concrete = info.concrete; (State::Ok, Some(vec![code])) } // To do: do not allow lazy lines. @@ -960,6 +979,8 @@ fn continuation_close(tokenizer: &mut Tokenizer, code: Code, info: Info) -> Stat tokenizer.exit(Token::HtmlFlow); // Feel free to interrupt. tokenizer.interrupt = false; + // Restore previous `concrete`. + tokenizer.concrete = info.concrete; (State::Ok, Some(vec![code])) } _ => { diff --git a/src/content/document.rs b/src/content/document.rs index b1f3083..feffb62 100644 --- a/src/content/document.rs +++ b/src/content/document.rs @@ -195,7 +195,7 @@ fn check_new_containers( // step 1 before creating the new block as a child of the last matched // block. if info.continued == info.stack.len() { - println!(" to do: concrete? interrupt?"); + println!(" to do: interrupt ({:?})?", tokenizer.interrupt); // // No need to `check` whether there’s a container, of `exitContainers` // // would be moot. // // We can instead immediately `attempt` to parse one. @@ -203,12 +203,13 @@ fn check_new_containers( // return documentContinued(code) // } - // // If we have concrete content, such as block HTML or fenced code, - // // we can’t have containers “pierce” into them, so we can immediately - // // start. - // if (childFlow.currentConstruct && childFlow.currentConstruct.concrete) { - // return flowStart(code) - // } + // If we have concrete content, such as block HTML or fenced code, + // we can’t have containers “pierce” into them, so we can immediately + // start. + if tokenizer.concrete { + println!(" concrete!"); + return flow_start(tokenizer, code, info); + } // // If we do have flow, it could still be a blank line, // // but we’d be interrupting it w/ a new container if there’s a current diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 64b66cc..efd8068 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -199,6 +199,9 @@ pub struct Tokenizer<'a> { /// /// Used when tokenizing [flow content][crate::content::flow]. pub interrupt: bool, + /// To do. + pub concrete: bool, + /// To do. pub lazy: bool, } @@ -220,6 +223,7 @@ impl<'a> Tokenizer<'a> { label_start_list_loose: vec![], media_list: vec![], interrupt: false, + concrete: false, lazy: false, resolvers: vec![], resolver_ids: vec![], |