author     Titus Wormer <tituswormer@gmail.com>  2022-07-08 10:51:45 +0200
committer  Titus Wormer <tituswormer@gmail.com>  2022-07-08 10:51:45 +0200
commit     bd0cb0d0395abb06941960938aacc3639148a96c (patch)
tree       1c69873ccb947e7f81a652b653dc5d6d557d49e3 /src
parent     92b42e06f943338ce8b54b7e22cbb116ff598fa6 (diff)
Add support for concrete constructs
Diffstat (limited to 'src')
-rw-r--r--  src/construct/code_fenced.rs | 19
-rw-r--r--  src/construct/html_flow.rs   | 25
-rw-r--r--  src/content/document.rs      | 15
-rw-r--r--  src/tokenizer.rs             |  4
4 files changed, 51 insertions, 12 deletions
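
This commit threads a `concrete` flag through the tokenizer: once fenced code or HTML (flow) is definitely open, the flag is set so containers (block quotes, list items) cannot “pierce” into the construct, and the previous value is restored when the construct closes. Below is a minimal sketch of that save/set/restore pattern, using simplified stand-in types rather than the crate's real `Tokenizer` and per-construct `Info`:

```rust
// Simplified stand-ins; this only illustrates the save/set/restore
// pattern the diff applies, not the crate's actual state machine.
struct Tokenizer {
    concrete: bool,
}

struct Info {
    /// Snapshot of `tokenizer.concrete` taken when the construct starts.
    concrete: bool,
}

fn construct_start(tokenizer: &mut Tokenizer) -> Info {
    // Remember the previous value so later constructs restore correctly.
    let info = Info { concrete: tokenizer.concrete };
    // Once the construct is definitely open, containers may not “pierce” in.
    tokenizer.concrete = true;
    info
}

fn construct_after(tokenizer: &mut Tokenizer, info: &Info) {
    // Restore the previous `concrete` when the construct ends.
    tokenizer.concrete = info.concrete;
}

fn main() {
    let mut tokenizer = Tokenizer { concrete: false };
    let info = construct_start(&mut tokenizer);
    assert!(tokenizer.concrete);
    construct_after(&mut tokenizer, &info);
    assert!(!tokenizer.concrete);
}
```

In the diff itself the flag is set at the points where the construct can no longer be anything else (after the fence info/meta, after a recognized tag name, and so on) and restored in the respective `after`/`continuation_close` states.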
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index 617979f..e2165a9 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -175,6 +175,8 @@ struct Info {
prefix: usize,
/// Kind of fences.
kind: Kind,
+ /// To do.
+ concrete: bool,
}
/// Start of fenced code.
@@ -218,6 +220,7 @@ fn before_sequence_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult
Info {
prefix,
size: 0,
+ concrete: tokenizer.concrete,
kind: Kind::from_code(code),
},
)
@@ -264,6 +267,8 @@ fn info_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResu
match code {
Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
tokenizer.exit(Token::CodeFencedFence);
+ // Do not form containers.
+ tokenizer.concrete = true;
at_break(tokenizer, code, info)
}
_ => {
@@ -292,6 +297,8 @@ fn info_inside(
tokenizer.exit(Token::Data);
tokenizer.exit(Token::CodeFencedFenceInfo);
tokenizer.exit(Token::CodeFencedFence);
+ // Do not form containers.
+ tokenizer.concrete = true;
at_break(tokenizer, code, info)
}
Code::VirtualSpace | Code::Char('\t' | ' ') => {
@@ -322,6 +329,8 @@ fn meta_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResu
match code {
Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
tokenizer.exit(Token::CodeFencedFence);
+ // Do not form containers.
+ tokenizer.concrete = true;
at_break(tokenizer, code, info)
}
_ => {
@@ -345,6 +354,8 @@ fn meta(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
tokenizer.exit(Token::Data);
tokenizer.exit(Token::CodeFencedFenceMeta);
tokenizer.exit(Token::CodeFencedFence);
+ // Do not form containers.
+ tokenizer.concrete = true;
at_break(tokenizer, code, info)
}
Code::Char('`') if info.kind == Kind::GraveAccent => (State::Nok, None),
@@ -366,12 +377,12 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult
let clone = info.clone();
match code {
- Code::None => after(tokenizer, code),
+ Code::None => after(tokenizer, code, info),
Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => tokenizer.attempt(
|t, c| close_begin(t, c, info),
|ok| {
if ok {
- Box::new(after)
+ Box::new(|t, c| after(t, c, clone))
} else {
Box::new(|t, c| content_before(t, c, clone))
}
@@ -557,9 +568,11 @@ fn content_continue(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateF
/// console.log('1')
/// ~~~|
/// ```
-fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+fn after(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
tokenizer.exit(Token::CodeFenced);
// Feel free to interrupt.
tokenizer.interrupt = false;
+ // Restore previous `concrete`.
+ tokenizer.concrete = info.concrete;
(State::Ok, Some(vec![code]))
}
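
Because `after` now takes the `Info` snapshot in order to restore `concrete`, `at_break` hands a clone of it to the boxed state function. A small, illustrative sketch (not the crate's actual `StateFnResult` signatures) of why the closure has to own that clone:

```rust
// Illustrative only: a boxed state function must own the data it will
// need when it eventually runs, hence the clone captured by `move`.
struct Tok {
    concrete: bool,
}

#[derive(Clone)]
struct Info {
    concrete: bool,
}

type StateFn = Box<dyn FnOnce(&mut Tok)>;

fn after(tok: &mut Tok, info: Info) {
    // Restore the snapshot taken when the construct started.
    tok.concrete = info.concrete;
}

fn at_break(info: Info) -> StateFn {
    let clone = info.clone();
    // `move` transfers ownership of `clone` into the closure, so the
    // snapshot is still available whenever the tokenizer calls it.
    Box::new(move |t: &mut Tok| after(t, clone))
}

fn main() {
    let mut tok = Tok { concrete: true };
    let next = at_break(Info { concrete: false });
    next(&mut tok);
    assert!(!tok.concrete);
}
```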
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index fde0a34..f30db3f 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -106,8 +106,6 @@ use crate::token::Token;
use crate::tokenizer::{Code, State, StateFnResult, Tokenizer};
use crate::util::codes::{parse, serialize};
-// To do: mark as concrete (block quotes or lists can’t “pierce” into HTML).
-
/// Kind of HTML (flow).
#[derive(Debug, PartialEq)]
enum Kind {
@@ -195,6 +193,8 @@ struct Info {
index: usize,
/// Current quote, when in a double or single quoted attribute value.
quote: Option<QuoteKind>,
+ /// To do.
+ concrete: bool,
}
/// Start of HTML (flow), before optional whitespace.
@@ -240,6 +240,7 @@ fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
buffer: vec![],
index: 0,
quote: None,
+ concrete: tokenizer.concrete,
};
match code {
@@ -260,6 +261,8 @@ fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
Code::Char('?') => {
info.kind = Kind::Instruction;
tokenizer.consume(code);
+ // Do not form containers.
+ tokenizer.concrete = true;
// While we’re in an instruction instead of a declaration, we’re on a `?`
// right now, so we do need to search for `>`, similar to declarations.
(
@@ -305,6 +308,8 @@ fn declaration_open(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> St
Code::Char('A'..='Z' | 'a'..='z') => {
tokenizer.consume(code);
info.kind = Kind::Declaration;
+ // Do not form containers.
+ tokenizer.concrete = true;
(
State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))),
None,
@@ -323,6 +328,8 @@ fn comment_open_inside(tokenizer: &mut Tokenizer, code: Code, info: Info) -> Sta
match code {
Code::Char('-') => {
tokenizer.consume(code);
+ // Do not form containers.
+ tokenizer.concrete = true;
(
State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))),
None,
@@ -348,6 +355,8 @@ fn cdata_open_inside(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> S
if info.index == info.buffer.len() {
info.buffer.clear();
+ // Do not form containers.
+ tokenizer.concrete = true;
(State::Fn(Box::new(|t, c| continuation(t, c, info))), None)
} else {
(
@@ -396,6 +405,8 @@ fn tag_name(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes
if !slash && info.start_tag && HTML_RAW_NAMES.contains(&name) {
info.kind = Kind::Raw;
+ // Do not form containers.
+ tokenizer.concrete = true;
continuation(tokenizer, code, info)
} else if HTML_BLOCK_NAMES.contains(&name) {
// Basic is assumed, no need to set `kind`.
@@ -406,6 +417,8 @@ fn tag_name(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes
None,
)
} else {
+ // Do not form containers.
+ tokenizer.concrete = true;
continuation(tokenizer, code, info)
}
} else {
@@ -439,6 +452,8 @@ fn basic_self_closing(tokenizer: &mut Tokenizer, code: Code, info: Info) -> Stat
match code {
Code::Char('>') => {
tokenizer.consume(code);
+ // Do not form containers.
+ tokenizer.concrete = true;
(State::Fn(Box::new(|t, c| continuation(t, c, info))), None)
}
_ => (State::Nok, None),
@@ -695,6 +710,8 @@ fn complete_end(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnRes
fn complete_after(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
match code {
Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ // Do not form containers.
+ tokenizer.concrete = true;
continuation(tokenizer, code, info)
}
Code::VirtualSpace | Code::Char('\t' | ' ') => {
@@ -793,6 +810,8 @@ fn html_continue_start(tokenizer: &mut Tokenizer, code: Code, info: Info) -> Sta
tokenizer.exit(Token::HtmlFlow);
// Feel free to interrupt.
tokenizer.interrupt = false;
+ // Restore previous `concrete`.
+ tokenizer.concrete = info.concrete;
(State::Ok, Some(vec![code]))
}
// To do: do not allow lazy lines.
@@ -960,6 +979,8 @@ fn continuation_close(tokenizer: &mut Tokenizer, code: Code, info: Info) -> Stat
tokenizer.exit(Token::HtmlFlow);
// Feel free to interrupt.
tokenizer.interrupt = false;
+ // Restore previous `concrete`.
+ tokenizer.concrete = info.concrete;
(State::Ok, Some(vec![code]))
}
_ => {
diff --git a/src/content/document.rs b/src/content/document.rs
index b1f3083..feffb62 100644
--- a/src/content/document.rs
+++ b/src/content/document.rs
@@ -195,7 +195,7 @@ fn check_new_containers(
// step 1 before creating the new block as a child of the last matched
// block.
if info.continued == info.stack.len() {
- println!(" to do: concrete? interrupt?");
+ println!(" to do: interrupt ({:?})?", tokenizer.interrupt);
// // No need to `check` whether there’s a container, of `exitContainers`
// // would be moot.
// // We can instead immediately `attempt` to parse one.
@@ -203,12 +203,13 @@ fn check_new_containers(
// return documentContinued(code)
// }
- // // If we have concrete content, such as block HTML or fenced code,
- // // we can’t have containers “pierce” into them, so we can immediately
- // // start.
- // if (childFlow.currentConstruct && childFlow.currentConstruct.concrete) {
- // return flowStart(code)
- // }
+ // If we have concrete content, such as block HTML or fenced code,
+ // we can’t have containers “pierce” into them, so we can immediately
+ // start.
+ if tokenizer.concrete {
+ println!(" concrete!");
+ return flow_start(tokenizer, code, info);
+ }
// // If we do have flow, it could still be a blank line,
// // but we’d be interrupting it w/ a new container if there’s a current
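
At the document level, the new check short-circuits container matching: when the open flow construct is concrete, the line goes straight to flow. A deliberately simplified, hypothetical version of that decision (not the real `check_new_containers` signature):

```rust
// Hypothetical, heavily simplified version of the decision shown above:
// when the open flow construct is concrete, skip container matching.
fn check_new_containers(concrete: bool, line: &str) -> &'static str {
    if concrete {
        // Block HTML or fenced code: containers cannot “pierce” in.
        "flow"
    } else if line.starts_with("> ") || line.starts_with("- ") {
        "new container"
    } else {
        "flow"
    }
}

fn main() {
    // Inside `<div>…</div>` or a fenced code block, a `- ` line stays part
    // of the concrete construct instead of opening a list.
    assert_eq!(check_new_containers(true, "- still raw content"), "flow");
    assert_eq!(check_new_containers(false, "- a list item"), "new container");
}
```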
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 64b66cc..efd8068 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -199,6 +199,9 @@ pub struct Tokenizer<'a> {
///
/// Used when tokenizing [flow content][crate::content::flow].
pub interrupt: bool,
+ /// To do.
+ pub concrete: bool,
+ /// To do.
pub lazy: bool,
}
@@ -220,6 +223,7 @@ impl<'a> Tokenizer<'a> {
label_start_list_loose: vec![],
media_list: vec![],
interrupt: false,
+ concrete: false,
lazy: false,
resolvers: vec![],
resolver_ids: vec![],