author     Titus Wormer <tituswormer@gmail.com>  2022-07-08 10:51:45 +0200
committer  Titus Wormer <tituswormer@gmail.com>  2022-07-08 10:51:45 +0200
commit     bd0cb0d0395abb06941960938aacc3639148a96c (patch)
tree       1c69873ccb947e7f81a652b653dc5d6d557d49e3
parent     92b42e06f943338ce8b54b7e22cbb116ff598fa6 (diff)
download   markdown-rs-bd0cb0d0395abb06941960938aacc3639148a96c.tar.gz
           markdown-rs-bd0cb0d0395abb06941960938aacc3639148a96c.tar.bz2
           markdown-rs-bd0cb0d0395abb06941960938aacc3639148a96c.zip
Add support for concrete constructs
Diffstat
-rw-r--r--  readme.md                      7
-rw-r--r--  src/construct/code_fenced.rs  19
-rw-r--r--  src/construct/html_flow.rs    25
-rw-r--r--  src/content/document.rs       15
-rw-r--r--  src/tokenizer.rs               4
-rw-r--r--  tests/block_quote.rs           2
-rw-r--r--  tests/code_fenced.rs          22
-rw-r--r--  tests/html_flow.rs            13
8 files changed, 71 insertions, 36 deletions
diff --git a/readme.md b/readme.md
index 0634eec..ad8aec0 100644
--- a/readme.md
+++ b/readme.md
@@ -48,8 +48,6 @@ cargo doc --document-private-items
- [ ] (5) Containers: this will be rather messy, and depends a lot on how
subtokenization is solved
-- [ ] (3) Concrete constructs: HTML or code (fenced) cannot be “pierced” into by
- containers
- [ ] (3) Lazy lines, in containers, in flow in a paragraph, a line does not
need to be indented
- [ ] (5) There’s a lot of rust-related choosing whether to pass (mutable)
@@ -129,7 +127,6 @@ cargo doc --document-private-items
- [ ] (3) Fix some block quote bugs
- [ ] (3) Lazy lines (`code indented`, `html flow`)
-- [ ] (3) Concrete (`html flow`)
- [ ] (8) list\
test (`character_reference`, `code_indented`, `heading_setext`,
`html_flow`, `thematic_break`)\
@@ -274,4 +271,6 @@ important.
- [x] (2) Fix resizing attention bug
- [x] (2) Fix interleaving of attention/label
- [x] (8) Add basic support for block quotes
-- [x] (1) use `char::REPLACEMENT_CHARACTER`?
+- [x] (1) Use `char::REPLACEMENT_CHARACTER`?
+- [x] (3) Add support for concrete constructs
+ (html (flow) or code (fenced) cannot be “pierced” into by containers)
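
The readme entry added above sums up the change: once flow is inside a concrete construct (code (fenced) or html (flow)), container markers on later lines are ordinary content. A minimal sketch of that behavior, reusing the assertion this commit re-enables in `tests/code_fenced.rs` further down:

````rust
use micromark::micromark;

fn main() {
    // Inside the fence, `<` and ` >` are code content; the `>` does not
    // open a block quote, because containers cannot “pierce” into the
    // concrete construct.
    assert_eq!(
        micromark("```\n<\n >\n```"),
        "<pre><code>&lt;\n &gt;\n</code></pre>",
        "containers cannot pierce into fenced code"
    );
}
````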
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index 617979f..e2165a9 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -175,6 +175,8 @@ struct Info {
prefix: usize,
/// Kind of fences.
kind: Kind,
+ /// To do.
+ concrete: bool,
}
/// Start of fenced code.
@@ -218,6 +220,7 @@ fn before_sequence_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult
Info {
prefix,
size: 0,
+ concrete: tokenizer.concrete,
kind: Kind::from_code(code),
},
)
@@ -264,6 +267,8 @@ fn info_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResu
match code {
Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
tokenizer.exit(Token::CodeFencedFence);
+ // Do not form containers.
+ tokenizer.concrete = true;
at_break(tokenizer, code, info)
}
_ => {
@@ -292,6 +297,8 @@ fn info_inside(
tokenizer.exit(Token::Data);
tokenizer.exit(Token::CodeFencedFenceInfo);
tokenizer.exit(Token::CodeFencedFence);
+ // Do not form containers.
+ tokenizer.concrete = true;
at_break(tokenizer, code, info)
}
Code::VirtualSpace | Code::Char('\t' | ' ') => {
@@ -322,6 +329,8 @@ fn meta_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResu
match code {
Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
tokenizer.exit(Token::CodeFencedFence);
+ // Do not form containers.
+ tokenizer.concrete = true;
at_break(tokenizer, code, info)
}
_ => {
@@ -345,6 +354,8 @@ fn meta(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
tokenizer.exit(Token::Data);
tokenizer.exit(Token::CodeFencedFenceMeta);
tokenizer.exit(Token::CodeFencedFence);
+ // Do not form containers.
+ tokenizer.concrete = true;
at_break(tokenizer, code, info)
}
Code::Char('`') if info.kind == Kind::GraveAccent => (State::Nok, None),
@@ -366,12 +377,12 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult
let clone = info.clone();
match code {
- Code::None => after(tokenizer, code),
+ Code::None => after(tokenizer, code, info),
Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => tokenizer.attempt(
|t, c| close_begin(t, c, info),
|ok| {
if ok {
- Box::new(after)
+ Box::new(|t, c| after(t, c, clone))
} else {
Box::new(|t, c| content_before(t, c, clone))
}
@@ -557,9 +568,11 @@ fn content_continue(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateF
/// console.log('1')
/// ~~~|
/// ```
-fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+fn after(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
tokenizer.exit(Token::CodeFenced);
// Feel free to interrupt.
tokenizer.interrupt = false;
+ // Restore previous `concrete`.
+ tokenizer.concrete = info.concrete;
(State::Ok, Some(vec![code]))
}
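
The pattern in `code_fenced.rs` (mirrored in `html_flow.rs` below) is: save the surrounding `concrete` value in `Info` when the construct starts, set `tokenizer.concrete = true` once the opening fence is complete, and restore the saved value in `after`. A simplified sketch of that save/set/restore flow, with hypothetical stand-in types rather than the real state machine:

```rust
// Hypothetical stand-ins for illustration only.
struct Tokenizer {
    concrete: bool,
}

struct Info {
    // The `concrete` value that was active before this construct.
    concrete: bool,
}

fn sequence_open(tokenizer: &mut Tokenizer) -> Info {
    // Save the surrounding state when the construct starts…
    Info {
        concrete: tokenizer.concrete,
    }
}

fn fence_done(tokenizer: &mut Tokenizer) {
    // …flip it on once the opening fence is done, so the document
    // tokenizer stops forming containers…
    tokenizer.concrete = true;
}

fn after(tokenizer: &mut Tokenizer, info: Info) {
    // …and restore whatever was active before when the construct exits.
    tokenizer.concrete = info.concrete;
}

fn main() {
    let mut tokenizer = Tokenizer { concrete: false };
    let info = sequence_open(&mut tokenizer);
    fence_done(&mut tokenizer);
    assert!(tokenizer.concrete);
    after(&mut tokenizer, info);
    assert!(!tokenizer.concrete);
}
```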
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index fde0a34..f30db3f 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -106,8 +106,6 @@ use crate::token::Token;
use crate::tokenizer::{Code, State, StateFnResult, Tokenizer};
use crate::util::codes::{parse, serialize};
-// To do: mark as concrete (block quotes or lists can’t “pierce” into HTML).
-
/// Kind of HTML (flow).
#[derive(Debug, PartialEq)]
enum Kind {
@@ -195,6 +193,8 @@ struct Info {
index: usize,
/// Current quote, when in a double or single quoted attribute value.
quote: Option<QuoteKind>,
+ /// To do.
+ concrete: bool,
}
/// Start of HTML (flow), before optional whitespace.
@@ -240,6 +240,7 @@ fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
buffer: vec![],
index: 0,
quote: None,
+ concrete: tokenizer.concrete,
};
match code {
@@ -260,6 +261,8 @@ fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
Code::Char('?') => {
info.kind = Kind::Instruction;
tokenizer.consume(code);
+ // Do not form containers.
+ tokenizer.concrete = true;
// While we’re in an instruction instead of a declaration, we’re on a `?`
// right now, so we do need to search for `>`, similar to declarations.
(
@@ -305,6 +308,8 @@ fn declaration_open(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> St
Code::Char('A'..='Z' | 'a'..='z') => {
tokenizer.consume(code);
info.kind = Kind::Declaration;
+ // Do not form containers.
+ tokenizer.concrete = true;
(
State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))),
None,
@@ -323,6 +328,8 @@ fn comment_open_inside(tokenizer: &mut Tokenizer, code: Code, info: Info) -> Sta
match code {
Code::Char('-') => {
tokenizer.consume(code);
+ // Do not form containers.
+ tokenizer.concrete = true;
(
State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))),
None,
@@ -348,6 +355,8 @@ fn cdata_open_inside(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> S
if info.index == info.buffer.len() {
info.buffer.clear();
+ // Do not form containers.
+ tokenizer.concrete = true;
(State::Fn(Box::new(|t, c| continuation(t, c, info))), None)
} else {
(
@@ -396,6 +405,8 @@ fn tag_name(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes
if !slash && info.start_tag && HTML_RAW_NAMES.contains(&name) {
info.kind = Kind::Raw;
+ // Do not form containers.
+ tokenizer.concrete = true;
continuation(tokenizer, code, info)
} else if HTML_BLOCK_NAMES.contains(&name) {
// Basic is assumed, no need to set `kind`.
@@ -406,6 +417,8 @@ fn tag_name(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes
None,
)
} else {
+ // Do not form containers.
+ tokenizer.concrete = true;
continuation(tokenizer, code, info)
}
} else {
@@ -439,6 +452,8 @@ fn basic_self_closing(tokenizer: &mut Tokenizer, code: Code, info: Info) -> Stat
match code {
Code::Char('>') => {
tokenizer.consume(code);
+ // Do not form containers.
+ tokenizer.concrete = true;
(State::Fn(Box::new(|t, c| continuation(t, c, info))), None)
}
_ => (State::Nok, None),
@@ -695,6 +710,8 @@ fn complete_end(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnRes
fn complete_after(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
match code {
Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ // Do not form containers.
+ tokenizer.concrete = true;
continuation(tokenizer, code, info)
}
Code::VirtualSpace | Code::Char('\t' | ' ') => {
@@ -793,6 +810,8 @@ fn html_continue_start(tokenizer: &mut Tokenizer, code: Code, info: Info) -> Sta
tokenizer.exit(Token::HtmlFlow);
// Feel free to interrupt.
tokenizer.interrupt = false;
+ // Restore previous `concrete`.
+ tokenizer.concrete = info.concrete;
(State::Ok, Some(vec![code]))
}
// To do: do not allow lazy lines.
@@ -960,6 +979,8 @@ fn continuation_close(tokenizer: &mut Tokenizer, code: Code, info: Info) -> Stat
tokenizer.exit(Token::HtmlFlow);
// Feel free to interrupt.
tokenizer.interrupt = false;
+ // Restore previous `concrete`.
+ tokenizer.concrete = info.concrete;
(State::Ok, Some(vec![code]))
}
_ => {
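
In `html_flow.rs` the flag is switched on at every point where the kind becomes known (instruction, declaration, comment, CDATA, raw, basic), while the complete (“kind 7”) case waits until `complete_after` sees the end of the opening line. A hedged summary of that distinction as a plain function, not the real state machine, which threads this through many state functions:

```rust
#[derive(Debug, PartialEq)]
enum Kind {
    Basic,
    Comment,
    Cdata,
    Declaration,
    Instruction,
    Raw,
    Complete,
}

/// `true` when HTML (flow) becomes concrete as soon as its kind is known;
/// the complete case only becomes concrete at the end of its opening line,
/// because until then it may still turn out not to be HTML (flow) at all.
fn concrete_when_kind_known(kind: &Kind) -> bool {
    *kind != Kind::Complete
}

fn main() {
    assert!(concrete_when_kind_known(&Kind::Raw));
    assert!(!concrete_when_kind_known(&Kind::Complete));
}
```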
diff --git a/src/content/document.rs b/src/content/document.rs
index b1f3083..feffb62 100644
--- a/src/content/document.rs
+++ b/src/content/document.rs
@@ -195,7 +195,7 @@ fn check_new_containers(
// step 1 before creating the new block as a child of the last matched
// block.
if info.continued == info.stack.len() {
- println!(" to do: concrete? interrupt?");
+ println!(" to do: interrupt ({:?})?", tokenizer.interrupt);
// // No need to `check` whether there’s a container, of `exitContainers`
// // would be moot.
// // We can instead immediately `attempt` to parse one.
@@ -203,12 +203,13 @@ fn check_new_containers(
// return documentContinued(code)
// }
- // // If we have concrete content, such as block HTML or fenced code,
- // // we can’t have containers “pierce” into them, so we can immediately
- // // start.
- // if (childFlow.currentConstruct && childFlow.currentConstruct.concrete) {
- // return flowStart(code)
- // }
+ // If we have concrete content, such as block HTML or fenced code,
+ // we can’t have containers “pierce” into them, so we can immediately
+ // start.
+ if tokenizer.concrete {
+ println!(" concrete!");
+ return flow_start(tokenizer, code, info);
+ }
// // If we do have flow, it could still be a blank line,
// // but we’d be interrupting it w/ a new container if there’s a current
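
On the consuming side, `check_new_containers` now ports the corresponding branch from the JavaScript original: when all containers matched and the current flow construct is concrete, the line goes straight to flow instead of attempting new containers. A simplified sketch of that decision (a plain function, not the real state machine):

```rust
/// Simplified decision: if every open container matched and the current
/// flow construct is concrete (fenced code, HTML flow), skip the attempt
/// to open new containers and feed the line straight to flow.
fn next_step(continued: usize, stack_len: usize, concrete: bool) -> &'static str {
    if continued == stack_len && concrete {
        // Containers cannot “pierce” into concrete content.
        "flow_start"
    } else {
        "try_new_containers"
    }
}

fn main() {
    assert_eq!(next_step(1, 1, true), "flow_start");
    assert_eq!(next_step(1, 1, false), "try_new_containers");
}
```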
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 64b66cc..efd8068 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -199,6 +199,9 @@ pub struct Tokenizer<'a> {
///
/// Used when tokenizing [flow content][crate::content::flow].
pub interrupt: bool,
+ /// To do.
+ pub concrete: bool,
+ /// To do.
pub lazy: bool,
}
@@ -220,6 +223,7 @@ impl<'a> Tokenizer<'a> {
label_start_list_loose: vec![],
media_list: vec![],
interrupt: false,
+ concrete: false,
lazy: false,
resolvers: vec![],
resolver_ids: vec![],
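
Both new `Tokenizer` fields still carry `/// To do.` docs in this commit. Based on how `concrete` is used above and on the readme’s lazy-line items, documentation along these lines would fit (a suggestion only, not part of the commit):

```rust
/// Whether the current flow construct is concrete.
///
/// Concrete constructs (code (fenced), HTML (flow)) cannot be “pierced”
/// into by containers: while this is `true`, the document content type
/// does not attempt to start new containers.
pub concrete: bool,
/// Whether the current line is lazy: a line inside a container that is
/// not prefixed by that container’s markers.
pub lazy: bool,
```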
diff --git a/tests/block_quote.rs b/tests/block_quote.rs
index 908c724..02256e1 100644
--- a/tests/block_quote.rs
+++ b/tests/block_quote.rs
@@ -170,7 +170,7 @@ fn block_quote() {
// "should not support interrupting many block quotes w/ paragraphs (2)"
// );
- // To do: block quote (some bug).
+ // To do: block quote (exit flow if container exits).
// assert_eq!(
// micromark("> a\n\n> b"),
// "<blockquote>\n<pre><code>a\n</code></pre>\n</blockquote>\n<blockquote>\n<p>b</p>\n</blockquote>",
diff --git a/tests/code_fenced.rs b/tests/code_fenced.rs
index d970c94..84d0d83 100644
--- a/tests/code_fenced.rs
+++ b/tests/code_fenced.rs
@@ -3,19 +3,17 @@ use micromark::micromark;
#[test]
fn code_fenced() {
- // To do: concrete constructs (code fenced).
- // assert_eq!(
- // micromark("```\n<\n >\n```"),
- // "<pre><code>&lt;\n &gt;\n</code></pre>",
- // "should support fenced code w/ grave accents"
- // );
+ assert_eq!(
+ micromark("```\n<\n >\n```"),
+ "<pre><code>&lt;\n &gt;\n</code></pre>",
+ "should support fenced code w/ grave accents"
+ );
- // To do: concrete constructs (code fenced).
- // assert_eq!(
- // micromark("~~~\n<\n >\n~~~"),
- // "<pre><code>&lt;\n &gt;\n</code></pre>",
- // "should support fenced code w/ tildes"
- // );
+ assert_eq!(
+ micromark("~~~\n<\n >\n~~~"),
+ "<pre><code>&lt;\n &gt;\n</code></pre>",
+ "should support fenced code w/ tildes"
+ );
assert_eq!(
micromark("``\nfoo\n``"),
diff --git a/tests/html_flow.rs b/tests/html_flow.rs
index e53b47e..796ced4 100644
--- a/tests/html_flow.rs
+++ b/tests/html_flow.rs
@@ -366,12 +366,11 @@ fn html_flow_4_declaration() {
// Note about the lower letter:
// <https://github.com/commonmark/commonmark-spec/pull/621>
- // To do: concrete constructs (html flow).
- // assert_eq!(
- // micromark_with_options("<!a\n \n \n>", DANGER),
- // "<!a\n \n \n>",
- // "should support blank lines in declarations"
- // );
+ assert_eq!(
+ micromark_with_options("<!a\n \n \n>", DANGER),
+ "<!a\n \n \n>",
+ "should support blank lines in declarations"
+ );
// To do: blockquote (lazy).
// assert_eq!(
@@ -1020,7 +1019,7 @@ fn html_flow_7_complete() {
"should not support blank lines in complete"
);
- // To do: blockquote (some bug).
+ // To do: containers: close flow when closing container.
// assert_eq!(
// micromark_with_options("> <a>\n*bar*", DANGER),
// "<blockquote>\n<a>\n</blockquote>\n<p><em>bar</em></p>",