author     Titus Wormer <tituswormer@gmail.com>  2022-06-29 17:15:17 +0200
committer  Titus Wormer <tituswormer@gmail.com>  2022-06-29 17:15:17 +0200
commit     a056a7b3716bd4cc78e47e64f7d735c5bd5b82e6 (patch)
tree       18395a3438441f5ddeca984fab0db8c9f8133a2c
parent     fa37356074c1bc3a0f74b6f6a22e038b7842ff4d (diff)
Fix a bunch of bugs with definitions, references
* Fix bug where whitespace after `:` was not allowed (it is)
* Fix bug where escapes in labels did not work due to a typo
* Fix to prefer the first definition
* Fix whitespace after definitions
* Fix matching by adding normalization
* Fix references being output as data
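The "normalization" mentioned above is CommonMark label normalization: labels match on a case-folded form with whitespace runs collapsed. The sketch below is a minimal stand-in for that idea, not the crate's actual `normalize_identifier` (imported in `src/compiler.rs` from `src/util/normalize_identifier.rs`); the name `normalize_label` is hypothetical, and `to_lowercase` only approximates the Unicode case fold the spec requires.

```rust
/// Hypothetical stand-in for the crate's `normalize_identifier`:
/// collapse runs of whitespace to a single space, trim the ends, and
/// case-fold so `[FOO]`, `[Foo]`, and `[ foo ]` all match.
fn normalize_label(label: &str) -> String {
    label
        .split_whitespace()
        .collect::<Vec<_>>()
        .join(" ")
        // Approximation: the spec asks for Unicode case folding, not lowercasing.
        .to_lowercase()
}

fn main() {
    assert_eq!(normalize_label("FOO"), normalize_label("Foo"));
    assert_eq!(normalize_label("ΑΓΩ"), normalize_label("αγω"));
    assert_eq!(normalize_label("Foo \n bar"), normalize_label("foo bar"));
}
```

This is what lets the case-insensitive matching tests (`[FOO]`/`[Foo]`, `[ΑΓΩ]`/`[αγω]`) be re-enabled in `tests/definition.rs` below.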
Diffstat
-rw-r--r--  readme.md                      |   6
-rw-r--r--  src/compiler.rs                |  37
-rw-r--r--  src/construct/definition.rs    |   2
-rw-r--r--  src/construct/partial_label.rs |   2
-rw-r--r--  tests/definition.rs            | 148
5 files changed, 92 insertions(+), 103 deletions(-)
diff --git a/readme.md b/readme.md
index 16f81d9..7c70905 100644
--- a/readme.md
+++ b/readme.md
@@ -143,15 +143,15 @@ cargo doc --document-private-items
#### Parse
- [ ] (1) Parse initial and final space_or_tab of paragraphs (in text)\
- test (`code_indented`, `hard_break_escape`, `hard_break_trailing`,
+ test (`code_indented`, `definition`, `hard_break_escape`, `hard_break_trailing`,
`heading_atx`, `heading_setext`, `html_flow`, `misc_soft_break`,
`misc_tabs`, `thematic_break`)
-- [ ] (3) Interrupting (html flow complete)
+- [ ] (3) Interrupting (html flow complete, definition + code_indented)
- [ ] (5) attention\
test (`character_reference`, `hard_break_escape`, `hard_break_trailing`,
`heading_atx`, `heading_setext`, `html_flow`, `thematic_break`)\
- [ ] (8) block quote\
- test (`code_fenced`, `code_indented`, `heading_atx`, `heading_setext`,
+ test (`code_fenced`, `definition`, `code_indented`, `heading_atx`, `heading_setext`,
`html_flow`, `misc_default_line_ending`, `thematic_break`)
- [ ] (8) list\
test (`character_reference`, `code_indented`, `heading_setext`,
diff --git a/src/compiler.rs b/src/compiler.rs
index 3dd6ae4..bb2359e 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -2,6 +2,7 @@
use crate::constant::{SAFE_PROTOCOL_HREF, SAFE_PROTOCOL_SRC};
use crate::construct::character_reference::Kind as CharacterReferenceKind;
use crate::tokenizer::{Code, Event, EventType, TokenType};
+use crate::util::normalize_identifier::normalize_identifier;
use crate::util::{
decode_character_reference::{decode_named, decode_numeric},
encode::encode,
@@ -431,16 +432,14 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
TokenType::DefinitionDestinationString,
on_enter_definition_destination_string,
);
+ enter_map.insert(TokenType::ReferenceString, on_enter_buffer);
enter_map.insert(TokenType::DefinitionLabelString, on_enter_buffer);
enter_map.insert(TokenType::DefinitionTitleString, on_enter_buffer);
let mut exit_map: Map = HashMap::new();
exit_map.insert(TokenType::Label, on_exit_label);
exit_map.insert(TokenType::LabelText, on_exit_label_text);
- exit_map.insert(
- TokenType::ReferenceString,
- on_exit_reference_destination_string,
- );
+ exit_map.insert(TokenType::ReferenceString, on_exit_reference_string);
exit_map.insert(
TokenType::ResourceDestinationString,
on_exit_resource_destination_string,
@@ -525,11 +524,6 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
&exit_map
};
- println!(
- "handle {:?}:{:?} ({:?})",
- event.event_type, event.token_type, index
- );
-
if let Some(func) = map.get(&event.token_type) {
func(context, event);
}
@@ -561,8 +555,6 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
index += 1;
}
- println!("xxx: {:?}", definition_indices);
-
index = 0;
let jump_default = (events.len(), events.len());
let mut definition_index = 0;
@@ -572,12 +564,12 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
while index < events.len() {
if index == jump.0 {
- println!("jump {:?}", jump);
index = jump.1 + 1;
definition_index += 1;
jump = definition_indices
.get(definition_index)
.unwrap_or(&jump_default);
+ context.slurp_one_line_ending = true;
} else {
handle(&mut context, index);
index += 1;
@@ -683,7 +675,9 @@ fn on_exit_label_text(context: &mut CompileContext, _event: &Event) {
));
}
-fn on_exit_reference_destination_string(context: &mut CompileContext, _event: &Event) {
+fn on_exit_reference_string(context: &mut CompileContext, _event: &Event) {
+ // Drop stuff.
+ context.resume();
let media = context.media_stack.last_mut().unwrap();
media.reference_id = Some(serialize(
context.codes,
@@ -720,7 +714,10 @@ fn on_exit_media(context: &mut CompileContext, _event: &Event) {
// context.tags = is_in_image;
let media = context.media_stack.pop().unwrap();
- let id = media.reference_id.or(media.label_id);
+ let id = media
+ .reference_id
+ .or(media.label_id)
+ .map(|id| normalize_identifier(&id));
let label = media.label.unwrap();
let definition = id.and_then(|id| context.definitions.get(&id));
let destination = if let Some(definition) = definition {
@@ -734,8 +731,6 @@ fn on_exit_media(context: &mut CompileContext, _event: &Event) {
&media.title
};
- println!("media: {:?} {:?}", destination, title);
-
let destination = if let Some(destination) = destination {
destination.clone()
} else {
@@ -1047,8 +1042,7 @@ fn on_exit_definition_label_string(context: &mut CompileContext, _event: &Event)
// Discard label, use the source content instead.
context.resume();
let definition = context.media_stack.last_mut().unwrap();
- // To do: put this on `reference_id` instead?
- definition.label_id = Some(serialize(
+ definition.reference_id = Some(serialize(
context.codes,
&from_exit_event(context.events, context.index),
false,
@@ -1063,13 +1057,14 @@ fn on_exit_definition_title_string(context: &mut CompileContext, _event: &Event)
fn on_exit_definition(context: &mut CompileContext, _event: &Event) {
let definition = context.media_stack.pop().unwrap();
- let label_id = definition.label_id.unwrap();
+ let reference_id = normalize_identifier(&definition.reference_id.unwrap());
let destination = definition.destination;
let title = definition.title;
context.resume();
+
context
.definitions
- .insert(label_id, Definition { destination, title });
- context.slurp_one_line_ending = true;
+ .entry(reference_id)
+ .or_insert(Definition { destination, title });
}
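The switch to `entry(reference_id).or_insert(...)` at the end of this hunk is what makes the first definition win when a label is defined twice. The self-contained sketch below illustrates that behaviour with the standard `HashMap` API; the `Definition` struct here is a simplified stand-in for the compiler's own type, not the real one.

```rust
use std::collections::HashMap;

/// Simplified stand-in for the compiler's `Definition`.
#[derive(Debug, PartialEq)]
struct Definition {
    destination: Option<String>,
}

fn main() {
    let mut definitions: HashMap<String, Definition> = HashMap::new();

    // Two definitions for the same (already normalized) identifier, as in
    // the re-enabled `[foo]: first\n[foo]: second\n\n[foo]` test.
    for (id, url) in [("foo", "first"), ("foo", "second")] {
        definitions
            .entry(id.to_string())
            // `or_insert` only writes when the key is absent, so the
            // earlier definition is never overwritten.
            .or_insert(Definition {
                destination: Some(url.to_string()),
            });
    }

    // The first definition wins.
    assert_eq!(
        definitions["foo"],
        Definition {
            destination: Some("first".to_string())
        }
    );
}
```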
diff --git a/src/construct/definition.rs b/src/construct/definition.rs
index 5e80a93..aca22a6 100644
--- a/src/construct/definition.rs
+++ b/src/construct/definition.rs
@@ -149,7 +149,7 @@ fn label_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.exit(TokenType::DefinitionMarker);
(
State::Fn(Box::new(
- tokenizer.go(space_or_tab_one_line_ending(), destination_before),
+ tokenizer.attempt_opt(space_or_tab_one_line_ending(), destination_before),
)),
None,
)
diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs
index 2e8e950..1e4d7f2 100644
--- a/src/construct/partial_label.rs
+++ b/src/construct/partial_label.rs
@@ -181,7 +181,7 @@ fn label(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult
info.size += 1;
(State::Fn(Box::new(|t, c| label(t, c, info))), None)
}
- Code::Char('/') => {
+ Code::Char('\\') => {
tokenizer.consume(code);
info.size += 1;
if !info.data {
diff --git a/tests/definition.rs b/tests/definition.rs
index c112a96..3edf687 100644
--- a/tests/definition.rs
+++ b/tests/definition.rs
@@ -27,12 +27,17 @@ fn definition() {
"should support whitespace and line endings in definitions"
);
- // To do: some bug.
- // assert_eq!(
- // micromark("[Foo*bar\\]]:my_(url) 'title (with parens)'\n\n[Foo*bar\\]]"),
- // "<p><a href=\"my_(url)\" title=\"title (with parens)\">Foo*bar]</a></p>",
- // "should support complex definitions (1)"
- // );
+ assert_eq!(
+ micromark("[a]:b 'c'\n\n[a]"),
+ "<p><a href=\"b\" title=\"c\">a</a></p>",
+ "should support no whitespace after `:` in definitions"
+ );
+
+ assert_eq!(
+ micromark("[Foo*bar\\]]:my_(url) 'title (with parens)'\n\n[Foo*bar\\]]"),
+ "<p><a href=\"my_(url)\" title=\"title (with parens)\">Foo*bar]</a></p>",
+ "should support complex definitions (1)"
+ );
assert_eq!(
micromark("[Foo bar]:\n<my url>\n'title'\n\n[Foo bar]"),
@@ -82,33 +87,29 @@ fn definition() {
"should support character escapes in destinations and titles"
);
- // Some bug.
- // assert_eq!(
- // micromark("[foo]\n\n[foo]: url"),
- // "<p><a href=\"url\">foo</a></p>\n",
- // "should support a link before a definition"
- // );
+ assert_eq!(
+ micromark("[foo]\n\n[foo]: url"),
+ "<p><a href=\"url\">foo</a></p>\n",
+ "should support a link before a definition"
+ );
- // Some bug.
- // assert_eq!(
- // micromark("[foo]: first\n[foo]: second\n\n[foo]"),
- // "<p><a href=\"first\">foo</a></p>",
- // "should match w/ the first definition"
- // );
+ assert_eq!(
+ micromark("[foo]: first\n[foo]: second\n\n[foo]"),
+ "<p><a href=\"first\">foo</a></p>",
+ "should match w/ the first definition"
+ );
- // Some bug.
- // assert_eq!(
- // micromark("[FOO]: /url\n\n[Foo]"),
- // "<p><a href=\"/url\">Foo</a></p>",
- // "should match w/ case-insensitive (1)"
- // );
+ assert_eq!(
+ micromark("[FOO]: /url\n\n[Foo]"),
+ "<p><a href=\"/url\">Foo</a></p>",
+ "should match w/ case-insensitive (1)"
+ );
- // Some bug.
- // assert_eq!(
- // micromark("[ΑΓΩ]: /φου\n\n[αγω]"),
- // "<p><a href=\"/%CF%86%CE%BF%CF%85\">αγω</a></p>",
- // "should match w/ case-insensitive (2)"
- // );
+ assert_eq!(
+ micromark("[ΑΓΩ]: /φου\n\n[αγω]"),
+ "<p><a href=\"/%CF%86%CE%BF%CF%85\">αγω</a></p>",
+ "should match w/ case-insensitive (2)"
+ );
assert_eq!(
micromark("[foo]: /url"),
@@ -183,14 +184,13 @@ fn definition() {
"should not support setext heading underlines after definitions"
);
- // To do: some bug.
- // assert_eq!(
- // micromark(
- // "[foo]: /foo-url \"foo\"\n[bar]: /bar-url\n \"bar\"\n[baz]: /baz-url\n\n[foo],\n[bar],\n[baz]"
- // ),
- // "<p><a href=\"/foo-url\" title=\"foo\">foo</a>,\n<a href=\"/bar-url\" title=\"bar\">bar</a>,\n<a href=\"/baz-url\">baz</a></p>",
- // "should support definitions after definitions"
- // );
+ assert_eq!(
+ micromark(
+ "[foo]: /foo-url \"foo\"\n[bar]: /bar-url\n \"bar\"\n[baz]: /baz-url\n\n[foo],\n[bar],\n[baz]"
+ ),
+ "<p><a href=\"/foo-url\" title=\"foo\">foo</a>,\n<a href=\"/bar-url\" title=\"bar\">bar</a>,\n<a href=\"/baz-url\">baz</a></p>",
+ "should support definitions after definitions"
+ );
// To do: block quote.
// assert_eq!(
@@ -200,12 +200,11 @@ fn definition() {
// );
// Extra
- // To do: some bug.
- // assert_eq!(
- // micromark("[\\[\\+\\]]: example.com\n\nLink: [\\[\\+\\]]."),
- // "<p>Link: <a href=\"example.com\">[+]</a>.</p>",
- // "should match w/ character escapes"
- // );
+ assert_eq!(
+ micromark("[\\[\\+\\]]: example.com\n\nLink: [\\[\\+\\]]."),
+ "<p>Link: <a href=\"example.com\">[+]</a>.</p>",
+ "should match w/ character escapes"
+ );
assert_eq!(
micromark("[x]: \\\"&#x20;\\(\\)\\\"\n\n[x]"),
@@ -261,25 +260,23 @@ fn definition() {
"should support character escapes at the start of a title"
);
- // To do: some bug.
- // assert_eq!(
- // micromark("[x]: a \"\\\"\"\n\n[x]"),
- // "<p><a href=\"a\" title=\"\"\">x</a></p>",
- // "should support double quoted titles"
- // );
+ assert_eq!(
+ micromark("[x]: a \"'\"\n\n[x]"),
+ "<p><a href=\"a\" title=\"'\">x</a></p>",
+ "should support double quoted titles"
+ );
assert_eq!(
micromark("[x]: a '\"'\n\n[x]"),
"<p><a href=\"a\" title=\"&quot;\">x</a></p>",
- "should support double quoted titles"
+ "should support single quoted titles"
);
- // To do: some bug.
- // assert_eq!(
- // micromark("[x]: a (\"\")\n\n[x]"),
- // "<p><a href=\"a\" title=\"&quot;\"\">x</a></p>",
- // "should support paren enclosed titles"
- // );
+ assert_eq!(
+ micromark("[x]: a (\"')\n\n[x]"),
+ "<p><a href=\"a\" title=\"&quot;'\">x</a></p>",
+ "should support paren enclosed titles"
+ );
assert_eq!(
micromark("[x]: a(()\n\n[x]"),
@@ -305,12 +302,11 @@ fn definition() {
"should support trailing whitespace after a destination"
);
- // To do: some bug.
- // assert_eq!(
- // micromark("[x]: a \"\"X \t\n\n[x]"),
- // "<p><a href=\"a\" title=\"\"X>x</a></p>",
- // "should support trailing whitespace after a destination"
- // );
+ assert_eq!(
+ micromark("[x]: a \"X\" \t\n\n[x]"),
+ "<p><a href=\"a\" title=\"X\">x</a></p>",
+ "should support trailing whitespace after a title"
+ );
assert_eq!(
micromark("[&amp;&copy;&]: example.com/&amp;&copy;& \"&amp;&copy;&\"\n\n[&amp;&copy;&]"),
@@ -331,12 +327,11 @@ fn definition() {
);
// See: <https://github.com/commonmark/commonmark.js/issues/192>
- // To do: some bug.
- // assert_eq!(
- // micromark("[x]: <> \"\"\n[][x]"),
- // "<p><a href=\"\"></a></p>",
- // "should ignore an empty title"
- // );
+ assert_eq!(
+ micromark("[x]: <> \"\"\n[][x]"),
+ "<p><a href=\"\"></a></p>",
+ "should ignore an empty title"
+ );
assert_eq!(
micromark_with_options("[a]\n\n[a]: <b<c>", DANGER),
@@ -362,12 +357,11 @@ fn definition() {
"should not support an extra right paren (`)`) in a raw destination"
);
- // To do: some bug.
- // assert_eq!(
- // micromark("[a]\n\n[a]: a(1(2(3(4()))))b"),
- // "<p><a href=\"a(1(2(3(4()))))b\">a</a></p>\n",
- // "should support 4 or more sets of parens in a raw destination (link resources don’t)"
- // );
+ assert_eq!(
+ micromark("[a]\n\n[a]: a(1(2(3(4()))))b"),
+ "<p><a href=\"a(1(2(3(4()))))b\">a</a></p>\n",
+ "should support 4 or more sets of parens in a raw destination (link resources don’t)"
+ );
assert_eq!(
micromark("[a]\n\n[a]: aaa)"),
@@ -381,14 +375,14 @@ fn definition() {
"should not support a final (unbalanced) right paren in a raw destination “before” a title"
);
- // To do: some bug.
+ // To do: do not let code (indented) interrupt definitions.
// assert_eq!(
// micromark(" [a]: b \"c\"\n [d]: e\n [f]: g \"h\"\n [i]: j\n\t[k]: l (m)\n\t n [k] o"),
// "<p>n <a href=\"l\" title=\"m\">k</a> o</p>",
// "should support subsequent indented definitions"
// );
- // To do: some bug.
+ // To do: trim whitespace in paragraphs.
// assert_eq!(
// micromark("[a\n b]: c\n\n[a\n b]"),
// "<p><a href=\"c\">a\nb</a></p>",