author     Titus Wormer <tituswormer@gmail.com>  2022-06-29 17:15:17 +0200
committer  Titus Wormer <tituswormer@gmail.com>  2022-06-29 17:15:17 +0200
commit     a056a7b3716bd4cc78e47e64f7d735c5bd5b82e6 (patch)
tree       18395a3438441f5ddeca984fab0db8c9f8133a2c
parent     fa37356074c1bc3a0f74b6f6a22e038b7842ff4d (diff)
Fix a bunch of bugs with definitions, references
* Fix bug where whitespace after `:` was not allowed (it is)
* Fix bug where escapes in labels did not work due to a typo
* Fix to prefer the first definition
* Fix whitespace after definitions
* Fix matching by adding normalization
* Fix references being output as data
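The "normalization" mentioned above is CommonMark label normalization: labels match on a case-folded form with whitespace runs collapsed. The sketch below is a minimal stand-in for that idea, not the crate's actual `normalize_identifier` (imported in `src/compiler.rs` from `src/util/normalize_identifier.rs`); the name `normalize_label` is hypothetical, and `to_lowercase` only approximates the Unicode case fold the spec requires.

```rust
/// Hypothetical stand-in for the crate's `normalize_identifier`:
/// collapse runs of whitespace to a single space, trim the ends, and
/// case-fold so `[FOO]`, `[Foo]`, and `[ foo ]` all match.
fn normalize_label(label: &str) -> String {
    label
        .split_whitespace()
        .collect::<Vec<_>>()
        .join(" ")
        // Approximation: the spec asks for Unicode case folding, not lowercasing.
        .to_lowercase()
}

fn main() {
    assert_eq!(normalize_label("FOO"), normalize_label("Foo"));
    assert_eq!(normalize_label("ΑΓΩ"), normalize_label("αγω"));
    assert_eq!(normalize_label("Foo \n bar"), normalize_label("foo bar"));
}
```

This is what lets the case-insensitive matching tests (`[FOO]`/`[Foo]`, `[ΑΓΩ]`/`[αγω]`) be re-enabled in `tests/definition.rs` below.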
Diffstat
-rw-r--r--  readme.md                      |   6
-rw-r--r--  src/compiler.rs                |  37
-rw-r--r--  src/construct/definition.rs    |   2
-rw-r--r--  src/construct/partial_label.rs |   2
-rw-r--r--  tests/definition.rs            | 148
5 files changed, 92 insertions(+), 103 deletions(-)
diff --git a/readme.md b/readme.md
index 16f81d9..7c70905 100644
--- a/readme.md
+++ b/readme.md
@@ -143,15 +143,15 @@ cargo doc --document-private-items
#### Parse
- [ ] (1) Parse initial and final space_or_tab of paragraphs (in text)\
- test (`code_indented`, `hard_break_escape`, `hard_break_trailing`,
+ test (`code_indented`, `definition`, `hard_break_escape`, `hard_break_trailing`,
`heading_atx`, `heading_setext`, `html_flow`, `misc_soft_break`,
`misc_tabs`, `thematic_break`)
-- [ ] (3) Interrupting (html flow complete)
+- [ ] (3) Interrupting (html flow complete, definition + code_indented)
- [ ] (5) attention\
test (`character_reference`, `hard_break_escape`, `hard_break_trailing`,
`heading_atx`, `heading_setext`, `html_flow`, `thematic_break`)\
- [ ] (8) block quote\
- test (`code_fenced`, `code_indented`, `heading_atx`, `heading_setext`,
+ test (`code_fenced`, `definition`, `code_indented`, `heading_atx`, `heading_setext`,
`html_flow`, `misc_default_line_ending`, `thematic_break`)
- [ ] (8) list\
test (`character_reference`, `code_indented`, `heading_setext`,
diff --git a/src/compiler.rs b/src/compiler.rs
index 3dd6ae4..bb2359e 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -2,6 +2,7 @@
use crate::constant::{SAFE_PROTOCOL_HREF, SAFE_PROTOCOL_SRC};
use crate::construct::character_reference::Kind as CharacterReferenceKind;
use crate::tokenizer::{Code, Event, EventType, TokenType};
+use crate::util::normalize_identifier::normalize_identifier;
use crate::util::{
decode_character_reference::{decode_named, decode_numeric},
encode::encode,
@@ -431,16 +432,14 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
TokenType::DefinitionDestinationString,
on_enter_definition_destination_string,
);
+ enter_map.insert(TokenType::ReferenceString, on_enter_buffer);
enter_map.insert(TokenType::DefinitionLabelString, on_enter_buffer);
enter_map.insert(TokenType::DefinitionTitleString, on_enter_buffer);
let mut exit_map: Map = HashMap::new();
exit_map.insert(TokenType::Label, on_exit_label);
exit_map.insert(TokenType::LabelText, on_exit_label_text);
- exit_map.insert(
- TokenType::ReferenceString,
- on_exit_reference_destination_string,
- );
+ exit_map.insert(TokenType::ReferenceString, on_exit_reference_string);
exit_map.insert(
TokenType::ResourceDestinationString,
on_exit_resource_destination_string,
@@ -525,11 +524,6 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
&exit_map
};
- println!(
- "handle {:?}:{:?} ({:?})",
- event.event_type, event.token_type, index
- );
-
if let Some(func) = map.get(&event.token_type) {
func(context, event);
}
@@ -561,8 +555,6 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
index += 1;
}
- println!("xxx: {:?}", definition_indices);
-
index = 0;
let jump_default = (events.len(), events.len());
let mut definition_index = 0;
@@ -572,12 +564,12 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
while index < events.len() {
if index == jump.0 {
- println!("jump {:?}", jump);
index = jump.1 + 1;
definition_index += 1;
jump = definition_indices
.get(definition_index)
.unwrap_or(&jump_default);
+ context.slurp_one_line_ending = true;
} else {
handle(&mut context, index);
index += 1;
@@ -683,7 +675,9 @@ fn on_exit_label_text(context: &mut CompileContext, _event: &Event) {
));
}
-fn on_exit_reference_destination_string(context: &mut CompileContext, _event: &Event) {
+fn on_exit_reference_string(context: &mut CompileContext, _event: &Event) {
+ // Drop stuff.
+ context.resume();
let media = context.media_stack.last_mut().unwrap();
media.reference_id = Some(serialize(
context.codes,
@@ -720,7 +714,10 @@ fn on_exit_media(context: &mut CompileContext, _event: &Event) {
// context.tags = is_in_image;
let media = context.media_stack.pop().unwrap();
- let id = media.reference_id.or(media.label_id);
+ let id = media
+ .reference_id
+ .or(media.label_id)
+ .map(|id| normalize_identifier(&id));
let label = media.label.unwrap();
let definition = id.and_then(|id| context.definitions.get(&id));
let destination = if let Some(definition) = definition {
@@ -734,8 +731,6 @@ fn on_exit_media(context: &mut CompileContext, _event: &Event) {
&media.title
};
- println!("media: {:?} {:?}", destination, title);
-
let destination = if let Some(destination) = destination {
destination.clone()
} else {
@@ -1047,8 +1042,7 @@ fn on_exit_definition_label_string(context: &mut CompileContext, _event: &Event)
// Discard label, use the source content instead.
context.resume();
let definition = context.media_stack.last_mut().unwrap();
- // To do: put this on `reference_id` instead?
- definition.label_id = Some(serialize(
+ definition.reference_id = Some(serialize(
context.codes,
&from_exit_event(context.events, context.index),
false,
@@ -1063,13 +1057,14 @@ fn on_exit_definition_title_string(context: &mut CompileContext, _event: &Event)
fn on_exit_definition(context: &mut CompileContext, _event: &Event) {
let definition = context.media_stack.pop().unwrap();
- let label_id = definition.label_id.unwrap();
+ let reference_id = normalize_identifier(&definition.reference_id.unwrap());
let destination = definition.destination;
let title = definition.title;
context.resume();
+
context
.definitions
- .insert(label_id, Definition { destination, title });
- context.slurp_one_line_ending = true;
+ .entry(reference_id)
+ .or_insert(Definition { destination, title });
}
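The switch to `entry(reference_id).or_insert(...)` at the end of this hunk is what makes the first definition win when a label is defined twice. The self-contained sketch below illustrates that behaviour with the standard `HashMap` API; the `Definition` struct here is a simplified stand-in for the compiler's own type, not the real one.

```rust
use std::collections::HashMap;

/// Simplified stand-in for the compiler's `Definition`.
#[derive(Debug, PartialEq)]
struct Definition {
    destination: Option<String>,
}

fn main() {
    let mut definitions: HashMap<String, Definition> = HashMap::new();

    // Two definitions for the same (already normalized) identifier, as in
    // the re-enabled `[foo]: first\n[foo]: second\n\n[foo]` test.
    for (id, url) in [("foo", "first"), ("foo", "second")] {
        definitions
            .entry(id.to_string())
            // `or_insert` only writes when the key is absent, so the
            // earlier definition is never overwritten.
            .or_insert(Definition {
                destination: Some(url.to_string()),
            });
    }

    // The first definition wins.
    assert_eq!(
        definitions["foo"],
        Definition {
            destination: Some("first".to_string())
        }
    );
}
```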
diff --git a/src/construct/definition.rs b/src/construct/definition.rs
index 5e80a93..aca22a6 100644
--- a/src/construct/definition.rs
+++ b/src/construct/definition.rs
@@ -149,7 +149,7 @@ fn label_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.exit(TokenType::DefinitionMarker);
(
State::Fn(Box::new(
- tokenizer.go(space_or_tab_one_line_ending(), destination_before),
+ tokenizer.attempt_opt(space_or_tab_one_line_ending(), destination_before),
)),
None,
)
diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs
index 2e8e950..1e4d7f2 100644
--- a/src/construct/partial_label.rs
+++ b/src/construct/partial_label.rs
@@ -181,7 +181,7 @@ fn label(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult
info.size += 1;
(State::Fn(Box::new(|t, c| label(t, c, info))), None)
}
- Code::Char('/') => {
+ Code::Char('\\') => {
tokenizer.consume(code);
info.size += 1;
if !info.data {
diff --git a/tests/definition.rs b/tests/definition.rs
index c112a96..3edf687 100644
--- a/tests/definition.rs
+++ b/tests/definition.rs
@@ -27,12 +27,17 @@ fn definition() {
"should support whitespace and line endings in definitions"
);
- // To do: some bug.
- // assert_eq!(
- // micromark("[Foo*bar\\]]:my_(url) 'title (with parens)'\n\n[Foo*bar\\]]"),
- // "<p><a href=\"my_(url)\" title=\"title (with parens)\">Foo*bar]</a></p>",
- // "should support complex definitions (1)"
- // );
+ assert_eq!(
+ micromark("[a]:b 'c'\n\n[a]"),
+ "<p><a href=\"b\" title=\"c\">a</a></p>",
+ "should support no whitespace after `:` in definitions"
+ );
+
+ assert_eq!(
+ micromark("[Foo*bar\\]]:my_(url) 'title (with parens)'\n\n[Foo*bar\\]]"),
+ "<p><a href=\"my_(url)\" title=\"title (with parens)\">Foo*bar]</a></p>",
+ "should support complex definitions (1)"
+ );
assert_eq!(
micromark("[Foo bar]:\n<my url>\n'title'\n\n[Foo bar]"),
@@ -82,33 +87,29 @@ fn definition() {
"should support character escapes in destinations and titles"
);
- // Some bug.
- // assert_eq!(
- // micromark("[foo]\n\n[foo]: url"),
- // "<p><a href=\"url\">foo</a></p>\n",
- // "should support a link before a definition"
- // );
+ assert_eq!(
+ micromark("[foo]\n\n[foo]: url"),
+ "<p><a href=\"url\">foo</a></p>\n",
+ "should support a link before a definition"
+ );
- // Some bug.
- // assert_eq!(
- // micromark("[foo]: first\n[foo]: second\n\n[foo]"),
- // "<p><a href=\"first\">foo</a></p>",
- // "should match w/ the first definition"
- // );
+ assert_eq!(
+ micromark("[foo]: first\n[foo]: second\n\n[foo]"),
+ "<p><a href=\"first\">foo</a></p>",
+ "should match w/ the first definition"
+ );
- // Some bug.
- // assert_eq!(
- // micromark("[FOO]: /url\n\n[Foo]"),
- // "<p><a href=\"/url\">Foo</a></p>",
- // "should match w/ case-insensitive (1)"
- // );
+ assert_eq!(
+ micromark("[FOO]: /url\n\n[Foo]"),
+ "<p><a href=\"/url\">Foo</a></p>",
+ "should match w/ case-insensitive (1)"
+ );
- // Some bug.
- // assert_eq!(
- // micromark("[ΑΓΩ]: /φου\n\n[αγω]"),
- // "<p><a href=\"/%CF%86%CE%BF%CF%85\">αγω</a></p>",
- // "should match w/ case-insensitive (2)"
- // );
+ assert_eq!(
+ micromark("[ΑΓΩ]: /φου\n\n[αγω]"),
+ "<p><a href=\"/%CF%86%CE%BF%CF%85\">αγω</a></p>",
+ "should match w/ case-insensitive (2)"
+ );
assert_eq!(
micromark("[foo]: /url"),
@@ -183,14 +184,13 @@ fn definition() {
"should not support setext heading underlines after definitions"
);
- // To do: some bug.
- // assert_eq!(
- // micromark(
- // "[foo]: /foo-url \"foo\"\n[bar]: /bar-url\n \"bar\"\n[baz]: /baz-url\n\n[foo],\n[bar],\n[baz]"
- // ),
- // "<p><a href=\"/foo-url\" title=\"foo\">foo</a>,\n<a href=\"/bar-url\" title=\"bar\">bar</a>,\n<a href=\"/baz-url\">baz</a></p>",
- // "should support definitions after definitions"
- // );
+ assert_eq!(
+ micromark(
+ "[foo]: /foo-url \"foo\"\n[bar]: /bar-url\n \"bar\"\n[baz]: /baz-url\n\n[foo],\n[bar],\n[baz]"
+ ),
+ "<p><a href=\"/foo-url\" title=\"foo\">foo</a>,\n<a href=\"/bar-url\" title=\"bar\">bar</a>,\n<a href=\"/baz-url\">baz</a></p>",
+ "should support definitions after definitions"
+ );
// To do: block quote.
// assert_eq!(
@@ -200,12 +200,11 @@ fn definition() {
// );
// Extra
- // To do: some bug.
- // assert_eq!(
- // micromark("[\\[\\+\\]]: example.com\n\nLink: [\\[\\+\\]]."),
- // "<p>Link: <a href=\"example.com\">[+]</a>.</p>",
- // "should match w/ character escapes"
- // );
+ assert_eq!(
+ micromark("[\\[\\+\\]]: example.com\n\nLink: [\\[\\+\\]]."),
+ "<p>Link: <a href=\"example.com\">[+]</a>.</p>",
+ "should match w/ character escapes"
+ );
assert_eq!(
micromark("[x]: \\\"&#x20;\\(\\)\\\"\n\n[x]"),
@@ -261,25 +260,23 @@ fn definition() {
"should support character escapes at the start of a title"
);
- // To do: some bug.
- // assert_eq!(
- // micromark("[x]: a \"\\\"\"\n\n[x]"),
- // "<p><a href=\"a\" title=\"\"\">x</a></p>",
- // "should support double quoted titles"
- // );
+ assert_eq!(
+ micromark("[x]: a \"'\"\n\n[x]"),
+ "<p><a href=\"a\" title=\"'\">x</a></p>",
+ "should support double quoted titles"
+ );
assert_eq!(
micromark("[x]: a '\"'\n\n[x]"),
"<p><a href=\"a\" title=\"&quot;\">x</a></p>",
- "should support double quoted titles"
+ "should support single quoted titles"
);
- // To do: some bug.
- // assert_eq!(
- // micromark("[x]: a (\"\")\n\n[x]"),
- // "<p><a href=\"a\" title=\"&quot;\"\">x</a></p>",
- // "should support paren enclosed titles"
- // );
+ assert_eq!(
+ micromark("[x]: a (\"')\n\n[x]"),
+ "<p><a href=\"a\" title=\"&quot;'\">x</a></p>",
+ "should support paren enclosed titles"
+ );
assert_eq!(
micromark("[x]: a(()\n\n[x]"),
@@ -305,12 +302,11 @@ fn definition() {
"should support trailing whitespace after a destination"
);
- // To do: some bug.
- // assert_eq!(
- // micromark("[x]: a \"\"X \t\n\n[x]"),
- // "<p><a href=\"a\" title=\"\"X>x</a></p>",
- // "should support trailing whitespace after a destination"
- // );
+ assert_eq!(
+ micromark("[x]: a \"X\" \t\n\n[x]"),
+ "<p><a href=\"a\" title=\"X\">x</a></p>",
+ "should support trailing whitespace after a title"
+ );
assert_eq!(
micromark("[&amp;&copy;&]: example.com/&amp;&copy;& \"&amp;&copy;&\"\n\n[&amp;&copy;&]"),
@@ -331,12 +327,11 @@ fn definition() {
);
// See: <https://github.com/commonmark/commonmark.js/issues/192>
- // To do: some bug.
- // assert_eq!(
- // micromark("[x]: <> \"\"\n[][x]"),
- // "<p><a href=\"\"></a></p>",
- // "should ignore an empty title"
- // );
+ assert_eq!(
+ micromark("[x]: <> \"\"\n[][x]"),
+ "<p><a href=\"\"></a></p>",
+ "should ignore an empty title"
+ );
assert_eq!(
micromark_with_options("[a]\n\n[a]: <b<c>", DANGER),
@@ -362,12 +357,11 @@ fn definition() {
"should not support an extra right paren (`)`) in a raw destination"
);
- // To do: some bug.
- // assert_eq!(
- // micromark("[a]\n\n[a]: a(1(2(3(4()))))b"),
- // "<p><a href=\"a(1(2(3(4()))))b\">a</a></p>\n",
- // "should support 4 or more sets of parens in a raw destination (link resources don’t)"
- // );
+ assert_eq!(
+ micromark("[a]\n\n[a]: a(1(2(3(4()))))b"),
+ "<p><a href=\"a(1(2(3(4()))))b\">a</a></p>\n",
+ "should support 4 or more sets of parens in a raw destination (link resources don’t)"
+ );
assert_eq!(
micromark("[a]\n\n[a]: aaa)"),
@@ -381,14 +375,14 @@ fn definition() {
"should not support a final (unbalanced) right paren in a raw destination “before” a title"
);
- // To do: some bug.
+ // To do: do not let code (indented) interrupt definitions.
// assert_eq!(
// micromark(" [a]: b \"c\"\n [d]: e\n [f]: g \"h\"\n [i]: j\n\t[k]: l (m)\n\t n [k] o"),
// "<p>n <a href=\"l\" title=\"m\">k</a> o</p>",
// "should support subsequent indented definitions"
// );
- // To do: some bug.
+ // To do: trim whitespace in paragraphs.
// assert_eq!(
// micromark("[a\n b]: c\n\n[a\n b]"),
// "<p><a href=\"c\">a\nb</a></p>",