Add tests for matching identifiers w/ Turkish `i`

author: Titus Wormer <tituswormer@gmail.com> 2022-07-11 11:46:47 +0200
committer: Titus Wormer <tituswormer@gmail.com> 2022-07-11 11:46:47 +0200
commit: c98f902eeae2ac9952173385b146249f11364765 (patch)
tree: 88179439f59608ccde7d32ded1b7dcf1602ccc5e
parent: 1fe3139713fb9e5f000af458154ddf634d6ebe64 (diff)
download: markdown-rs-c98f902eeae2ac9952173385b146249f11364765.tar.gz
markdown-rs-c98f902eeae2ac9952173385b146249f11364765.tar.bz2
markdown-rs-c98f902eeae2ac9952173385b146249f11364765.zip
3 files changed, 26 insertions, 3 deletions
diff --git a/readme.md b/readme.md
index ad8aec0..2d87144 100644
--- a/readme.md
+++ b/readme.md
@@ -152,8 +152,6 @@ cargo doc --document-private-items
 - [ ] (5) Do some research on rust best practices for APIs, e.g., what to accept,
       how to integrate with streams or so?
 - [ ] (1) Go through clippy rules, and such, to add strict code styles
-- [ ] (1) Make sure that rust character groups match CM character groups (e.g., is
-      `unicode_whitespace` or so the same?)
 - [ ] (1) Any special handling of surrogates?
 - [ ] (1) Make sure debugging, assertions are useful for other folks
 - [ ] (3) Add some benchmarks (against comrak, pulldown-cmark, kramdown?), do some perf testing
@@ -274,3 +272,4 @@ important.
 - [x] (1) Use `char::REPLACEMENT_CHARACTER`?
 - [x] (3) Add support for concrete constructs
       (html (flow) or code (fenced) cannot be “pierced” into by containers)
+- [x] (1) Make sure that rust character groups match CM character groups
diff --git a/src/util/normalize_identifier.rs b/src/util/normalize_identifier.rs
index 123a3a9..73f246d 100644
--- a/src/util/normalize_identifier.rs
+++ b/src/util/normalize_identifier.rs
@@ -54,7 +54,6 @@ pub fn normalize_identifier(value: &str) -> String {
         }
     }
 
-    // To do: test if this matches unicode.
     // Some characters are considered “uppercase”, but if their lowercase
     // counterpart is uppercased will result in a different uppercase
     // character.
diff --git a/tests/definition.rs b/tests/definition.rs
index 0686b6d..9bf4072 100644
--- a/tests/definition.rs
+++ b/tests/definition.rs
@@ -112,6 +112,31 @@ fn definition() {
     );
 
     assert_eq!(
+        micromark("[ı]: a\n\n[I]"),
+        "<p><a href=\"a\">I</a></p>",
+        "should match w/ undotted turkish i (1)"
+    );
+    assert_eq!(
+        micromark("[I]: a\n\n[ı]"),
+        "<p><a href=\"a\">ı</a></p>",
+        "should match w/ undotted turkish i (2)"
+    );
+    // Ref: <https://spec.commonmark.org/dingus/?text=%5Bi%5D%3A%20a%0A%0A%5Bİ%5D>
+    // GFM parses the same (last checked: 2022-07-11).
+    assert_eq!(
+        micromark("[i]: a\n\n[İ]"),
+        "<p>[İ]</p>",
+        "should *not* match w/ dotted turkish i (1)"
+    );
+    // Ref: <https://spec.commonmark.org/dingus/?text=%5Bİ%5D%3A%20a%0A%0A%5Bi%5D>
+    // GFM parses the same (last checked: 2022-07-11).
+    assert_eq!(
+        micromark("[İ]: a\n\n[i]"),
+        "<p>[i]</p>",
+        "should *not* match w/ dotted turkish i (2)"
+    );
+
+    assert_eq!(
         micromark("[foo]: /url"),
         "",
         "should not contribute anything w/o reference (1)"
author	Titus Wormer <tituswormer@gmail.com>	2022-07-11 11:46:47 +0200
committer	Titus Wormer <tituswormer@gmail.com>	2022-07-11 11:46:47 +0200
commit	c98f902eeae2ac9952173385b146249f11364765 (patch)
tree	88179439f59608ccde7d32ded1b7dcf1602ccc5e
parent	1fe3139713fb9e5f000af458154ddf634d6ebe64 (diff)
download	markdown-rs-c98f902eeae2ac9952173385b146249f11364765.tar.gz markdown-rs-c98f902eeae2ac9952173385b146249f11364765.tar.bz2 markdown-rs-c98f902eeae2ac9952173385b146249f11364765.zip