From c98f902eeae2ac9952173385b146249f11364765 Mon Sep 17 00:00:00 2001
From: Titus Wormer <tituswormer@gmail.com>
Date: Mon, 11 Jul 2022 11:46:47 +0200
Subject: Add tests for matching identifiers w/ Turkish `i`

---
 readme.md                        |  3 +--
 src/util/normalize_identifier.rs |  1 -
 tests/definition.rs              | 25 +++++++++++++++++++++++++
 3 files changed, 26 insertions(+), 3 deletions(-)
diff --git a/readme.md b/readme.md
index ad8aec0..2d87144 100644
--- a/readme.md
+++ b/readme.md
@@ -152,8 +152,6 @@ cargo doc --document-private-items
 - [ ] (5) Do some research on rust best practices for APIs, e.g., what to accept,
       how to integrate with streams or so?
 - [ ] (1) Go through clippy rules, and such, to add strict code styles
-- [ ] (1) Make sure that rust character groups match CM character groups (e.g., is
-      `unicode_whitespace` or so the same?)
 - [ ] (1) Any special handling of surrogates?
 - [ ] (1) Make sure debugging, assertions are useful for other folks
 - [ ] (3) Add some benchmarks (against comrak, pulldown-cmark, kramdown?), do some perf testing
@@ -274,3 +272,4 @@ important.
 - [x] (1) Use `char::REPLACEMENT_CHARACTER`?
 - [x] (3) Add support for concrete constructs
       (html (flow) or code (fenced) cannot be “pierced” into by containers)
+- [x] (1) Make sure that rust character groups match CM character groups
diff --git a/src/util/normalize_identifier.rs b/src/util/normalize_identifier.rs
index 123a3a9..73f246d 100644
--- a/src/util/normalize_identifier.rs
+++ b/src/util/normalize_identifier.rs
@@ -54,7 +54,6 @@ pub fn normalize_identifier(value: &str) -> String {
         }
     }
 
-    // To do: test if this matches unicode.
     // Some characters are considered “uppercase”, but if their lowercase
     // counterpart is uppercased will result in a different uppercase
     // character.
diff --git a/tests/definition.rs b/tests/definition.rs
index 0686b6d..9bf4072 100644
--- a/tests/definition.rs
+++ b/tests/definition.rs
@@ -111,6 +111,31 @@ fn definition() {
         "should match w/ case-insensitive (2)"
     );
 
+    assert_eq!(
+        micromark("[ı]: a\n\n[I]"),
+        "<p><a href=\"a\">I</a></p>",
+        "should match w/ undotted turkish i (1)"
+    );
+    assert_eq!(
+        micromark("[I]: a\n\n[ı]"),
+        "<p><a href=\"a\">ı</a></p>",
+        "should match w/ undotted turkish i (2)"
+    );
+    // Ref: <https://spec.commonmark.org/dingus/?text=%5Bi%5D%3A%20a%0A%0A%5Bİ%5D>
+    // GFM parses the same (last checked: 2022-07-11).
+    assert_eq!(
+        micromark("[i]: a\n\n[İ]"),
+        "<p>[İ]</p>",
+        "should *not* match w/ dotted turkish i (1)"
+    );
+    // Ref: <https://spec.commonmark.org/dingus/?text=%5Bİ%5D%3A%20a%0A%0A%5Bi%5D>
+    // GFM parses the same (last checked: 2022-07-11).
+    assert_eq!(
+        micromark("[İ]: a\n\n[i]"),
+        "<p>[i]</p>",
+        "should *not* match w/ dotted turkish i (2)"
+    );
+
     assert_eq!(
         micromark("[foo]: /url"),
         "",
-- 
cgit