From c1050b3527cc2d94ba1d8575e40fcc7700d3dcc3 Mon Sep 17 00:00:00 2001
From: Titus Wormer <tituswormer@gmail.com>
Date: Mon, 18 Jul 2022 10:58:40 +0200
Subject: Fix edge case in identifier normalization

---
 src/util/normalize_identifier.rs | 10 +++++++++-
 tests/commonmark.rs              | 19 +++++++++----------
 2 files changed, 18 insertions(+), 11 deletions(-)
diff --git a/src/util/normalize_identifier.rs b/src/util/normalize_identifier.rs
index 73f246d..feb7239 100644
--- a/src/util/normalize_identifier.rs
+++ b/src/util/normalize_identifier.rs
@@ -58,9 +58,17 @@ pub fn normalize_identifier(value: &str) -> String {
     // counterpart is uppercased will result in a different uppercase
     // character.
     // Hence, to get that form, we perform both lower- and uppercase.
+    // Performing these steps in that order works, but the inverse does not
+    // work.
+    // To illustrate, say the source markdown contains two identifiers `SS`
+    // (U+0053 U+0053) and `ẞ` (U+1E9E), which would be lowercased to `ss`
+    // (U+0073 U+0073) and `ß` (U+00DF), and those in turn would both uppercase
+    // to `SS` (U+0053 U+0053).
+    // If we’d invert the steps, for `ẞ`, we’d first uppercase without a
+    // change, and then lowercase to `ß`, which would not match `ss`.
     codes
         .iter()
         .collect::<String>()
-        .to_uppercase()
         .to_lowercase()
+        .to_uppercase()
 }
diff --git a/tests/commonmark.rs b/tests/commonmark.rs
index 503acca..b75c940 100644
--- a/tests/commonmark.rs
+++ b/tests/commonmark.rs
@@ -6130,16 +6130,15 @@ bar>)</p>
         r###"Links (537)"###
 );
 
-    // To do: Some unicode normalization bug.
-    //     assert_eq!(
-    //         micromark_with_options(r###"[ẞ]
-
-    // [SS]: /url
-    // "###, DANGER),
-    //         r###"<p><a href="/url">ẞ</a></p>
-    // "###,
-    //         r###"Links (538)"###
-    // );
+    assert_eq!(
+        micromark_with_options(r###"[ẞ]
+
+[SS]: /url
+"###, DANGER),
+        r###"<p><a href="/url">ẞ</a></p>
+"###,
+        r###"Links (538)"###
+);
 
     assert_eq!(
         micromark_with_options(r###"[Foo
-- 
cgit