aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/util/normalize_identifier.rs10
-rw-r--r--tests/commonmark.rs19
2 files changed, 18 insertions, 11 deletions
diff --git a/src/util/normalize_identifier.rs b/src/util/normalize_identifier.rs
index 73f246d..feb7239 100644
--- a/src/util/normalize_identifier.rs
+++ b/src/util/normalize_identifier.rs
@@ -58,9 +58,17 @@ pub fn normalize_identifier(value: &str) -> String {
// counterpart is uppercased will result in a different uppercase
// character.
// Hence, to get that form, we perform both lower- and uppercase.
+ // Performing these steps in that order works, but the inverse does not
+ // work.
+ // To illustrate, say the source markdown contains two identifiers `SS`
+ // (U+0053 U+0053) and `ẞ` (U+1E9E), which would be lowercased to `ss`
+ // (U+0073 U+0073) and `ß` (U+00DF), and those in turn would both uppercase
+ // to `SS` (U+0053 U+0053).
+ // If we’d invert the steps, for `ẞ`, we’d first uppercase without a
+ // change, and then lowercase to `ß`, which would not match `ss`.
codes
.iter()
.collect::<String>()
- .to_uppercase()
.to_lowercase()
+ .to_uppercase()
}
diff --git a/tests/commonmark.rs b/tests/commonmark.rs
index 503acca..b75c940 100644
--- a/tests/commonmark.rs
+++ b/tests/commonmark.rs
@@ -6130,16 +6130,15 @@ bar>)</p>
r###"Links (537)"###
);
- // To do: Some unicode normalization bug.
- // assert_eq!(
- // micromark_with_options(r###"[ẞ]
-
- // [SS]: /url
- // "###, DANGER),
- // r###"<p><a href="/url">ẞ</a></p>
- // "###,
- // r###"Links (538)"###
- // );
+ assert_eq!(
+ micromark_with_options(r###"[ẞ]
+
+[SS]: /url
+"###, DANGER),
+ r###"<p><a href="/url">ẞ</a></p>
+"###,
+ r###"Links (538)"###
+);
assert_eq!(
micromark_with_options(r###"[Foo