diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-07-18 10:58:40 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-07-18 10:58:40 +0200 |
commit | c1050b3527cc2d94ba1d8575e40fcc7700d3dcc3 (patch) | |
tree | 0a1ba71f4484f83c2237b8033f9b871689a262d8 | |
parent | 32f9f35d89e41072543186969b995ac9aa020f98 (diff) | |
download | markdown-rs-c1050b3527cc2d94ba1d8575e40fcc7700d3dcc3.tar.gz markdown-rs-c1050b3527cc2d94ba1d8575e40fcc7700d3dcc3.tar.bz2 markdown-rs-c1050b3527cc2d94ba1d8575e40fcc7700d3dcc3.zip |
Fix edge case in identifier normalization
-rw-r--r-- | src/util/normalize_identifier.rs | 10 | ||||
-rw-r--r-- | tests/commonmark.rs | 19 |
2 files changed, 18 insertions, 11 deletions
diff --git a/src/util/normalize_identifier.rs b/src/util/normalize_identifier.rs index 73f246d..feb7239 100644 --- a/src/util/normalize_identifier.rs +++ b/src/util/normalize_identifier.rs @@ -58,9 +58,17 @@ pub fn normalize_identifier(value: &str) -> String { // counterpart is uppercased will result in a different uppercase // character. // Hence, to get that form, we perform both lower- and uppercase. + // Performing these steps in that order works, but the inverse does not + // work. + // To illustrate, say the source markdown contains two identifiers `SS` + // (U+0053 U+0053) and `ẞ` (U+1E9E), which would be lowercased to `ss` + // (U+0073 U+0073) and `ß` (U+00DF), and those in turn would both uppercase + // to `SS` (U+0053 U+0053). + // If we’d invert the steps, for `ẞ`, we’d first uppercase without a + // change, and then lowercase to `ß`, which would not match `ss`. codes .iter() .collect::<String>() - .to_uppercase() .to_lowercase() + .to_uppercase() } diff --git a/tests/commonmark.rs b/tests/commonmark.rs index 503acca..b75c940 100644 --- a/tests/commonmark.rs +++ b/tests/commonmark.rs @@ -6130,16 +6130,15 @@ bar>)</p> r###"Links (537)"### ); - // To do: Some unicode normalization bug. - // assert_eq!( - // micromark_with_options(r###"[ẞ] - - // [SS]: /url - // "###, DANGER), - // r###"<p><a href="/url">ẞ</a></p> - // "###, - // r###"Links (538)"### - // ); + assert_eq!( + micromark_with_options(r###"[ẞ] + +[SS]: /url +"###, DANGER), + r###"<p><a href="/url">ẞ</a></p> +"###, + r###"Links (538)"### +); assert_eq!( micromark_with_options(r###"[Foo |