aboutsummaryrefslogtreecommitdiffstats
path: root/src/util/normalize_identifier.rs
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-08-15 18:22:40 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-08-15 18:22:40 +0200
commit2379c2749916483be68dbf816a4c56cd59ced958 (patch)
tree5db8ea01782212b3f465d40f912ed87481012bbb /src/util/normalize_identifier.rs
parent3aa45de9dc359169ccaabc07ffa986d72a010cd8 (diff)
downloadmarkdown-rs-2379c2749916483be68dbf816a4c56cd59ced958.tar.gz
markdown-rs-2379c2749916483be68dbf816a4c56cd59ced958.tar.bz2
markdown-rs-2379c2749916483be68dbf816a4c56cd59ced958.zip
Refactor to proof docs, grammars
Diffstat (limited to 'src/util/normalize_identifier.rs')
-rw-r--r--src/util/normalize_identifier.rs30
1 files changed, 13 insertions, 17 deletions
diff --git a/src/util/normalize_identifier.rs b/src/util/normalize_identifier.rs
index f5b12d0..ddc51f8 100644
--- a/src/util/normalize_identifier.rs
+++ b/src/util/normalize_identifier.rs
@@ -1,17 +1,25 @@
-//! Utility to normalize identifiers.
+//! Normalize identifiers.
/// Normalize an identifier, as found in [references][label_end] and
/// [definitions][definition], so it can be compared when matching.
///
/// This collapsed whitespace found in markdown (`\t`, `\r`, `\n`, and ` `)
-/// into one space, trims it (as in, dropping the first and last space),
-/// and then performs unicode case folding twice: first by uppercasing
-/// lowercase characters, and then lowercasing uppercase characters.
+/// into one space, trims it (as in, dropping the first and last space), and
+/// then performs unicode case folding twice: first by lowercasing uppercase
+/// characters, and then uppercasing lowercase characters.
///
/// Some characters are considered “uppercase”, such as U+03F4 (`ϴ`), but if
/// their lowercase counterpart (U+03B8 (`θ`)) is uppercased will result in a
/// different uppercase character (U+0398 (`Θ`)).
-/// Hence, to get that form, we perform both upper- and lowercase.
+/// Hence, to get that form, we perform both lower- and uppercase.
+///
+/// Performing these steps in that order works, but the inverse does not work.
+/// To illustrate, say the source markdown containes two identifiers
+/// `SS` (U+0053 U+0053) and `ẞ` (U+1E9E), which would be lowercased to
+/// `ss` (U+0073 U+0073) and `ß` (U+00DF), and those in turn would both
+/// uppercase to `SS` (U+0053 U+0053).
+/// If we’d inverse the steps, for `ẞ`, we’d first uppercase without a
+/// change, and then lowercase to `ß`, which would not match `ss`.
///
/// ## Examples
///
@@ -64,17 +72,5 @@ pub fn normalize_identifier(value: &str) -> String {
result.push_str(&value[start..]);
}
- // Some characters are considered “uppercase”, but if their lowercase
- // counterpart is uppercased will result in a different uppercase
- // character.
- // Hence, to get that form, we perform both lower- and uppercase.
- // Performing these steps in that order works, but the inverse does not
- // work.
- // To illustrate, say the source markdown containes two identifiers `SS`
- // (U+0053 U+0053) and `ẞ` (U+1E9E), which would be lowercased to `ss`
- // (U+0073 U+0073) and `ß` (U+00DF), and those in turn would both uppercase
- // to `SS` (U+0053 U+0053).
- // If we’d inverse the steps, for `ẞ`, we’d first uppercase without a
- // change, and then lowercase to `ß`, which would not match `ss`.
result.to_lowercase().to_uppercase()
}