diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-08-15 18:22:40 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-08-15 18:22:40 +0200 |
commit | 2379c2749916483be68dbf816a4c56cd59ced958 (patch) | |
tree | 5db8ea01782212b3f465d40f912ed87481012bbb /src/util/normalize_identifier.rs | |
parent | 3aa45de9dc359169ccaabc07ffa986d72a010cd8 (diff) | |
download | markdown-rs-2379c2749916483be68dbf816a4c56cd59ced958.tar.gz markdown-rs-2379c2749916483be68dbf816a4c56cd59ced958.tar.bz2 markdown-rs-2379c2749916483be68dbf816a4c56cd59ced958.zip |
Refactor to proof docs, grammars
Diffstat (limited to '')
-rw-r--r-- | src/util/normalize_identifier.rs | 30 |
1 files changed, 13 insertions, 17 deletions
diff --git a/src/util/normalize_identifier.rs b/src/util/normalize_identifier.rs index f5b12d0..ddc51f8 100644 --- a/src/util/normalize_identifier.rs +++ b/src/util/normalize_identifier.rs @@ -1,17 +1,25 @@ -//! Utility to normalize identifiers. +//! Normalize identifiers. /// Normalize an identifier, as found in [references][label_end] and /// [definitions][definition], so it can be compared when matching. /// /// This collapsed whitespace found in markdown (`\t`, `\r`, `\n`, and ` `) -/// into one space, trims it (as in, dropping the first and last space), -/// and then performs unicode case folding twice: first by uppercasing -/// lowercase characters, and then lowercasing uppercase characters. +/// into one space, trims it (as in, dropping the first and last space), and +/// then performs unicode case folding twice: first by lowercasing uppercase +/// characters, and then uppercasing lowercase characters. /// /// Some characters are considered “uppercase”, such as U+03F4 (`ϴ`), but if /// their lowercase counterpart (U+03B8 (`θ`)) is uppercased will result in a /// different uppercase character (U+0398 (`Θ`)). -/// Hence, to get that form, we perform both upper- and lowercase. +/// Hence, to get that form, we perform both lower- and uppercase. +/// +/// Performing these steps in that order works, but the inverse does not work. +/// To illustrate, say the source markdown contains two identifiers +/// `SS` (U+0053 U+0053) and `ẞ` (U+1E9E), which would be lowercased to +/// `ss` (U+0073 U+0073) and `ß` (U+00DF), and those in turn would both +/// uppercase to `SS` (U+0053 U+0053). +/// If we’d invert the steps, for `ẞ`, we’d first uppercase without a +/// change, and then lowercase to `ß`, which would not match `ss`. 
/// /// ## Examples /// @@ -64,17 +72,5 @@ pub fn normalize_identifier(value: &str) -> String { result.push_str(&value[start..]); } - // Some characters are considered “uppercase”, but if their lowercase - // counterpart is uppercased will result in a different uppercase - // character. - // Hence, to get that form, we perform both lower- and uppercase. - // Performing these steps in that order works, but the inverse does not - // work. - // To illustrate, say the source markdown containes two identifiers `SS` - // (U+0053 U+0053) and `ẞ` (U+1E9E), which would be lowercased to `ss` - // (U+0073 U+0073) and `ß` (U+00DF), and those in turn would both uppercase - // to `SS` (U+0053 U+0053). - // If we’d inverse the steps, for `ẞ`, we’d first uppercase without a - // change, and then lowercase to `ß`, which would not match `ss`. result.to_lowercase().to_uppercase() } |