diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-06-22 17:51:14 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-06-22 17:51:14 +0200 |
commit | 5416b61f89155b475fa5674898b8ff963aa443b5 (patch) | |
tree | f4709e5f350964f7735207cacd936dbb36f898d9 /src/util/normalize_identifier.rs | |
parent | 79c3275f91f1c0867a1bfba3085c0682aa5486ef (diff) | |
download | markdown-rs-5416b61f89155b475fa5674898b8ff963aa443b5.tar.gz markdown-rs-5416b61f89155b475fa5674898b8ff963aa443b5.tar.bz2 markdown-rs-5416b61f89155b475fa5674898b8ff963aa443b5.zip |
Add docs on normalizing identifiers, matching
Diffstat (limited to '')
-rw-r--r-- | src/util/normalize_identifier.rs | 35 |
1 files changed, 33 insertions, 2 deletions
diff --git a/src/util/normalize_identifier.rs b/src/util/normalize_identifier.rs index 870fd33..c287e1a 100644 --- a/src/util/normalize_identifier.rs +++ b/src/util/normalize_identifier.rs @@ -1,6 +1,37 @@ -//! To do. +//! Utility to normalize identifiers. -/// To do. +/// Normalize an identifier, as found in references and +/// [definitions][definition], so it can be compared when matching. +/// +/// This collapses whitespace found in markdown (`\t`, `\r`, `\n`, and ` `) +/// into one space, trims it (as in, dropping the first and last space), +/// and then performs unicode case folding twice: first by uppercasing +/// lowercase characters, and then lowercasing uppercase characters. +/// +/// Some characters are considered “uppercase”, such as U+03F4 (`ϴ`), but if +/// their lowercase counterpart (U+03B8 (`θ`)) is uppercased, the result is a +/// different uppercase character (U+0398 (`Θ`)). +/// Hence, to get that form, we perform both upper- and lowercasing. +/// +/// ## Examples +/// +/// ```rust ignore +/// micromark::util::normalize_identifier::normalize_identifier; +/// +/// assert_eq!(normalize_identifier(" a "), "a"); +/// assert_eq!(normalize_identifier("a\t\r\nb"), "a b"); +/// assert_eq!(normalize_identifier("ПРИВЕТ"), "привет"); +/// assert_eq!(normalize_identifier("Привет"), "привет"); +/// assert_eq!(normalize_identifier("привет"), "привет"); +/// ``` +/// +/// ## References +/// +/// * [`micromark-util-normalize-identifier` in `micromark`](https://github.com/micromark/micromark/tree/main/packages/micromark-util-normalize-identifier) +/// +/// [definition]: crate::construct::definition +/// +/// <!--To do: link resource.--> pub fn normalize_identifier(value: &str) -> String { let mut codes = vec![]; let mut at_start = true; |