aboutsummaryrefslogtreecommitdiffstats
path: root/src/util/normalize_identifier.rs
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/util/normalize_identifier.rs35
1 files changed, 33 insertions, 2 deletions
diff --git a/src/util/normalize_identifier.rs b/src/util/normalize_identifier.rs
index 870fd33..c287e1a 100644
--- a/src/util/normalize_identifier.rs
+++ b/src/util/normalize_identifier.rs
@@ -1,6 +1,37 @@
-//! To do.
+//! Utility to normalize identifiers.
-/// To do.
+/// Normalize an identifier, as found in references and
+/// [definitions][definition], so it can be compared when matching.
+///
+/// This collapses whitespace found in markdown (`\t`, `\r`, `\n`, and ` `)
+/// into one space, trims it (as in, dropping the first and last space),
+/// and then performs unicode case folding twice: first by uppercasing
+/// lowercase characters, and then lowercasing uppercase characters.
+///
+/// Some characters are considered “uppercase”, such as U+03F4 (`ϴ`), but
+/// uppercasing their lowercase counterpart (U+03B8 (`θ`)) results in a
+/// different uppercase character (U+0398 (`Θ`)).
+/// Hence, to get that form, we perform both upper- and lowercasing.
+///
+/// ## Examples
+///
+/// ```rust ignore
+/// use micromark::util::normalize_identifier::normalize_identifier;
+///
+/// assert_eq!(normalize_identifier(" a "), "a");
+/// assert_eq!(normalize_identifier("a\t\r\nb"), "a b");
+/// assert_eq!(normalize_identifier("ПРИВЕТ"), "привет");
+/// assert_eq!(normalize_identifier("Привет"), "привет");
+/// assert_eq!(normalize_identifier("привет"), "привет");
+/// ```
+///
+/// ## References
+///
+/// * [`micromark-util-normalize-identifier` in `micromark`](https://github.com/micromark/micromark/tree/main/packages/micromark-util-normalize-identifier)
+///
+/// [definition]: crate::construct::definition
+///
+/// <!--To do: link resource.-->
pub fn normalize_identifier(value: &str) -> String {
let mut codes = vec![];
let mut at_start = true;