diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-06-22 17:24:05 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-06-22 17:24:05 +0200 |
commit | 79c3275f91f1c0867a1bfba3085c0682aa5486ef (patch) | |
tree | be30b9a8b755bc6bc01e3f9d59e7d69c60b80b24 /src/util/normalize_identifier.rs | |
parent | b0accb11f1aade55e9fc4dc0a1c1d1b8362ab5d9 (diff) | |
download | markdown-rs-79c3275f91f1c0867a1bfba3085c0682aa5486ef.tar.gz markdown-rs-79c3275f91f1c0867a1bfba3085c0682aa5486ef.tar.bz2 markdown-rs-79c3275f91f1c0867a1bfba3085c0682aa5486ef.zip |
Add support for normalizing identifiers
Diffstat (limited to 'src/util/normalize_identifier.rs')
-rw-r--r-- | src/util/normalize_identifier.rs | 37 |
1 files changed, 37 insertions, 0 deletions
/// Normalize an identifier so that equivalent spellings compare equal.
///
/// Two things happen to `value`:
///
/// 1. Markdown whitespace (tab, carriage return, line feed, and space) is
///    trimmed from both ends, and every interior run of it is collapsed into
///    a single space.
///    Other Unicode whitespace (such as a no-break space) is left untouched.
/// 2. The result is case folded, so that identifiers differing only in case
///    produce the same string.
///
/// Returns a new, owned `String`; an empty or whitespace-only `value` yields
/// an empty string.
pub fn normalize_identifier(value: &str) -> String {
    // Collapsing only ever removes bytes, so the input length is an upper
    // bound for the collapsed form.
    let mut collapsed = String::with_capacity(value.len());

    // Splitting on every markdown whitespace character yields empty pieces
    // for leading, trailing, and consecutive whitespace; dropping those
    // leaves just the words.
    let words = value
        .split(|c: char| matches!(c, '\t' | '\r' | '\n' | ' '))
        .filter(|word| !word.is_empty());

    for word in words {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }

        collapsed.push_str(word);
    }

    // Some characters are considered “uppercase”, but uppercasing their
    // lowercase counterpart results in a *different* uppercase form: `ẞ`
    // lowercases to `ß`, which uppercases to `SS`.
    // Lowercasing first and uppercasing second maps all such variants onto
    // one form; uppercasing first would leave `ẞ` as is and so keep it
    // distinct from `SS`.
    // The final lowercase pass keeps the result in lowercase form.
    collapsed.to_lowercase().to_uppercase().to_lowercase()
}