aboutsummaryrefslogtreecommitdiffstats
path: root/src/util/normalize_identifier.rs
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-06-22 17:24:05 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-06-22 17:24:05 +0200
commit79c3275f91f1c0867a1bfba3085c0682aa5486ef (patch)
treebe30b9a8b755bc6bc01e3f9d59e7d69c60b80b24 /src/util/normalize_identifier.rs
parentb0accb11f1aade55e9fc4dc0a1c1d1b8362ab5d9 (diff)
downloadmarkdown-rs-79c3275f91f1c0867a1bfba3085c0682aa5486ef.tar.gz
markdown-rs-79c3275f91f1c0867a1bfba3085c0682aa5486ef.tar.bz2
markdown-rs-79c3275f91f1c0867a1bfba3085c0682aa5486ef.zip
Add support for normalizing identifiers
Diffstat (limited to 'src/util/normalize_identifier.rs')
-rw-r--r--src/util/normalize_identifier.rs37
1 files changed, 37 insertions, 0 deletions
diff --git a/src/util/normalize_identifier.rs b/src/util/normalize_identifier.rs
new file mode 100644
index 0000000..870fd33
--- /dev/null
+++ b/src/util/normalize_identifier.rs
@@ -0,0 +1,37 @@
+//! Normalize identifiers: collapse and trim markdown whitespace, then fold case.
+
/// Normalize an identifier so that labels which should match compare equal.
///
/// Two steps are performed:
///
/// 1. Markdown whitespace (tab, carriage return, line feed, and space) is
///    trimmed from both ends, and every internal run of it is collapsed into
///    a single space.
/// 2. The result is case-folded by lowercasing, uppercasing, and lowercasing
///    again.
///
/// The returned string is lowercase.
/// Normalizing `"  Foo \t\n BAR "` yields `"foo bar"`, and both `"ẞ"` and
/// `"SS"` yield `"ss"`.
pub fn normalize_identifier(value: &str) -> String {
    // The collapsed form is never longer (in bytes) than the input.
    let mut collapsed = String::with_capacity(value.len());
    let mut pending_space = false;

    // Collapse markdown whitespace and trim it.
    for ch in value.chars() {
        if matches!(ch, '\t' | '\r' | '\n' | ' ') {
            // Only remember a separator when something precedes it, which
            // trims leading whitespace; trailing whitespace is dropped
            // because a separator is only written right before the next
            // non-whitespace character.
            pending_space = !collapsed.is_empty();
        } else {
            if pending_space {
                collapsed.push(' ');
                pending_space = false;
            }

            collapsed.push(ch);
        }
    }

    // Some characters are considered “uppercase”, but if their lowercase
    // counterpart is uppercased, the result is a different uppercase form
    // (`ẞ` → `ß` → `SS`).
    // Lowercasing must therefore happen *before* uppercasing to reach that
    // form; uppercasing first would leave `ẞ` untouched and end at `ß`,
    // which does not equal the `ss` produced for `SS`.
    // The final lowercasing keeps the output lowercase.
    collapsed.to_lowercase().to_uppercase().to_lowercase()
}