From 0eeff9148e327183e532752f46421a75506dd7a6 Mon Sep 17 00:00:00 2001
From: Titus Wormer <tituswormer@gmail.com>
Date: Fri, 29 Jul 2022 18:22:59 +0200
Subject: Refactor to improve states

*   Remove custom kind wrappers, use plain bytes instead
*   Remove `Into`s, use the explicit expected types instead
*   Refactor to use `slice.as_str` in most places
*   Remove unneeded unique check before adding a definition
*   Use a shared CDATA prefix in constants
*   Inline byte checks into matches
*   Pass bytes back from parser instead of whole parse state
*   Refactor to work more often on bytes
*   Rename custom `size` to `len`
---
 src/util/normalize_identifier.rs | 39 ++++++++++++++++++++++++---------------
 1 file changed, 24 insertions(+), 15 deletions(-)

(limited to 'src/util/normalize_identifier.rs')

diff --git a/src/util/normalize_identifier.rs b/src/util/normalize_identifier.rs
index 42a2bb0..f5b12d0 100644
--- a/src/util/normalize_identifier.rs
+++ b/src/util/normalize_identifier.rs
@@ -34,25 +34,34 @@
 pub fn normalize_identifier(value: &str) -> String {
     // Note: it’ll grow a bit smaller for consecutive whitespace.
     let mut result = String::with_capacity(value.len());
-    let mut at_start = true;
-    let mut at_whitespace = true;
+    let bytes = value.as_bytes();
+    let mut in_whitespace = true;
+    let mut index = 0;
+    let mut start = 0;
 
-    // Collapse markdown whitespace and trim it.
-    for char in value.chars() {
-        match char {
-            '\t' | '\n' | '\r' | ' ' => {
-                at_whitespace = true;
+    while index < bytes.len() {
+        if matches!(bytes[index], b'\t' | b'\n' | b'\r' | b' ') {
+            // First whitespace we see after non-whitespace.
+            if !in_whitespace {
+                result.push_str(&value[start..index]);
+                in_whitespace = true;
             }
-            _ => {
-                if at_whitespace && !at_start {
-                    result.push(' ');
-                }
-
-                result.push(char);
-                at_start = false;
-                at_whitespace = false;
+        }
+        // First non-whitespace we see after whitespace.
+        else if in_whitespace {
+            if start != 0 {
+                result.push(' ');
             }
+
+            start = index;
+            in_whitespace = false;
         }
+
+        index += 1;
+    }
+
+    if !in_whitespace {
+        result.push_str(&value[start..]);
     }
 
     // Some characters are considered “uppercase”, but if their lowercase
-- 
cgit