Refactor to improve states

* Remove custom kind wrappers, use plain bytes instead
* Remove `Into`s, use the explicit expected types instead
* Refactor to use `slice.as_str` in most places
* Remove unneeded unique check before adding a definition
* Use a shared CDATA prefix in constants
* Inline byte checks into matches
* Pass bytes back from parser instead of whole parse state
* Refactor to work more often on bytes
* Rename custom `size` to `len`
author: Titus Wormer <tituswormer@gmail.com> 2022-07-29 18:22:59 +0200
committer: Titus Wormer <tituswormer@gmail.com> 2022-07-29 18:22:59 +0200
commit: 0eeff9148e327183e532752f46421a75506dd7a6 (patch)
tree: 4f0aed04f90aa759ce96a2e87aa719e7fa95c450 /src/util/normalize_identifier.rs
parent: 148ede7f0f42f0ccb1620b13d91f35d0c7d04c2f (diff)
download: markdown-rs-0eeff9148e327183e532752f46421a75506dd7a6.tar.gz
markdown-rs-0eeff9148e327183e532752f46421a75506dd7a6.tar.bz2
markdown-rs-0eeff9148e327183e532752f46421a75506dd7a6.zip
1 files changed, 24 insertions, 15 deletions
diff --git a/src/util/normalize_identifier.rs b/src/util/normalize_identifier.rs
index 42a2bb0..f5b12d0 100644
--- a/src/util/normalize_identifier.rs
+++ b/src/util/normalize_identifier.rs
@@ -34,25 +34,34 @@
 pub fn normalize_identifier(value: &str) -> String {
     // Note: it’ll grow a bit smaller for consecutive whitespace.
     let mut result = String::with_capacity(value.len());
-    let mut at_start = true;
-    let mut at_whitespace = true;
+    let bytes = value.as_bytes();
+    let mut in_whitespace = true;
+    let mut index = 0;
+    let mut start = 0;
 
-    // Collapse markdown whitespace and trim it.
-    for char in value.chars() {
-        match char {
-            '\t' | '\n' | '\r' | ' ' => {
-                at_whitespace = true;
+    while index < bytes.len() {
+        if matches!(bytes[index], b'\t' | b'\n' | b'\r' | b' ') {
+            // First whitespace we see after non-whitespace.
+            if !in_whitespace {
+                result.push_str(&value[start..index]);
+                in_whitespace = true;
             }
-            _ => {
-                if at_whitespace && !at_start {
-                    result.push(' ');
-                }
-
-                result.push(char);
-                at_start = false;
-                at_whitespace = false;
+        }
+        // First non-whitespace we see after whitespace.
+        else if in_whitespace {
+            if start != 0 {
+                result.push(' ');
             }
+
+            start = index;
+            in_whitespace = false;
         }
+
+        index += 1;
+    }
+
+    if !in_whitespace {
+        result.push_str(&value[start..]);
     }
 
     // Some characters are considered “uppercase”, but if their lowercase
author	Titus Wormer <tituswormer@gmail.com>	2022-07-29 18:22:59 +0200
committer	Titus Wormer <tituswormer@gmail.com>	2022-07-29 18:22:59 +0200
commit	0eeff9148e327183e532752f46421a75506dd7a6 (patch)
tree	4f0aed04f90aa759ce96a2e87aa719e7fa95c450 /src/util/normalize_identifier.rs
parent	148ede7f0f42f0ccb1620b13d91f35d0c7d04c2f (diff)
download	markdown-rs-0eeff9148e327183e532752f46421a75506dd7a6.tar.gz markdown-rs-0eeff9148e327183e532752f46421a75506dd7a6.tar.bz2 markdown-rs-0eeff9148e327183e532752f46421a75506dd7a6.zip