From 0eeff9148e327183e532752f46421a75506dd7a6 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Fri, 29 Jul 2022 18:22:59 +0200 Subject: Refactor to improve states * Remove custom kind wrappers, use plain bytes instead * Remove `Into`s, use the explicit expected types instead * Refactor to use `slice.as_str` in most places * Remove unneeded unique check before adding a definition * Use a shared CDATA prefix in constants * Inline byte checks into matches * Pass bytes back from parser instead of whole parse state * Refactor to work more often on bytes * Rename custom `size` to `len` --- src/util/normalize_identifier.rs | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) (limited to 'src/util/normalize_identifier.rs') diff --git a/src/util/normalize_identifier.rs b/src/util/normalize_identifier.rs index 42a2bb0..f5b12d0 100644 --- a/src/util/normalize_identifier.rs +++ b/src/util/normalize_identifier.rs @@ -34,25 +34,34 @@ pub fn normalize_identifier(value: &str) -> String { // Note: it’ll grow a bit smaller for consecutive whitespace. let mut result = String::with_capacity(value.len()); - let mut at_start = true; - let mut at_whitespace = true; + let bytes = value.as_bytes(); + let mut in_whitespace = true; + let mut index = 0; + let mut start = 0; - // Collapse markdown whitespace and trim it. - for char in value.chars() { - match char { - '\t' | '\n' | '\r' | ' ' => { - at_whitespace = true; + while index < bytes.len() { + if matches!(bytes[index], b'\t' | b'\n' | b'\r' | b' ') { + // First whitespace we see after non-whitespace. + if !in_whitespace { + result.push_str(&value[start..index]); + in_whitespace = true; } - _ => { - if at_whitespace && !at_start { - result.push(' '); - } - - result.push(char); - at_start = false; - at_whitespace = false; + } + // First non-whitespace we see after whitespace. + else if in_whitespace { + if start != 0 { + result.push(' '); } + + start = index; + in_whitespace = false; } + + index += 1; + } + + if !in_whitespace { + result.push_str(&value[start..]); } // Some characters are considered “uppercase”, but if their lowercase -- cgit