diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-07-29 18:22:59 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-07-29 18:22:59 +0200 |
commit | 0eeff9148e327183e532752f46421a75506dd7a6 (patch) | |
tree | 4f0aed04f90aa759ce96a2e87aa719e7fa95c450 /src/util/normalize_identifier.rs | |
parent | 148ede7f0f42f0ccb1620b13d91f35d0c7d04c2f (diff) | |
download | markdown-rs-0eeff9148e327183e532752f46421a75506dd7a6.tar.gz markdown-rs-0eeff9148e327183e532752f46421a75506dd7a6.tar.bz2 markdown-rs-0eeff9148e327183e532752f46421a75506dd7a6.zip |
Refactor to improve states
* Remove custom kind wrappers, use plain bytes instead
* Remove `Into`s, use the explicit expected types instead
* Refactor to use `slice.as_str` in most places
* Remove unneeded unique check before adding a definition
* Use a shared CDATA prefix in constants
* Inline byte checks into matches
* Pass bytes back from parser instead of whole parse state
* Refactor to work more often on bytes
* Rename custom `size` to `len`
Diffstat (limited to 'src/util/normalize_identifier.rs')
-rw-r--r-- | src/util/normalize_identifier.rs | 39 |
1 file changed, 24 insertions, 15 deletions
```diff
diff --git a/src/util/normalize_identifier.rs b/src/util/normalize_identifier.rs
index 42a2bb0..f5b12d0 100644
--- a/src/util/normalize_identifier.rs
+++ b/src/util/normalize_identifier.rs
@@ -34,25 +34,34 @@
 pub fn normalize_identifier(value: &str) -> String {
     // Note: it’ll grow a bit smaller for consecutive whitespace.
     let mut result = String::with_capacity(value.len());
-    let mut at_start = true;
-    let mut at_whitespace = true;
+    let bytes = value.as_bytes();
+    let mut in_whitespace = true;
+    let mut index = 0;
+    let mut start = 0;
 
-    // Collapse markdown whitespace and trim it.
-    for char in value.chars() {
-        match char {
-            '\t' | '\n' | '\r' | ' ' => {
-                at_whitespace = true;
+    while index < bytes.len() {
+        if matches!(bytes[index], b'\t' | b'\n' | b'\r' | b' ') {
+            // First whitespace we see after non-whitespace.
+            if !in_whitespace {
+                result.push_str(&value[start..index]);
+                in_whitespace = true;
             }
-            _ => {
-                if at_whitespace && !at_start {
-                    result.push(' ');
-                }
-
-                result.push(char);
-                at_start = false;
-                at_whitespace = false;
+        }
+        // First non-whitespace we see after whitespace.
+        else if in_whitespace {
+            if start != 0 {
+                result.push(' ');
             }
+
+            start = index;
+            in_whitespace = false;
         }
+
+        index += 1;
+    }
+
+    if !in_whitespace {
+        result.push_str(&value[start..]);
     }
 
     // Some characters are considered “uppercase”, but if their lowercase
```