about | summary | refs | log | tree | commit | diff | stats
path: root/src/util/normalize_identifier.rs
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-07-29 18:22:59 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-07-29 18:22:59 +0200
commit0eeff9148e327183e532752f46421a75506dd7a6 (patch)
tree4f0aed04f90aa759ce96a2e87aa719e7fa95c450 /src/util/normalize_identifier.rs
parent148ede7f0f42f0ccb1620b13d91f35d0c7d04c2f (diff)
downloadmarkdown-rs-0eeff9148e327183e532752f46421a75506dd7a6.tar.gz
markdown-rs-0eeff9148e327183e532752f46421a75506dd7a6.tar.bz2
markdown-rs-0eeff9148e327183e532752f46421a75506dd7a6.zip
Refactor to improve states
* Remove custom kind wrappers, use plain bytes instead
* Remove `Into`s, use the explicit expected types instead
* Refactor to use `slice.as_str` in most places
* Remove unneeded unique check before adding a definition
* Use a shared CDATA prefix in constants
* Inline byte checks into matches
* Pass bytes back from parser instead of whole parse state
* Refactor to work more often on bytes
* Rename custom `size` to `len`
Diffstat (limited to 'src/util/normalize_identifier.rs')
-rw-r--r--src/util/normalize_identifier.rs39
1 files changed, 24 insertions, 15 deletions
diff --git a/src/util/normalize_identifier.rs b/src/util/normalize_identifier.rs
index 42a2bb0..f5b12d0 100644
--- a/src/util/normalize_identifier.rs
+++ b/src/util/normalize_identifier.rs
@@ -34,25 +34,34 @@
pub fn normalize_identifier(value: &str) -> String {
// Note: it’ll grow a bit smaller for consecutive whitespace.
let mut result = String::with_capacity(value.len());
- let mut at_start = true;
- let mut at_whitespace = true;
+ let bytes = value.as_bytes();
+ let mut in_whitespace = true;
+ let mut index = 0;
+ let mut start = 0;
- // Collapse markdown whitespace and trim it.
- for char in value.chars() {
- match char {
- '\t' | '\n' | '\r' | ' ' => {
- at_whitespace = true;
+ while index < bytes.len() {
+ if matches!(bytes[index], b'\t' | b'\n' | b'\r' | b' ') {
+ // First whitespace we see after non-whitespace.
+ if !in_whitespace {
+ result.push_str(&value[start..index]);
+ in_whitespace = true;
}
- _ => {
- if at_whitespace && !at_start {
- result.push(' ');
- }
-
- result.push(char);
- at_start = false;
- at_whitespace = false;
+ }
+ // First non-whitespace we see after whitespace.
+ else if in_whitespace {
+ if start != 0 {
+ result.push(' ');
}
+
+ start = index;
+ in_whitespace = false;
}
+
+ index += 1;
+ }
+
+ if !in_whitespace {
+ result.push_str(&value[start..]);
}
// Some characters are considered “uppercase”, but if their lowercase