aboutsummaryrefslogtreecommitdiffstats
path: root/testing/benches/normalize_identifier.rs
diff options
context:
space:
mode:
Diffstat (limited to 'testing/benches/normalize_identifier.rs')
-rw-r--r--testing/benches/normalize_identifier.rs694
1 files changed, 694 insertions, 0 deletions
diff --git a/testing/benches/normalize_identifier.rs b/testing/benches/normalize_identifier.rs
new file mode 100644
index 0000000..d84701d
--- /dev/null
+++ b/testing/benches/normalize_identifier.rs
@@ -0,0 +1,694 @@
+use std::collections::HashSet;
+use std::iter::FusedIterator;
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+
+criterion_main!(benches);
+criterion_group!(benches, functions);
+
+fn functions(c: &mut Criterion) {
+ let words = Words::default().collect::<Vec<_>>();
+
+ macro_rules! bench_function {
+ ($($func:ident)*) => {
+ for word in words.iter().collect::<HashSet<_>>() {
+ let linear: &str = normalize_identifier_linear(word);
+ $(
+ assert_eq!(linear, $func(word));
+ )*
+ }
+
+ $(
+ c.bench_function(stringify!($func), |b| {
+ b.iter(|| {
+ for s in &words {
+ black_box($func(black_box(s)));
+ }
+ });
+ });
+ )*
+ };
+ }
+
+ bench_function! {
+ normalize_identifier_linear
+ normalize_identifier_linear_replacement_only
+ normalize_identifier_bisect
+ normalize_identifier_bisect_replacement_only
+ normalize_identifier_linear_by_len
+ normalize_identifier_linear_by_len_replacement_only
+ normalize_identifier_bisect_by_len
+ normalize_identifier_bisect_by_len_replacement_only
+ normalize_identifier_phf
+ }
+}
+
+fn normalize_identifier_linear(ident: &str) -> &str {
+ static USE_RAW: [(&str, &str); 47] = [
+ ("abstract", "r#abstract"),
+ ("as", "r#as"),
+ ("async", "r#async"),
+ ("await", "r#await"),
+ ("become", "r#become"),
+ ("box", "r#box"),
+ ("break", "r#break"),
+ ("const", "r#const"),
+ ("continue", "r#continue"),
+ ("crate", "r#crate"),
+ ("do", "r#do"),
+ ("dyn", "r#dyn"),
+ ("else", "r#else"),
+ ("enum", "r#enum"),
+ ("extern", "r#extern"),
+ ("false", "r#false"),
+ ("final", "r#final"),
+ ("fn", "r#fn"),
+ ("for", "r#for"),
+ ("if", "r#if"),
+ ("impl", "r#impl"),
+ ("in", "r#in"),
+ ("let", "r#let"),
+ ("macro", "r#macro"),
+ ("match", "r#match"),
+ ("mod", "r#mod"),
+ ("move", "r#move"),
+ ("mut", "r#mut"),
+ ("override", "r#override"),
+ ("priv", "r#priv"),
+ ("pub", "r#pub"),
+ ("ref", "r#ref"),
+ ("return", "r#return"),
+ ("static", "r#static"),
+ ("struct", "r#struct"),
+ ("trait", "r#trait"),
+ ("true", "r#true"),
+ ("try", "r#try"),
+ ("type", "r#type"),
+ ("typeof", "r#typeof"),
+ ("unsafe", "r#unsafe"),
+ ("unsized", "r#unsized"),
+ ("use", "r#use"),
+ ("virtual", "r#virtual"),
+ ("where", "r#where"),
+ ("while", "r#while"),
+ ("yield", "r#yield"),
+ ];
+
+ if let Some(word) = USE_RAW.iter().find(|x| x.0 == ident) {
+ word.1
+ } else {
+ ident
+ }
+}
+
+fn normalize_identifier_bisect(ident: &str) -> &str {
+ static USE_RAW: [(&str, &str); 47] = [
+ ("abstract", "r#abstract"),
+ ("as", "r#as"),
+ ("async", "r#async"),
+ ("await", "r#await"),
+ ("become", "r#become"),
+ ("box", "r#box"),
+ ("break", "r#break"),
+ ("const", "r#const"),
+ ("continue", "r#continue"),
+ ("crate", "r#crate"),
+ ("do", "r#do"),
+ ("dyn", "r#dyn"),
+ ("else", "r#else"),
+ ("enum", "r#enum"),
+ ("extern", "r#extern"),
+ ("false", "r#false"),
+ ("final", "r#final"),
+ ("fn", "r#fn"),
+ ("for", "r#for"),
+ ("if", "r#if"),
+ ("impl", "r#impl"),
+ ("in", "r#in"),
+ ("let", "r#let"),
+ ("macro", "r#macro"),
+ ("match", "r#match"),
+ ("mod", "r#mod"),
+ ("move", "r#move"),
+ ("mut", "r#mut"),
+ ("override", "r#override"),
+ ("priv", "r#priv"),
+ ("pub", "r#pub"),
+ ("ref", "r#ref"),
+ ("return", "r#return"),
+ ("static", "r#static"),
+ ("struct", "r#struct"),
+ ("trait", "r#trait"),
+ ("true", "r#true"),
+ ("try", "r#try"),
+ ("type", "r#type"),
+ ("typeof", "r#typeof"),
+ ("unsafe", "r#unsafe"),
+ ("unsized", "r#unsized"),
+ ("use", "r#use"),
+ ("virtual", "r#virtual"),
+ ("where", "r#where"),
+ ("while", "r#while"),
+ ("yield", "r#yield"),
+ ];
+
+ if let Ok(idx) = USE_RAW.binary_search_by(|(probe, _)| (*probe).cmp(ident)) {
+ USE_RAW[idx].1
+ } else {
+ ident
+ }
+}
+
+fn normalize_identifier_linear_replacement_only(ident: &str) -> &str {
+ static USE_RAW: [&str; 47] = [
+ "r#abstract",
+ "r#as",
+ "r#async",
+ "r#await",
+ "r#become",
+ "r#box",
+ "r#break",
+ "r#const",
+ "r#continue",
+ "r#crate",
+ "r#do",
+ "r#dyn",
+ "r#else",
+ "r#enum",
+ "r#extern",
+ "r#false",
+ "r#final",
+ "r#fn",
+ "r#for",
+ "r#if",
+ "r#impl",
+ "r#in",
+ "r#let",
+ "r#macro",
+ "r#match",
+ "r#mod",
+ "r#move",
+ "r#mut",
+ "r#override",
+ "r#priv",
+ "r#pub",
+ "r#ref",
+ "r#return",
+ "r#static",
+ "r#struct",
+ "r#trait",
+ "r#true",
+ "r#try",
+ "r#type",
+ "r#typeof",
+ "r#unsafe",
+ "r#unsized",
+ "r#use",
+ "r#virtual",
+ "r#where",
+ "r#while",
+ "r#yield",
+ ];
+
+ USE_RAW
+ .iter()
+ .find(|x| &x[2..] == ident)
+ .copied()
+ .unwrap_or(ident)
+}
+
+fn normalize_identifier_bisect_replacement_only(ident: &str) -> &str {
+ static USE_RAW: [&str; 47] = [
+ "r#abstract",
+ "r#as",
+ "r#async",
+ "r#await",
+ "r#become",
+ "r#box",
+ "r#break",
+ "r#const",
+ "r#continue",
+ "r#crate",
+ "r#do",
+ "r#dyn",
+ "r#else",
+ "r#enum",
+ "r#extern",
+ "r#false",
+ "r#final",
+ "r#fn",
+ "r#for",
+ "r#if",
+ "r#impl",
+ "r#in",
+ "r#let",
+ "r#macro",
+ "r#match",
+ "r#mod",
+ "r#move",
+ "r#mut",
+ "r#override",
+ "r#priv",
+ "r#pub",
+ "r#ref",
+ "r#return",
+ "r#static",
+ "r#struct",
+ "r#trait",
+ "r#true",
+ "r#try",
+ "r#type",
+ "r#typeof",
+ "r#unsafe",
+ "r#unsized",
+ "r#use",
+ "r#virtual",
+ "r#where",
+ "r#while",
+ "r#yield",
+ ];
+
+ if let Ok(idx) = USE_RAW.binary_search_by(|probe| probe[2..].cmp(ident)) {
+ USE_RAW[idx]
+ } else {
+ ident
+ }
+}
+
+fn normalize_identifier_linear_by_len(ident: &str) -> &str {
+ const KW0: &[([u8; 8], [u8; 10])] = &[];
+ const KW1: &[([u8; 8], [u8; 10])] = &[];
+ const KW2: &[([u8; 8], [u8; 10])] = &[
+ (*b"as______", *b"r#as______"),
+ (*b"do______", *b"r#do______"),
+ (*b"fn______", *b"r#fn______"),
+ (*b"if______", *b"r#if______"),
+ (*b"in______", *b"r#in______"),
+ ];
+ const KW3: &[([u8; 8], [u8; 10])] = &[
+ (*b"box_____", *b"r#box_____"),
+ (*b"dyn_____", *b"r#dyn_____"),
+ (*b"for_____", *b"r#for_____"),
+ (*b"let_____", *b"r#let_____"),
+ (*b"mod_____", *b"r#mod_____"),
+ (*b"mut_____", *b"r#mut_____"),
+ (*b"pub_____", *b"r#pub_____"),
+ (*b"ref_____", *b"r#ref_____"),
+ (*b"try_____", *b"r#try_____"),
+ (*b"use_____", *b"r#use_____"),
+ ];
+ const KW4: &[([u8; 8], [u8; 10])] = &[
+ (*b"else____", *b"r#else____"),
+ (*b"enum____", *b"r#enum____"),
+ (*b"impl____", *b"r#impl____"),
+ (*b"move____", *b"r#move____"),
+ (*b"priv____", *b"r#priv____"),
+ (*b"true____", *b"r#true____"),
+ (*b"type____", *b"r#type____"),
+ ];
+ const KW5: &[([u8; 8], [u8; 10])] = &[
+ (*b"async___", *b"r#async___"),
+ (*b"await___", *b"r#await___"),
+ (*b"break___", *b"r#break___"),
+ (*b"const___", *b"r#const___"),
+ (*b"crate___", *b"r#crate___"),
+ (*b"false___", *b"r#false___"),
+ (*b"final___", *b"r#final___"),
+ (*b"macro___", *b"r#macro___"),
+ (*b"match___", *b"r#match___"),
+ (*b"trait___", *b"r#trait___"),
+ (*b"where___", *b"r#where___"),
+ (*b"while___", *b"r#while___"),
+ (*b"yield___", *b"r#yield___"),
+ ];
+ const KW6: &[([u8; 8], [u8; 10])] = &[
+ (*b"become__", *b"r#become__"),
+ (*b"extern__", *b"r#extern__"),
+ (*b"return__", *b"r#return__"),
+ (*b"static__", *b"r#static__"),
+ (*b"struct__", *b"r#struct__"),
+ (*b"typeof__", *b"r#typeof__"),
+ (*b"unsafe__", *b"r#unsafe__"),
+ ];
+ const KW7: &[([u8; 8], [u8; 10])] = &[
+ (*b"unsized_", *b"r#unsized_"),
+ (*b"virtual_", *b"r#virtual_"),
+ ];
+ const KW8: &[([u8; 8], [u8; 10])] = &[
+ (*b"abstract", *b"r#abstract"),
+ (*b"continue", *b"r#continue"),
+ (*b"override", *b"r#override"),
+ ];
+ const KWS: &[&[([u8; 8], [u8; 10])]] = &[KW0, KW1, KW2, KW3, KW4, KW5, KW6, KW7, KW8];
+
+ if ident.len() > 8 {
+ return ident;
+ }
+ let kws = KWS[ident.len()];
+
+ let mut padded_ident = [b'_'; 8];
+ padded_ident[..ident.len()].copy_from_slice(ident.as_bytes());
+
+ let replacement = match kws.iter().find(|(kw, _)| *kw == padded_ident) {
+ Some((_, replacement)) => replacement,
+ None => return ident,
+ };
+
+ // SAFETY: We know that the input byte slice is pure-ASCII.
+ unsafe { std::str::from_utf8_unchecked(&replacement[..ident.len() + 2]) }
+}
+
+fn normalize_identifier_linear_by_len_replacement_only(ident: &str) -> &str {
+ const KW0: &[[u8; 10]] = &[];
+ const KW1: &[[u8; 10]] = &[];
+ const KW2: &[[u8; 10]] = &[
+ *b"r#as______",
+ *b"r#do______",
+ *b"r#fn______",
+ *b"r#if______",
+ *b"r#in______",
+ ];
+ const KW3: &[[u8; 10]] = &[
+ *b"r#box_____",
+ *b"r#dyn_____",
+ *b"r#for_____",
+ *b"r#let_____",
+ *b"r#mod_____",
+ *b"r#mut_____",
+ *b"r#pub_____",
+ *b"r#ref_____",
+ *b"r#try_____",
+ *b"r#use_____",
+ ];
+ const KW4: &[[u8; 10]] = &[
+ *b"r#else____",
+ *b"r#enum____",
+ *b"r#impl____",
+ *b"r#move____",
+ *b"r#priv____",
+ *b"r#true____",
+ *b"r#type____",
+ ];
+ const KW5: &[[u8; 10]] = &[
+ *b"r#async___",
+ *b"r#await___",
+ *b"r#break___",
+ *b"r#const___",
+ *b"r#crate___",
+ *b"r#false___",
+ *b"r#final___",
+ *b"r#macro___",
+ *b"r#match___",
+ *b"r#trait___",
+ *b"r#where___",
+ *b"r#while___",
+ *b"r#yield___",
+ ];
+ const KW6: &[[u8; 10]] = &[
+ *b"r#become__",
+ *b"r#extern__",
+ *b"r#return__",
+ *b"r#static__",
+ *b"r#struct__",
+ *b"r#typeof__",
+ *b"r#unsafe__",
+ ];
+ const KW7: &[[u8; 10]] = &[*b"r#unsized_", *b"r#virtual_"];
+ const KW8: &[[u8; 10]] = &[*b"r#abstract", *b"r#continue", *b"r#override"];
+ const KWS: &[&[[u8; 10]]] = &[KW0, KW1, KW2, KW3, KW4, KW5, KW6, KW7, KW8];
+
+ if ident.len() > 8 {
+ return ident;
+ }
+ let kws = KWS[ident.len()];
+
+ let mut padded_ident = [b'_'; 8];
+ padded_ident[..ident.len()].copy_from_slice(ident.as_bytes());
+
+ let replacement = match kws
+ .iter()
+ .find(|probe| padded_ident == <[u8; 8]>::try_from(&probe[2..]).unwrap())
+ {
+ Some(replacement) => replacement,
+ None => return ident,
+ };
+
+ // SAFETY: We know that the input byte slice is pure-ASCII.
+ unsafe { std::str::from_utf8_unchecked(&replacement[..ident.len() + 2]) }
+}
+
+fn normalize_identifier_bisect_by_len(ident: &str) -> &str {
+ const KW0: &[([u8; 8], [u8; 10])] = &[];
+ const KW1: &[([u8; 8], [u8; 10])] = &[];
+ const KW2: &[([u8; 8], [u8; 10])] = &[
+ (*b"as______", *b"r#as______"),
+ (*b"do______", *b"r#do______"),
+ (*b"fn______", *b"r#fn______"),
+ (*b"if______", *b"r#if______"),
+ (*b"in______", *b"r#in______"),
+ ];
+ const KW3: &[([u8; 8], [u8; 10])] = &[
+ (*b"box_____", *b"r#box_____"),
+ (*b"dyn_____", *b"r#dyn_____"),
+ (*b"for_____", *b"r#for_____"),
+ (*b"let_____", *b"r#let_____"),
+ (*b"mod_____", *b"r#mod_____"),
+ (*b"mut_____", *b"r#mut_____"),
+ (*b"pub_____", *b"r#pub_____"),
+ (*b"ref_____", *b"r#ref_____"),
+ (*b"try_____", *b"r#try_____"),
+ (*b"use_____", *b"r#use_____"),
+ ];
+ const KW4: &[([u8; 8], [u8; 10])] = &[
+ (*b"else____", *b"r#else____"),
+ (*b"enum____", *b"r#enum____"),
+ (*b"impl____", *b"r#impl____"),
+ (*b"move____", *b"r#move____"),
+ (*b"priv____", *b"r#priv____"),
+ (*b"true____", *b"r#true____"),
+ (*b"type____", *b"r#type____"),
+ ];
+ const KW5: &[([u8; 8], [u8; 10])] = &[
+ (*b"async___", *b"r#async___"),
+ (*b"await___", *b"r#await___"),
+ (*b"break___", *b"r#break___"),
+ (*b"const___", *b"r#const___"),
+ (*b"crate___", *b"r#crate___"),
+ (*b"false___", *b"r#false___"),
+ (*b"final___", *b"r#final___"),
+ (*b"macro___", *b"r#macro___"),
+ (*b"match___", *b"r#match___"),
+ (*b"trait___", *b"r#trait___"),
+ (*b"where___", *b"r#where___"),
+ (*b"while___", *b"r#while___"),
+ (*b"yield___", *b"r#yield___"),
+ ];
+ const KW6: &[([u8; 8], [u8; 10])] = &[
+ (*b"become__", *b"r#become__"),
+ (*b"extern__", *b"r#extern__"),
+ (*b"return__", *b"r#return__"),
+ (*b"static__", *b"r#static__"),
+ (*b"struct__", *b"r#struct__"),
+ (*b"typeof__", *b"r#typeof__"),
+ (*b"unsafe__", *b"r#unsafe__"),
+ ];
+ const KW7: &[([u8; 8], [u8; 10])] = &[
+ (*b"unsized_", *b"r#unsized_"),
+ (*b"virtual_", *b"r#virtual_"),
+ ];
+ const KW8: &[([u8; 8], [u8; 10])] = &[
+ (*b"abstract", *b"r#abstract"),
+ (*b"continue", *b"r#continue"),
+ (*b"override", *b"r#override"),
+ ];
+ const KWS: &[&[([u8; 8], [u8; 10])]] = &[KW0, KW1, KW2, KW3, KW4, KW5, KW6, KW7, KW8];
+
+ if ident.len() > 8 {
+ return ident;
+ }
+ let kws = KWS[ident.len()];
+
+ let mut padded_ident = [b'_'; 8];
+ padded_ident[..ident.len()].copy_from_slice(ident.as_bytes());
+
+ if let Ok(idx) = kws.binary_search_by(|(probe, _)| probe.cmp(&padded_ident)) {
+ // SAFETY: We know that the input byte slice is pure-ASCII.
+ unsafe { std::str::from_utf8_unchecked(&kws[idx].1[..ident.len() + 2]) }
+ } else {
+ ident
+ }
+}
+
+fn normalize_identifier_bisect_by_len_replacement_only(ident: &str) -> &str {
+ const KW0: &[[u8; 10]] = &[];
+ const KW1: &[[u8; 10]] = &[];
+ const KW2: &[[u8; 10]] = &[
+ *b"r#as______",
+ *b"r#do______",
+ *b"r#fn______",
+ *b"r#if______",
+ *b"r#in______",
+ ];
+ const KW3: &[[u8; 10]] = &[
+ *b"r#box_____",
+ *b"r#dyn_____",
+ *b"r#for_____",
+ *b"r#let_____",
+ *b"r#mod_____",
+ *b"r#mut_____",
+ *b"r#pub_____",
+ *b"r#ref_____",
+ *b"r#try_____",
+ *b"r#use_____",
+ ];
+ const KW4: &[[u8; 10]] = &[
+ *b"r#else____",
+ *b"r#enum____",
+ *b"r#impl____",
+ *b"r#move____",
+ *b"r#priv____",
+ *b"r#true____",
+ *b"r#type____",
+ ];
+ const KW5: &[[u8; 10]] = &[
+ *b"r#async___",
+ *b"r#await___",
+ *b"r#break___",
+ *b"r#const___",
+ *b"r#crate___",
+ *b"r#false___",
+ *b"r#final___",
+ *b"r#macro___",
+ *b"r#match___",
+ *b"r#trait___",
+ *b"r#where___",
+ *b"r#while___",
+ *b"r#yield___",
+ ];
+ const KW6: &[[u8; 10]] = &[
+ *b"r#become__",
+ *b"r#extern__",
+ *b"r#return__",
+ *b"r#static__",
+ *b"r#struct__",
+ *b"r#typeof__",
+ *b"r#unsafe__",
+ ];
+ const KW7: &[[u8; 10]] = &[*b"r#unsized_", *b"r#virtual_"];
+ const KW8: &[[u8; 10]] = &[*b"r#abstract", *b"r#continue", *b"r#override"];
+ const KWS: &[&[[u8; 10]]] = &[KW0, KW1, KW2, KW3, KW4, KW5, KW6, KW7, KW8];
+
+ if ident.len() > 8 {
+ return ident;
+ }
+ let kws = KWS[ident.len()];
+
+ let mut padded_ident = [b'_'; 8];
+ padded_ident[..ident.len()].copy_from_slice(ident.as_bytes());
+
+ let idx =
+ kws.binary_search_by(|probe| <[u8; 8]>::try_from(&probe[2..]).unwrap().cmp(&padded_ident));
+ match idx {
+ Ok(idx) => unsafe { std::str::from_utf8_unchecked(&kws[idx][..ident.len() + 2]) },
+ Err(_) => ident,
+ }
+}
+
+fn normalize_identifier_phf(ident: &str) -> &str {
+ static USE_RAW: phf::Map<&str, &str> = phf::phf_map! {
+ "abstract" => "r#abstract",
+ "as" => "r#as",
+ "async" => "r#async",
+ "await" => "r#await",
+ "become" => "r#become",
+ "box" => "r#box",
+ "break" => "r#break",
+ "const" => "r#const",
+ "continue" => "r#continue",
+ "crate" => "r#crate",
+ "do" => "r#do",
+ "dyn" => "r#dyn",
+ "else" => "r#else",
+ "enum" => "r#enum",
+ "extern" => "r#extern",
+ "false" => "r#false",
+ "final" => "r#final",
+ "fn" => "r#fn",
+ "for" => "r#for",
+ "if" => "r#if",
+ "impl" => "r#impl",
+ "in" => "r#in",
+ "let" => "r#let",
+ "macro" => "r#macro",
+ "match" => "r#match",
+ "mod" => "r#mod",
+ "move" => "r#move",
+ "mut" => "r#mut",
+ "override" => "r#override",
+ "priv" => "r#priv",
+ "pub" => "r#pub",
+ "ref" => "r#ref",
+ "return" => "r#return",
+ "static" => "r#static",
+ "struct" => "r#struct",
+ "trait" => "r#trait",
+ "true" => "r#true",
+ "try" => "r#try",
+ "type" => "r#type",
+ "typeof" => "r#typeof",
+ "unsafe" => "r#unsafe",
+ "unsized" => "r#unsized",
+ "use" => "r#use",
+ "virtual" => "r#virtual",
+ "where" => "r#where",
+ "while" => "r#while",
+ "yield" => "r#yield",
+ };
+
+ USE_RAW.get(ident).copied().unwrap_or(ident)
+}
+
+struct Words(&'static str);
+
+impl Default for Words {
+ fn default() -> Self {
+ Self(include_str!("../../askama_derive/src/generator.rs"))
+ }
+}
+
+impl Iterator for Words {
+ type Item = &'static str;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ let mut pos = self.0;
+ loop {
+ if pos.is_empty() {
+ self.0 = "";
+ return None;
+ } else if matches!(pos.as_bytes()[0], b'_' | b'a'..=b'z' | b'A'..=b'Z') {
+ break;
+ }
+
+ let mut chars = pos.chars();
+ chars.next();
+ pos = chars.as_str();
+ }
+
+ let start = pos;
+ loop {
+ if pos.is_empty() {
+ self.0 = "";
+ return Some(start);
+ } else if !matches!(pos.as_bytes()[0], b'_' | b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9') {
+ self.0 = pos;
+ return Some(&start[..start.len() - pos.len()]);
+ }
+
+ let mut chars = pos.chars();
+ chars.next();
+ pos = chars.as_str();
+ }
+ }
+}
+
+impl FusedIterator for Words {}