diff options
Diffstat (limited to 'testing')
| -rw-r--r-- | testing/Cargo.toml | 5 | ||||
| -rw-r--r-- | testing/benches/normalize_identifier.rs | 694 | 
2 files changed, 699 insertions, 0 deletions
| diff --git a/testing/Cargo.toml b/testing/Cargo.toml index 793de53..20ef72a 100644 --- a/testing/Cargo.toml +++ b/testing/Cargo.toml @@ -15,6 +15,7 @@ markdown = ["comrak", "askama/markdown"]  [dependencies]  askama = { path = "../askama", version = "0.12" }  comrak = { version = "0.20", default-features = false, optional = true } +phf = { version = "0.11", features = ["macros" ]}  serde_json = { version = "1.0", optional = true }  [dev-dependencies] @@ -25,3 +26,7 @@ version_check = "0.9"  [[bench]]  name = "all"  harness = false + +[[bench]] +name = "normalize_identifier" +harness = false diff --git a/testing/benches/normalize_identifier.rs b/testing/benches/normalize_identifier.rs new file mode 100644 index 0000000..d84701d --- /dev/null +++ b/testing/benches/normalize_identifier.rs @@ -0,0 +1,694 @@ +use std::collections::HashSet; +use std::iter::FusedIterator; + +use criterion::{black_box, criterion_group, criterion_main, Criterion}; + +criterion_main!(benches); +criterion_group!(benches, functions); + +fn functions(c: &mut Criterion) { +    let words = Words::default().collect::<Vec<_>>(); + +    macro_rules! bench_function { +        ($($func:ident)*) => { +            for word in words.iter().collect::<HashSet<_>>() { +                let linear: &str = normalize_identifier_linear(word); +                $( +                    assert_eq!(linear, $func(word)); +                )* +            } + +            $( +                c.bench_function(stringify!($func), |b| { +                    b.iter(|| { +                        for s in &words { +                            black_box($func(black_box(s))); +                        } +                    }); +                }); +            )* +        }; +    } + +    bench_function! { +        normalize_identifier_linear +        normalize_identifier_linear_replacement_only +        normalize_identifier_bisect +        normalize_identifier_bisect_replacement_only +        normalize_identifier_linear_by_len +        normalize_identifier_linear_by_len_replacement_only +        normalize_identifier_bisect_by_len +        normalize_identifier_bisect_by_len_replacement_only +        normalize_identifier_phf +    } +} + +fn normalize_identifier_linear(ident: &str) -> &str { +    static USE_RAW: [(&str, &str); 47] = [ +        ("abstract", "r#abstract"), +        ("as", "r#as"), +        ("async", "r#async"), +        ("await", "r#await"), +        ("become", "r#become"), +        ("box", "r#box"), +        ("break", "r#break"), +        ("const", "r#const"), +        ("continue", "r#continue"), +        ("crate", "r#crate"), +        ("do", "r#do"), +        ("dyn", "r#dyn"), +        ("else", "r#else"), +        ("enum", "r#enum"), +        ("extern", "r#extern"), +        ("false", "r#false"), +        ("final", "r#final"), +        ("fn", "r#fn"), +        ("for", "r#for"), +        ("if", "r#if"), +        ("impl", "r#impl"), +        ("in", "r#in"), +        ("let", "r#let"), +        ("macro", "r#macro"), +        ("match", "r#match"), +        ("mod", "r#mod"), +        ("move", "r#move"), +        ("mut", "r#mut"), +        ("override", "r#override"), +        ("priv", "r#priv"), +        ("pub", "r#pub"), +        ("ref", "r#ref"), +        ("return", "r#return"), +        ("static", "r#static"), +        ("struct", "r#struct"), +        ("trait", "r#trait"), +        ("true", "r#true"), +        ("try", "r#try"), +        ("type", "r#type"), +        ("typeof", "r#typeof"), +        ("unsafe", "r#unsafe"), +        ("unsized", "r#unsized"), +        ("use", "r#use"), +        ("virtual", "r#virtual"), +        ("where", "r#where"), +        ("while", "r#while"), +        ("yield", "r#yield"), +    ]; + +    if let Some(word) = USE_RAW.iter().find(|x| x.0 == ident) { +        word.1 +    } else { +        ident +    } +} + +fn normalize_identifier_bisect(ident: &str) -> &str { +    static USE_RAW: [(&str, &str); 47] = [ +        ("abstract", "r#abstract"), +        ("as", "r#as"), +        ("async", "r#async"), +        ("await", "r#await"), +        ("become", "r#become"), +        ("box", "r#box"), +        ("break", "r#break"), +        ("const", "r#const"), +        ("continue", "r#continue"), +        ("crate", "r#crate"), +        ("do", "r#do"), +        ("dyn", "r#dyn"), +        ("else", "r#else"), +        ("enum", "r#enum"), +        ("extern", "r#extern"), +        ("false", "r#false"), +        ("final", "r#final"), +        ("fn", "r#fn"), +        ("for", "r#for"), +        ("if", "r#if"), +        ("impl", "r#impl"), +        ("in", "r#in"), +        ("let", "r#let"), +        ("macro", "r#macro"), +        ("match", "r#match"), +        ("mod", "r#mod"), +        ("move", "r#move"), +        ("mut", "r#mut"), +        ("override", "r#override"), +        ("priv", "r#priv"), +        ("pub", "r#pub"), +        ("ref", "r#ref"), +        ("return", "r#return"), +        ("static", "r#static"), +        ("struct", "r#struct"), +        ("trait", "r#trait"), +        ("true", "r#true"), +        ("try", "r#try"), +        ("type", "r#type"), +        ("typeof", "r#typeof"), +        ("unsafe", "r#unsafe"), +        ("unsized", "r#unsized"), +        ("use", "r#use"), +        ("virtual", "r#virtual"), +        ("where", "r#where"), +        ("while", "r#while"), +        ("yield", "r#yield"), +    ]; + +    if let Ok(idx) = USE_RAW.binary_search_by(|(probe, _)| (*probe).cmp(ident)) { +        USE_RAW[idx].1 +    } else { +        ident +    } +} + +fn normalize_identifier_linear_replacement_only(ident: &str) -> &str { +    static USE_RAW: [&str; 47] = [ +        "r#abstract", +        "r#as", +        "r#async", +        "r#await", +        "r#become", +        "r#box", +        "r#break", +        "r#const", +        "r#continue", +        "r#crate", +        "r#do", +        "r#dyn", +        "r#else", +        "r#enum", +        "r#extern", +        "r#false", +        "r#final", +        "r#fn", +        "r#for", +        "r#if", +        "r#impl", +        "r#in", +        "r#let", +        "r#macro", +        "r#match", +        "r#mod", +        "r#move", +        "r#mut", +        "r#override", +        "r#priv", +        "r#pub", +        "r#ref", +        "r#return", +        "r#static", +        "r#struct", +        "r#trait", +        "r#true", +        "r#try", +        "r#type", +        "r#typeof", +        "r#unsafe", +        "r#unsized", +        "r#use", +        "r#virtual", +        "r#where", +        "r#while", +        "r#yield", +    ]; + +    USE_RAW +        .iter() +        .find(|x| &x[2..] == ident) +        .copied() +        .unwrap_or(ident) +} + +fn normalize_identifier_bisect_replacement_only(ident: &str) -> &str { +    static USE_RAW: [&str; 47] = [ +        "r#abstract", +        "r#as", +        "r#async", +        "r#await", +        "r#become", +        "r#box", +        "r#break", +        "r#const", +        "r#continue", +        "r#crate", +        "r#do", +        "r#dyn", +        "r#else", +        "r#enum", +        "r#extern", +        "r#false", +        "r#final", +        "r#fn", +        "r#for", +        "r#if", +        "r#impl", +        "r#in", +        "r#let", +        "r#macro", +        "r#match", +        "r#mod", +        "r#move", +        "r#mut", +        "r#override", +        "r#priv", +        "r#pub", +        "r#ref", +        "r#return", +        "r#static", +        "r#struct", +        "r#trait", +        "r#true", +        "r#try", +        "r#type", +        "r#typeof", +        "r#unsafe", +        "r#unsized", +        "r#use", +        "r#virtual", +        "r#where", +        "r#while", +        "r#yield", +    ]; + +    if let Ok(idx) = USE_RAW.binary_search_by(|probe| probe[2..].cmp(ident)) { +        USE_RAW[idx] +    } else { +        ident +    } +} + +fn normalize_identifier_linear_by_len(ident: &str) -> &str { +    const KW0: &[([u8; 8], [u8; 10])] = &[]; +    const KW1: &[([u8; 8], [u8; 10])] = &[]; +    const KW2: &[([u8; 8], [u8; 10])] = &[ +        (*b"as______", *b"r#as______"), +        (*b"do______", *b"r#do______"), +        (*b"fn______", *b"r#fn______"), +        (*b"if______", *b"r#if______"), +        (*b"in______", *b"r#in______"), +    ]; +    const KW3: &[([u8; 8], [u8; 10])] = &[ +        (*b"box_____", *b"r#box_____"), +        (*b"dyn_____", *b"r#dyn_____"), +        (*b"for_____", *b"r#for_____"), +        (*b"let_____", *b"r#let_____"), +        (*b"mod_____", *b"r#mod_____"), +        (*b"mut_____", *b"r#mut_____"), +        (*b"pub_____", *b"r#pub_____"), +        (*b"ref_____", *b"r#ref_____"), +        (*b"try_____", *b"r#try_____"), +        (*b"use_____", *b"r#use_____"), +    ]; +    const KW4: &[([u8; 8], [u8; 10])] = &[ +        (*b"else____", *b"r#else____"), +        (*b"enum____", *b"r#enum____"), +        (*b"impl____", *b"r#impl____"), +        (*b"move____", *b"r#move____"), +        (*b"priv____", *b"r#priv____"), +        (*b"true____", *b"r#true____"), +        (*b"type____", *b"r#type____"), +    ]; +    const KW5: &[([u8; 8], [u8; 10])] = &[ +        (*b"async___", *b"r#async___"), +        (*b"await___", *b"r#await___"), +        (*b"break___", *b"r#break___"), +        (*b"const___", *b"r#const___"), +        (*b"crate___", *b"r#crate___"), +        (*b"false___", *b"r#false___"), +        (*b"final___", *b"r#final___"), +        (*b"macro___", *b"r#macro___"), +        (*b"match___", *b"r#match___"), +        (*b"trait___", *b"r#trait___"), +        (*b"where___", *b"r#where___"), +        (*b"while___", *b"r#while___"), +        (*b"yield___", *b"r#yield___"), +    ]; +    const KW6: &[([u8; 8], [u8; 10])] = &[ +        (*b"become__", *b"r#become__"), +        (*b"extern__", *b"r#extern__"), +        (*b"return__", *b"r#return__"), +        (*b"static__", *b"r#static__"), +        (*b"struct__", *b"r#struct__"), +        (*b"typeof__", *b"r#typeof__"), +        (*b"unsafe__", *b"r#unsafe__"), +    ]; +    const KW7: &[([u8; 8], [u8; 10])] = &[ +        (*b"unsized_", *b"r#unsized_"), +        (*b"virtual_", *b"r#virtual_"), +    ]; +    const KW8: &[([u8; 8], [u8; 10])] = &[ +        (*b"abstract", *b"r#abstract"), +        (*b"continue", *b"r#continue"), +        (*b"override", *b"r#override"), +    ]; +    const KWS: &[&[([u8; 8], [u8; 10])]] = &[KW0, KW1, KW2, KW3, KW4, KW5, KW6, KW7, KW8]; + +    if ident.len() > 8 { +        return ident; +    } +    let kws = KWS[ident.len()]; + +    let mut padded_ident = [b'_'; 8]; +    padded_ident[..ident.len()].copy_from_slice(ident.as_bytes()); + +    let replacement = match kws.iter().find(|(kw, _)| *kw == padded_ident) { +        Some((_, replacement)) => replacement, +        None => return ident, +    }; + +    // SAFETY: We know that the input byte slice is pure-ASCII. +    unsafe { std::str::from_utf8_unchecked(&replacement[..ident.len() + 2]) } +} + +fn normalize_identifier_linear_by_len_replacement_only(ident: &str) -> &str { +    const KW0: &[[u8; 10]] = &[]; +    const KW1: &[[u8; 10]] = &[]; +    const KW2: &[[u8; 10]] = &[ +        *b"r#as______", +        *b"r#do______", +        *b"r#fn______", +        *b"r#if______", +        *b"r#in______", +    ]; +    const KW3: &[[u8; 10]] = &[ +        *b"r#box_____", +        *b"r#dyn_____", +        *b"r#for_____", +        *b"r#let_____", +        *b"r#mod_____", +        *b"r#mut_____", +        *b"r#pub_____", +        *b"r#ref_____", +        *b"r#try_____", +        *b"r#use_____", +    ]; +    const KW4: &[[u8; 10]] = &[ +        *b"r#else____", +        *b"r#enum____", +        *b"r#impl____", +        *b"r#move____", +        *b"r#priv____", +        *b"r#true____", +        *b"r#type____", +    ]; +    const KW5: &[[u8; 10]] = &[ +        *b"r#async___", +        *b"r#await___", +        *b"r#break___", +        *b"r#const___", +        *b"r#crate___", +        *b"r#false___", +        *b"r#final___", +        *b"r#macro___", +        *b"r#match___", +        *b"r#trait___", +        *b"r#where___", +        *b"r#while___", +        *b"r#yield___", +    ]; +    const KW6: &[[u8; 10]] = &[ +        *b"r#become__", +        *b"r#extern__", +        *b"r#return__", +        *b"r#static__", +        *b"r#struct__", +        *b"r#typeof__", +        *b"r#unsafe__", +    ]; +    const KW7: &[[u8; 10]] = &[*b"r#unsized_", *b"r#virtual_"]; +    const KW8: &[[u8; 10]] = &[*b"r#abstract", *b"r#continue", *b"r#override"]; +    const KWS: &[&[[u8; 10]]] = &[KW0, KW1, KW2, KW3, KW4, KW5, KW6, KW7, KW8]; + +    if ident.len() > 8 { +        return ident; +    } +    let kws = KWS[ident.len()]; + +    let mut padded_ident = [b'_'; 8]; +    padded_ident[..ident.len()].copy_from_slice(ident.as_bytes()); + +    let replacement = match kws +        .iter() +        .find(|probe| padded_ident == <[u8; 8]>::try_from(&probe[2..]).unwrap()) +    { +        Some(replacement) => replacement, +        None => return ident, +    }; + +    // SAFETY: We know that the input byte slice is pure-ASCII. +    unsafe { std::str::from_utf8_unchecked(&replacement[..ident.len() + 2]) } +} + +fn normalize_identifier_bisect_by_len(ident: &str) -> &str { +    const KW0: &[([u8; 8], [u8; 10])] = &[]; +    const KW1: &[([u8; 8], [u8; 10])] = &[]; +    const KW2: &[([u8; 8], [u8; 10])] = &[ +        (*b"as______", *b"r#as______"), +        (*b"do______", *b"r#do______"), +        (*b"fn______", *b"r#fn______"), +        (*b"if______", *b"r#if______"), +        (*b"in______", *b"r#in______"), +    ]; +    const KW3: &[([u8; 8], [u8; 10])] = &[ +        (*b"box_____", *b"r#box_____"), +        (*b"dyn_____", *b"r#dyn_____"), +        (*b"for_____", *b"r#for_____"), +        (*b"let_____", *b"r#let_____"), +        (*b"mod_____", *b"r#mod_____"), +        (*b"mut_____", *b"r#mut_____"), +        (*b"pub_____", *b"r#pub_____"), +        (*b"ref_____", *b"r#ref_____"), +        (*b"try_____", *b"r#try_____"), +        (*b"use_____", *b"r#use_____"), +    ]; +    const KW4: &[([u8; 8], [u8; 10])] = &[ +        (*b"else____", *b"r#else____"), +        (*b"enum____", *b"r#enum____"), +        (*b"impl____", *b"r#impl____"), +        (*b"move____", *b"r#move____"), +        (*b"priv____", *b"r#priv____"), +        (*b"true____", *b"r#true____"), +        (*b"type____", *b"r#type____"), +    ]; +    const KW5: &[([u8; 8], [u8; 10])] = &[ +        (*b"async___", *b"r#async___"), +        (*b"await___", *b"r#await___"), +        (*b"break___", *b"r#break___"), +        (*b"const___", *b"r#const___"), +        (*b"crate___", *b"r#crate___"), +        (*b"false___", *b"r#false___"), +        (*b"final___", *b"r#final___"), +        (*b"macro___", *b"r#macro___"), +        (*b"match___", *b"r#match___"), +        (*b"trait___", *b"r#trait___"), +        (*b"where___", *b"r#where___"), +        (*b"while___", *b"r#while___"), +        (*b"yield___", *b"r#yield___"), +    ]; +    const KW6: &[([u8; 8], [u8; 10])] = &[ +        (*b"become__", *b"r#become__"), +        (*b"extern__", *b"r#extern__"), +        (*b"return__", *b"r#return__"), +        (*b"static__", *b"r#static__"), +        (*b"struct__", *b"r#struct__"), +        (*b"typeof__", *b"r#typeof__"), +        (*b"unsafe__", *b"r#unsafe__"), +    ]; +    const KW7: &[([u8; 8], [u8; 10])] = &[ +        (*b"unsized_", *b"r#unsized_"), +        (*b"virtual_", *b"r#virtual_"), +    ]; +    const KW8: &[([u8; 8], [u8; 10])] = &[ +        (*b"abstract", *b"r#abstract"), +        (*b"continue", *b"r#continue"), +        (*b"override", *b"r#override"), +    ]; +    const KWS: &[&[([u8; 8], [u8; 10])]] = &[KW0, KW1, KW2, KW3, KW4, KW5, KW6, KW7, KW8]; + +    if ident.len() > 8 { +        return ident; +    } +    let kws = KWS[ident.len()]; + +    let mut padded_ident = [b'_'; 8]; +    padded_ident[..ident.len()].copy_from_slice(ident.as_bytes()); + +    if let Ok(idx) = kws.binary_search_by(|(probe, _)| probe.cmp(&padded_ident)) { +        // SAFETY: We know that the input byte slice is pure-ASCII. +        unsafe { std::str::from_utf8_unchecked(&kws[idx].1[..ident.len() + 2]) } +    } else { +        ident +    } +} + +fn normalize_identifier_bisect_by_len_replacement_only(ident: &str) -> &str { +    const KW0: &[[u8; 10]] = &[]; +    const KW1: &[[u8; 10]] = &[]; +    const KW2: &[[u8; 10]] = &[ +        *b"r#as______", +        *b"r#do______", +        *b"r#fn______", +        *b"r#if______", +        *b"r#in______", +    ]; +    const KW3: &[[u8; 10]] = &[ +        *b"r#box_____", +        *b"r#dyn_____", +        *b"r#for_____", +        *b"r#let_____", +        *b"r#mod_____", +        *b"r#mut_____", +        *b"r#pub_____", +        *b"r#ref_____", +        *b"r#try_____", +        *b"r#use_____", +    ]; +    const KW4: &[[u8; 10]] = &[ +        *b"r#else____", +        *b"r#enum____", +        *b"r#impl____", +        *b"r#move____", +        *b"r#priv____", +        *b"r#true____", +        *b"r#type____", +    ]; +    const KW5: &[[u8; 10]] = &[ +        *b"r#async___", +        *b"r#await___", +        *b"r#break___", +        *b"r#const___", +        *b"r#crate___", +        *b"r#false___", +        *b"r#final___", +        *b"r#macro___", +        *b"r#match___", +        *b"r#trait___", +        *b"r#where___", +        *b"r#while___", +        *b"r#yield___", +    ]; +    const KW6: &[[u8; 10]] = &[ +        *b"r#become__", +        *b"r#extern__", +        *b"r#return__", +        *b"r#static__", +        *b"r#struct__", +        *b"r#typeof__", +        *b"r#unsafe__", +    ]; +    const KW7: &[[u8; 10]] = &[*b"r#unsized_", *b"r#virtual_"]; +    const KW8: &[[u8; 10]] = &[*b"r#abstract", *b"r#continue", *b"r#override"]; +    const KWS: &[&[[u8; 10]]] = &[KW0, KW1, KW2, KW3, KW4, KW5, KW6, KW7, KW8]; + +    if ident.len() > 8 { +        return ident; +    } +    let kws = KWS[ident.len()]; + +    let mut padded_ident = [b'_'; 8]; +    padded_ident[..ident.len()].copy_from_slice(ident.as_bytes()); + +    let idx = +        kws.binary_search_by(|probe| <[u8; 8]>::try_from(&probe[2..]).unwrap().cmp(&padded_ident)); +    match idx { +        Ok(idx) => unsafe { std::str::from_utf8_unchecked(&kws[idx][..ident.len() + 2]) }, +        Err(_) => ident, +    } +} + +fn normalize_identifier_phf(ident: &str) -> &str { +    static USE_RAW: phf::Map<&str, &str> = phf::phf_map! { +        "abstract" => "r#abstract", +        "as" => "r#as", +        "async" => "r#async", +        "await" => "r#await", +        "become" => "r#become", +        "box" => "r#box", +        "break" => "r#break", +        "const" => "r#const", +        "continue" => "r#continue", +        "crate" => "r#crate", +        "do" => "r#do", +        "dyn" => "r#dyn", +        "else" => "r#else", +        "enum" => "r#enum", +        "extern" => "r#extern", +        "false" => "r#false", +        "final" => "r#final", +        "fn" => "r#fn", +        "for" => "r#for", +        "if" => "r#if", +        "impl" => "r#impl", +        "in" => "r#in", +        "let" => "r#let", +        "macro" => "r#macro", +        "match" => "r#match", +        "mod" => "r#mod", +        "move" => "r#move", +        "mut" => "r#mut", +        "override" => "r#override", +        "priv" => "r#priv", +        "pub" => "r#pub", +        "ref" => "r#ref", +        "return" => "r#return", +        "static" => "r#static", +        "struct" => "r#struct", +        "trait" => "r#trait", +        "true" => "r#true", +        "try" => "r#try", +        "type" => "r#type", +        "typeof" => "r#typeof", +        "unsafe" => "r#unsafe", +        "unsized" => "r#unsized", +        "use" => "r#use", +        "virtual" => "r#virtual", +        "where" => "r#where", +        "while" => "r#while", +        "yield" => "r#yield", +    }; + +    USE_RAW.get(ident).copied().unwrap_or(ident) +} + +struct Words(&'static str); + +impl Default for Words { +    fn default() -> Self { +        Self(include_str!("../../askama_derive/src/generator.rs")) +    } +} + +impl Iterator for Words { +    type Item = &'static str; + +    fn next(&mut self) -> Option<Self::Item> { +        let mut pos = self.0; +        loop { +            if pos.is_empty() { +                self.0 = ""; +                return None; +            } else if matches!(pos.as_bytes()[0], b'_' | b'a'..=b'z' | b'A'..=b'Z') { +                break; +            } + +            let mut chars = pos.chars(); +            chars.next(); +            pos = chars.as_str(); +        } + +        let start = pos; +        loop { +            if pos.is_empty() { +                self.0 = ""; +                return Some(start); +            } else if !matches!(pos.as_bytes()[0], b'_' | b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9') { +                self.0 = pos; +                return Some(&start[..start.len() - pos.len()]); +            } + +            let mut chars = pos.chars(); +            chars.next(); +            pos = chars.as_str(); +        } +    } +} + +impl FusedIterator for Words {} | 
