Generator: make `normalize_identifier` faster (#946)

`normalize_identifier` is called quite often in the generator, once for every variable name or path element that is written. This PR aims to speed up the function by * using a per-length input string length replacement map * binary searching the replacement map instead of a linear search Diffent, but functionally equivalent implementations were compared: ```text * linear search in one big map: 348.44 µs * binary search in one big map: 334.46 µs * linear search in a per-length map: 178.84 µs * binary search in a per-length map: 154.54 µs * perfect hashing: 170.87 µs ``` The winner of this competition is "binary search in a per-length map". It does not introduce new dependencies, but has the slight disadvantage that it uses one instance of `unsafe` code. I deem this disadvantage acceptable, though. Nb. It was also tested if a variant that only stores the replaced string would be faster. This "optimization" proved to be slower for all implementations except "binary search in a per-length map", for which it has the same runtime. Without a clear advantage to use the "optimized version", I chose to use the more easy to read slice of tuples variant. Obviously, for all measurements: YMMV.
author: René Kijewski <Kijewski@users.noreply.github.com> 2024-01-12 20:41:46 +0100
committer: GitHub <noreply@github.com> 2024-01-12 20:41:46 +0100
commit: 3cddb918897383402a58a5d74b49500571144056 (patch)
tree: 714d350e4e95db756f35da6ca95882ab8b2768f1
parent: b52274d6e8060fc7a05e5b7d854d95dd7084b24b (diff)
download: askama-3cddb918897383402a58a5d74b49500571144056.tar.gz
askama-3cddb918897383402a58a5d74b49500571144056.tar.bz2
askama-3cddb918897383402a58a5d74b49500571144056.zip
3 files changed, 804 insertions, 54 deletions
diff --git a/askama_derive/src/generator.rs b/askama_derive/src/generator.rs
index 2040cac..c1a8ebe 100644
--- a/askama_derive/src/generator.rs
+++ b/askama_derive/src/generator.rs
@@ -2040,60 +2040,111 @@ enum Writable<'a> {
 // because they are not allowed to be raw identifiers, and *loop*
 // because it's used something like a keyword in the template
 // language.
-static USE_RAW: [(&str, &str); 47] = [
-    ("as", "r#as"),
-    ("break", "r#break"),
-    ("const", "r#const"),
-    ("continue", "r#continue"),
-    ("crate", "r#crate"),
-    ("else", "r#else"),
-    ("enum", "r#enum"),
-    ("extern", "r#extern"),
-    ("false", "r#false"),
-    ("fn", "r#fn"),
-    ("for", "r#for"),
-    ("if", "r#if"),
-    ("impl", "r#impl"),
-    ("in", "r#in"),
-    ("let", "r#let"),
-    ("match", "r#match"),
-    ("mod", "r#mod"),
-    ("move", "r#move"),
-    ("mut", "r#mut"),
-    ("pub", "r#pub"),
-    ("ref", "r#ref"),
-    ("return", "r#return"),
-    ("static", "r#static"),
-    ("struct", "r#struct"),
-    ("trait", "r#trait"),
-    ("true", "r#true"),
-    ("type", "r#type"),
-    ("unsafe", "r#unsafe"),
-    ("use", "r#use"),
-    ("where", "r#where"),
-    ("while", "r#while"),
-    ("async", "r#async"),
-    ("await", "r#await"),
-    ("dyn", "r#dyn"),
-    ("abstract", "r#abstract"),
-    ("become", "r#become"),
-    ("box", "r#box"),
-    ("do", "r#do"),
-    ("final", "r#final"),
-    ("macro", "r#macro"),
-    ("override", "r#override"),
-    ("priv", "r#priv"),
-    ("typeof", "r#typeof"),
-    ("unsized", "r#unsized"),
-    ("virtual", "r#virtual"),
-    ("yield", "r#yield"),
-    ("try", "r#try"),
-];
-
 fn normalize_identifier(ident: &str) -> &str {
-    if let Some(word) = USE_RAW.iter().find(|x| x.0 == ident) {
-        word.1
-    } else {
-        ident
+    // This table works for as long as the replacement string is the original string
+    // prepended with "r#". The strings get right-padded to the same length with b'_'.
+    // While the code does not need it, please keep the list sorted when adding new
+    // keywords.
+
+    // FIXME: Replace with `[core:ascii::Char; MAX_REPL_LEN]` once
+    //        <https://github.com/rust-lang/rust/issues/110998> is stable.
+
+    const MAX_KW_LEN: usize = 8;
+    const MAX_REPL_LEN: usize = MAX_KW_LEN + 2;
+
+    const KW0: &[[u8; MAX_REPL_LEN]] = &[];
+    const KW1: &[[u8; MAX_REPL_LEN]] = &[];
+    const KW2: &[[u8; MAX_REPL_LEN]] = &[
+        *b"r#as______",
+        *b"r#do______",
+        *b"r#fn______",
+        *b"r#if______",
+        *b"r#in______",
+    ];
+    const KW3: &[[u8; MAX_REPL_LEN]] = &[
+        *b"r#box_____",
+        *b"r#dyn_____",
+        *b"r#for_____",
+        *b"r#let_____",
+        *b"r#mod_____",
+        *b"r#mut_____",
+        *b"r#pub_____",
+        *b"r#ref_____",
+        *b"r#try_____",
+        *b"r#use_____",
+    ];
+    const KW4: &[[u8; MAX_REPL_LEN]] = &[
+        *b"r#else____",
+        *b"r#enum____",
+        *b"r#impl____",
+        *b"r#move____",
+        *b"r#priv____",
+        *b"r#true____",
+        *b"r#type____",
+    ];
+    const KW5: &[[u8; MAX_REPL_LEN]] = &[
+        *b"r#async___",
+        *b"r#await___",
+        *b"r#break___",
+        *b"r#const___",
+        *b"r#crate___",
+        *b"r#false___",
+        *b"r#final___",
+        *b"r#macro___",
+        *b"r#match___",
+        *b"r#trait___",
+        *b"r#where___",
+        *b"r#while___",
+        *b"r#yield___",
+    ];
+    const KW6: &[[u8; MAX_REPL_LEN]] = &[
+        *b"r#become__",
+        *b"r#extern__",
+        *b"r#return__",
+        *b"r#static__",
+        *b"r#struct__",
+        *b"r#typeof__",
+        *b"r#unsafe__",
+    ];
+    const KW7: &[[u8; MAX_REPL_LEN]] = &[*b"r#unsized_", *b"r#virtual_"];
+    const KW8: &[[u8; MAX_REPL_LEN]] = &[*b"r#abstract", *b"r#continue", *b"r#override"];
+
+    const KWS: &[&[[u8; MAX_REPL_LEN]]] = &[KW0, KW1, KW2, KW3, KW4, KW5, KW6, KW7, KW8];
+
+    // Ensure that all strings are ASCII, because we use `from_utf8_unchecked()` further down.
+    const _: () = {
+        let mut i = 0;
+        while i < KWS.len() {
+            let mut j = 0;
+            while KWS[i].len() < j {
+                let mut k = 0;
+                while KWS[i][j].len() < k {
+                    assert!(KWS[i][j][k].is_ascii());
+                    k += 1;
+                }
+                j += 1;
+            }
+            i += 1;
+        }
+    };
+
+    if ident.len() > MAX_KW_LEN {
+        return ident;
     }
+    let kws = KWS[ident.len()];
+
+    let mut padded_ident = [b'_'; MAX_KW_LEN];
+    padded_ident[..ident.len()].copy_from_slice(ident.as_bytes());
+
+    // Since the individual buckets are quite short, a linear search is faster than a binary search.
+    let replacement = match kws
+        .iter()
+        .find(|probe| padded_ident == <[u8; MAX_KW_LEN]>::try_from(&probe[2..]).unwrap())
+    {
+        Some(replacement) => replacement,
+        None => return ident,
+    };
+
+    // SAFETY: We know that the input byte slice is pure-ASCII.
+    unsafe { std::str::from_utf8_unchecked(&replacement[..ident.len() + 2]) }
 }
diff --git a/testing/Cargo.toml b/testing/Cargo.toml
index 793de53..20ef72a 100644
--- a/testing/Cargo.toml
+++ b/testing/Cargo.toml
@@ -15,6 +15,7 @@ markdown = ["comrak", "askama/markdown"]
 [dependencies]
 askama = { path = "../askama", version = "0.12" }
 comrak = { version = "0.20", default-features = false, optional = true }
+phf = { version = "0.11", features = ["macros" ]}
 serde_json = { version = "1.0", optional = true }
 
 [dev-dependencies]
@@ -25,3 +26,7 @@ version_check = "0.9"
 [[bench]]
 name = "all"
 harness = false
+
+[[bench]]
+name = "normalize_identifier"
+harness = false
diff --git a/testing/benches/normalize_identifier.rs b/testing/benches/normalize_identifier.rs
new file mode 100644
index 0000000..d84701d
--- /dev/null
+++ b/testing/benches/normalize_identifier.rs
@@ -0,0 +1,694 @@
+use std::collections::HashSet;
+use std::iter::FusedIterator;
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+
+criterion_main!(benches);
+criterion_group!(benches, functions);
+
+fn functions(c: &mut Criterion) {
+    let words = Words::default().collect::<Vec<_>>();
+
+    macro_rules! bench_function {
+        ($($func:ident)*) => {
+            for word in words.iter().collect::<HashSet<_>>() {
+                let linear: &str = normalize_identifier_linear(word);
+                $(
+                    assert_eq!(linear, $func(word));
+                )*
+            }
+
+            $(
+                c.bench_function(stringify!($func), |b| {
+                    b.iter(|| {
+                        for s in &words {
+                            black_box($func(black_box(s)));
+                        }
+                    });
+                });
+            )*
+        };
+    }
+
+    bench_function! {
+        normalize_identifier_linear
+        normalize_identifier_linear_replacement_only
+        normalize_identifier_bisect
+        normalize_identifier_bisect_replacement_only
+        normalize_identifier_linear_by_len
+        normalize_identifier_linear_by_len_replacement_only
+        normalize_identifier_bisect_by_len
+        normalize_identifier_bisect_by_len_replacement_only
+        normalize_identifier_phf
+    }
+}
+
+fn normalize_identifier_linear(ident: &str) -> &str {
+    static USE_RAW: [(&str, &str); 47] = [
+        ("abstract", "r#abstract"),
+        ("as", "r#as"),
+        ("async", "r#async"),
+        ("await", "r#await"),
+        ("become", "r#become"),
+        ("box", "r#box"),
+        ("break", "r#break"),
+        ("const", "r#const"),
+        ("continue", "r#continue"),
+        ("crate", "r#crate"),
+        ("do", "r#do"),
+        ("dyn", "r#dyn"),
+        ("else", "r#else"),
+        ("enum", "r#enum"),
+        ("extern", "r#extern"),
+        ("false", "r#false"),
+        ("final", "r#final"),
+        ("fn", "r#fn"),
+        ("for", "r#for"),
+        ("if", "r#if"),
+        ("impl", "r#impl"),
+        ("in", "r#in"),
+        ("let", "r#let"),
+        ("macro", "r#macro"),
+        ("match", "r#match"),
+        ("mod", "r#mod"),
+        ("move", "r#move"),
+        ("mut", "r#mut"),
+        ("override", "r#override"),
+        ("priv", "r#priv"),
+        ("pub", "r#pub"),
+        ("ref", "r#ref"),
+        ("return", "r#return"),
+        ("static", "r#static"),
+        ("struct", "r#struct"),
+        ("trait", "r#trait"),
+        ("true", "r#true"),
+        ("try", "r#try"),
+        ("type", "r#type"),
+        ("typeof", "r#typeof"),
+        ("unsafe", "r#unsafe"),
+        ("unsized", "r#unsized"),
+        ("use", "r#use"),
+        ("virtual", "r#virtual"),
+        ("where", "r#where"),
+        ("while", "r#while"),
+        ("yield", "r#yield"),
+    ];
+
+    if let Some(word) = USE_RAW.iter().find(|x| x.0 == ident) {
+        word.1
+    } else {
+        ident
+    }
+}
+
+fn normalize_identifier_bisect(ident: &str) -> &str {
+    static USE_RAW: [(&str, &str); 47] = [
+        ("abstract", "r#abstract"),
+        ("as", "r#as"),
+        ("async", "r#async"),
+        ("await", "r#await"),
+        ("become", "r#become"),
+        ("box", "r#box"),
+        ("break", "r#break"),
+        ("const", "r#const"),
+        ("continue", "r#continue"),
+        ("crate", "r#crate"),
+        ("do", "r#do"),
+        ("dyn", "r#dyn"),
+        ("else", "r#else"),
+        ("enum", "r#enum"),
+        ("extern", "r#extern"),
+        ("false", "r#false"),
+        ("final", "r#final"),
+        ("fn", "r#fn"),
+        ("for", "r#for"),
+        ("if", "r#if"),
+        ("impl", "r#impl"),
+        ("in", "r#in"),
+        ("let", "r#let"),
+        ("macro", "r#macro"),
+        ("match", "r#match"),
+        ("mod", "r#mod"),
+        ("move", "r#move"),
+        ("mut", "r#mut"),
+        ("override", "r#override"),
+        ("priv", "r#priv"),
+        ("pub", "r#pub"),
+        ("ref", "r#ref"),
+        ("return", "r#return"),
+        ("static", "r#static"),
+        ("struct", "r#struct"),
+        ("trait", "r#trait"),
+        ("true", "r#true"),
+        ("try", "r#try"),
+        ("type", "r#type"),
+        ("typeof", "r#typeof"),
+        ("unsafe", "r#unsafe"),
+        ("unsized", "r#unsized"),
+        ("use", "r#use"),
+        ("virtual", "r#virtual"),
+        ("where", "r#where"),
+        ("while", "r#while"),
+        ("yield", "r#yield"),
+    ];
+
+    if let Ok(idx) = USE_RAW.binary_search_by(|(probe, _)| (*probe).cmp(ident)) {
+        USE_RAW[idx].1
+    } else {
+        ident
+    }
+}
+
+fn normalize_identifier_linear_replacement_only(ident: &str) -> &str {
+    static USE_RAW: [&str; 47] = [
+        "r#abstract",
+        "r#as",
+        "r#async",
+        "r#await",
+        "r#become",
+        "r#box",
+        "r#break",
+        "r#const",
+        "r#continue",
+        "r#crate",
+        "r#do",
+        "r#dyn",
+        "r#else",
+        "r#enum",
+        "r#extern",
+        "r#false",
+        "r#final",
+        "r#fn",
+        "r#for",
+        "r#if",
+        "r#impl",
+        "r#in",
+        "r#let",
+        "r#macro",
+        "r#match",
+        "r#mod",
+        "r#move",
+        "r#mut",
+        "r#override",
+        "r#priv",
+        "r#pub",
+        "r#ref",
+        "r#return",
+        "r#static",
+        "r#struct",
+        "r#trait",
+        "r#true",
+        "r#try",
+        "r#type",
+        "r#typeof",
+        "r#unsafe",
+        "r#unsized",
+        "r#use",
+        "r#virtual",
+        "r#where",
+        "r#while",
+        "r#yield",
+    ];
+
+    USE_RAW
+        .iter()
+        .find(|x| &x[2..] == ident)
+        .copied()
+        .unwrap_or(ident)
+}
+
+fn normalize_identifier_bisect_replacement_only(ident: &str) -> &str {
+    static USE_RAW: [&str; 47] = [
+        "r#abstract",
+        "r#as",
+        "r#async",
+        "r#await",
+        "r#become",
+        "r#box",
+        "r#break",
+        "r#const",
+        "r#continue",
+        "r#crate",
+        "r#do",
+        "r#dyn",
+        "r#else",
+        "r#enum",
+        "r#extern",
+        "r#false",
+        "r#final",
+        "r#fn",
+        "r#for",
+        "r#if",
+        "r#impl",
+        "r#in",
+        "r#let",
+        "r#macro",
+        "r#match",
+        "r#mod",
+        "r#move",
+        "r#mut",
+        "r#override",
+        "r#priv",
+        "r#pub",
+        "r#ref",
+        "r#return",
+        "r#static",
+        "r#struct",
+        "r#trait",
+        "r#true",
+        "r#try",
+        "r#type",
+        "r#typeof",
+        "r#unsafe",
+        "r#unsized",
+        "r#use",
+        "r#virtual",
+        "r#where",
+        "r#while",
+        "r#yield",
+    ];
+
+    if let Ok(idx) = USE_RAW.binary_search_by(|probe| probe[2..].cmp(ident)) {
+        USE_RAW[idx]
+    } else {
+        ident
+    }
+}
+
+fn normalize_identifier_linear_by_len(ident: &str) -> &str {
+    const KW0: &[([u8; 8], [u8; 10])] = &[];
+    const KW1: &[([u8; 8], [u8; 10])] = &[];
+    const KW2: &[([u8; 8], [u8; 10])] = &[
+        (*b"as______", *b"r#as______"),
+        (*b"do______", *b"r#do______"),
+        (*b"fn______", *b"r#fn______"),
+        (*b"if______", *b"r#if______"),
+        (*b"in______", *b"r#in______"),
+    ];
+    const KW3: &[([u8; 8], [u8; 10])] = &[
+        (*b"box_____", *b"r#box_____"),
+        (*b"dyn_____", *b"r#dyn_____"),
+        (*b"for_____", *b"r#for_____"),
+        (*b"let_____", *b"r#let_____"),
+        (*b"mod_____", *b"r#mod_____"),
+        (*b"mut_____", *b"r#mut_____"),
+        (*b"pub_____", *b"r#pub_____"),
+        (*b"ref_____", *b"r#ref_____"),
+        (*b"try_____", *b"r#try_____"),
+        (*b"use_____", *b"r#use_____"),
+    ];
+    const KW4: &[([u8; 8], [u8; 10])] = &[
+        (*b"else____", *b"r#else____"),
+        (*b"enum____", *b"r#enum____"),
+        (*b"impl____", *b"r#impl____"),
+        (*b"move____", *b"r#move____"),
+        (*b"priv____", *b"r#priv____"),
+        (*b"true____", *b"r#true____"),
+        (*b"type____", *b"r#type____"),
+    ];
+    const KW5: &[([u8; 8], [u8; 10])] = &[
+        (*b"async___", *b"r#async___"),
+        (*b"await___", *b"r#await___"),
+        (*b"break___", *b"r#break___"),
+        (*b"const___", *b"r#const___"),
+        (*b"crate___", *b"r#crate___"),
+        (*b"false___", *b"r#false___"),
+        (*b"final___", *b"r#final___"),
+        (*b"macro___", *b"r#macro___"),
+        (*b"match___", *b"r#match___"),
+        (*b"trait___", *b"r#trait___"),
+        (*b"where___", *b"r#where___"),
+        (*b"while___", *b"r#while___"),
+        (*b"yield___", *b"r#yield___"),
+    ];
+    const KW6: &[([u8; 8], [u8; 10])] = &[
+        (*b"become__", *b"r#become__"),
+        (*b"extern__", *b"r#extern__"),
+        (*b"return__", *b"r#return__"),
+        (*b"static__", *b"r#static__"),
+        (*b"struct__", *b"r#struct__"),
+        (*b"typeof__", *b"r#typeof__"),
+        (*b"unsafe__", *b"r#unsafe__"),
+    ];
+    const KW7: &[([u8; 8], [u8; 10])] = &[
+        (*b"unsized_", *b"r#unsized_"),
+        (*b"virtual_", *b"r#virtual_"),
+    ];
+    const KW8: &[([u8; 8], [u8; 10])] = &[
+        (*b"abstract", *b"r#abstract"),
+        (*b"continue", *b"r#continue"),
+        (*b"override", *b"r#override"),
+    ];
+    const KWS: &[&[([u8; 8], [u8; 10])]] = &[KW0, KW1, KW2, KW3, KW4, KW5, KW6, KW7, KW8];
+
+    if ident.len() > 8 {
+        return ident;
+    }
+    let kws = KWS[ident.len()];
+
+    let mut padded_ident = [b'_'; 8];
+    padded_ident[..ident.len()].copy_from_slice(ident.as_bytes());
+
+    let replacement = match kws.iter().find(|(kw, _)| *kw == padded_ident) {
+        Some((_, replacement)) => replacement,
+        None => return ident,
+    };
+
+    // SAFETY: We know that the input byte slice is pure-ASCII.
+    unsafe { std::str::from_utf8_unchecked(&replacement[..ident.len() + 2]) }
+}
+
+fn normalize_identifier_linear_by_len_replacement_only(ident: &str) -> &str {
+    const KW0: &[[u8; 10]] = &[];
+    const KW1: &[[u8; 10]] = &[];
+    const KW2: &[[u8; 10]] = &[
+        *b"r#as______",
+        *b"r#do______",
+        *b"r#fn______",
+        *b"r#if______",
+        *b"r#in______",
+    ];
+    const KW3: &[[u8; 10]] = &[
+        *b"r#box_____",
+        *b"r#dyn_____",
+        *b"r#for_____",
+        *b"r#let_____",
+        *b"r#mod_____",
+        *b"r#mut_____",
+        *b"r#pub_____",
+        *b"r#ref_____",
+        *b"r#try_____",
+        *b"r#use_____",
+    ];
+    const KW4: &[[u8; 10]] = &[
+        *b"r#else____",
+        *b"r#enum____",
+        *b"r#impl____",
+        *b"r#move____",
+        *b"r#priv____",
+        *b"r#true____",
+        *b"r#type____",
+    ];
+    const KW5: &[[u8; 10]] = &[
+        *b"r#async___",
+        *b"r#await___",
+        *b"r#break___",
+        *b"r#const___",
+        *b"r#crate___",
+        *b"r#false___",
+        *b"r#final___",
+        *b"r#macro___",
+        *b"r#match___",
+        *b"r#trait___",
+        *b"r#where___",
+        *b"r#while___",
+        *b"r#yield___",
+    ];
+    const KW6: &[[u8; 10]] = &[
+        *b"r#become__",
+        *b"r#extern__",
+        *b"r#return__",
+        *b"r#static__",
+        *b"r#struct__",
+        *b"r#typeof__",
+        *b"r#unsafe__",
+    ];
+    const KW7: &[[u8; 10]] = &[*b"r#unsized_", *b"r#virtual_"];
+    const KW8: &[[u8; 10]] = &[*b"r#abstract", *b"r#continue", *b"r#override"];
+    const KWS: &[&[[u8; 10]]] = &[KW0, KW1, KW2, KW3, KW4, KW5, KW6, KW7, KW8];
+
+    if ident.len() > 8 {
+        return ident;
+    }
+    let kws = KWS[ident.len()];
+
+    let mut padded_ident = [b'_'; 8];
+    padded_ident[..ident.len()].copy_from_slice(ident.as_bytes());
+
+    let replacement = match kws
+        .iter()
+        .find(|probe| padded_ident == <[u8; 8]>::try_from(&probe[2..]).unwrap())
+    {
+        Some(replacement) => replacement,
+        None => return ident,
+    };
+
+    // SAFETY: We know that the input byte slice is pure-ASCII.
+    unsafe { std::str::from_utf8_unchecked(&replacement[..ident.len() + 2]) }
+}
+
+fn normalize_identifier_bisect_by_len(ident: &str) -> &str {
+    const KW0: &[([u8; 8], [u8; 10])] = &[];
+    const KW1: &[([u8; 8], [u8; 10])] = &[];
+    const KW2: &[([u8; 8], [u8; 10])] = &[
+        (*b"as______", *b"r#as______"),
+        (*b"do______", *b"r#do______"),
+        (*b"fn______", *b"r#fn______"),
+        (*b"if______", *b"r#if______"),
+        (*b"in______", *b"r#in______"),
+    ];
+    const KW3: &[([u8; 8], [u8; 10])] = &[
+        (*b"box_____", *b"r#box_____"),
+        (*b"dyn_____", *b"r#dyn_____"),
+        (*b"for_____", *b"r#for_____"),
+        (*b"let_____", *b"r#let_____"),
+        (*b"mod_____", *b"r#mod_____"),
+        (*b"mut_____", *b"r#mut_____"),
+        (*b"pub_____", *b"r#pub_____"),
+        (*b"ref_____", *b"r#ref_____"),
+        (*b"try_____", *b"r#try_____"),
+        (*b"use_____", *b"r#use_____"),
+    ];
+    const KW4: &[([u8; 8], [u8; 10])] = &[
+        (*b"else____", *b"r#else____"),
+        (*b"enum____", *b"r#enum____"),
+        (*b"impl____", *b"r#impl____"),
+        (*b"move____", *b"r#move____"),
+        (*b"priv____", *b"r#priv____"),
+        (*b"true____", *b"r#true____"),
+        (*b"type____", *b"r#type____"),
+    ];
+    const KW5: &[([u8; 8], [u8; 10])] = &[
+        (*b"async___", *b"r#async___"),
+        (*b"await___", *b"r#await___"),
+        (*b"break___", *b"r#break___"),
+        (*b"const___", *b"r#const___"),
+        (*b"crate___", *b"r#crate___"),
+        (*b"false___", *b"r#false___"),
+        (*b"final___", *b"r#final___"),
+        (*b"macro___", *b"r#macro___"),
+        (*b"match___", *b"r#match___"),
+        (*b"trait___", *b"r#trait___"),
+        (*b"where___", *b"r#where___"),
+        (*b"while___", *b"r#while___"),
+        (*b"yield___", *b"r#yield___"),
+    ];
+    const KW6: &[([u8; 8], [u8; 10])] = &[
+        (*b"become__", *b"r#become__"),
+        (*b"extern__", *b"r#extern__"),
+        (*b"return__", *b"r#return__"),
+        (*b"static__", *b"r#static__"),
+        (*b"struct__", *b"r#struct__"),
+        (*b"typeof__", *b"r#typeof__"),
+        (*b"unsafe__", *b"r#unsafe__"),
+    ];
+    const KW7: &[([u8; 8], [u8; 10])] = &[
+        (*b"unsized_", *b"r#unsized_"),
+        (*b"virtual_", *b"r#virtual_"),
+    ];
+    const KW8: &[([u8; 8], [u8; 10])] = &[
+        (*b"abstract", *b"r#abstract"),
+        (*b"continue", *b"r#continue"),
+        (*b"override", *b"r#override"),
+    ];
+    const KWS: &[&[([u8; 8], [u8; 10])]] = &[KW0, KW1, KW2, KW3, KW4, KW5, KW6, KW7, KW8];
+
+    if ident.len() > 8 {
+        return ident;
+    }
+    let kws = KWS[ident.len()];
+
+    let mut padded_ident = [b'_'; 8];
+    padded_ident[..ident.len()].copy_from_slice(ident.as_bytes());
+
+    if let Ok(idx) = kws.binary_search_by(|(probe, _)| probe.cmp(&padded_ident)) {
+        // SAFETY: We know that the input byte slice is pure-ASCII.
+        unsafe { std::str::from_utf8_unchecked(&kws[idx].1[..ident.len() + 2]) }
+    } else {
+        ident
+    }
+}
+
+fn normalize_identifier_bisect_by_len_replacement_only(ident: &str) -> &str {
+    const KW0: &[[u8; 10]] = &[];
+    const KW1: &[[u8; 10]] = &[];
+    const KW2: &[[u8; 10]] = &[
+        *b"r#as______",
+        *b"r#do______",
+        *b"r#fn______",
+        *b"r#if______",
+        *b"r#in______",
+    ];
+    const KW3: &[[u8; 10]] = &[
+        *b"r#box_____",
+        *b"r#dyn_____",
+        *b"r#for_____",
+        *b"r#let_____",
+        *b"r#mod_____",
+        *b"r#mut_____",
+        *b"r#pub_____",
+        *b"r#ref_____",
+        *b"r#try_____",
+        *b"r#use_____",
+    ];
+    const KW4: &[[u8; 10]] = &[
+        *b"r#else____",
+        *b"r#enum____",
+        *b"r#impl____",
+        *b"r#move____",
+        *b"r#priv____",
+        *b"r#true____",
+        *b"r#type____",
+    ];
+    const KW5: &[[u8; 10]] = &[
+        *b"r#async___",
+        *b"r#await___",
+        *b"r#break___",
+        *b"r#const___",
+        *b"r#crate___",
+        *b"r#false___",
+        *b"r#final___",
+        *b"r#macro___",
+        *b"r#match___",
+        *b"r#trait___",
+        *b"r#where___",
+        *b"r#while___",
+        *b"r#yield___",
+    ];
+    const KW6: &[[u8; 10]] = &[
+        *b"r#become__",
+        *b"r#extern__",
+        *b"r#return__",
+        *b"r#static__",
+        *b"r#struct__",
+        *b"r#typeof__",
+        *b"r#unsafe__",
+    ];
+    const KW7: &[[u8; 10]] = &[*b"r#unsized_", *b"r#virtual_"];
+    const KW8: &[[u8; 10]] = &[*b"r#abstract", *b"r#continue", *b"r#override"];
+    const KWS: &[&[[u8; 10]]] = &[KW0, KW1, KW2, KW3, KW4, KW5, KW6, KW7, KW8];
+
+    if ident.len() > 8 {
+        return ident;
+    }
+    let kws = KWS[ident.len()];
+
+    let mut padded_ident = [b'_'; 8];
+    padded_ident[..ident.len()].copy_from_slice(ident.as_bytes());
+
+    let idx =
+        kws.binary_search_by(|probe| <[u8; 8]>::try_from(&probe[2..]).unwrap().cmp(&padded_ident));
+    match idx {
+        Ok(idx) => unsafe { std::str::from_utf8_unchecked(&kws[idx][..ident.len() + 2]) },
+        Err(_) => ident,
+    }
+}
+
+fn normalize_identifier_phf(ident: &str) -> &str {
+    static USE_RAW: phf::Map<&str, &str> = phf::phf_map! {
+        "abstract" => "r#abstract",
+        "as" => "r#as",
+        "async" => "r#async",
+        "await" => "r#await",
+        "become" => "r#become",
+        "box" => "r#box",
+        "break" => "r#break",
+        "const" => "r#const",
+        "continue" => "r#continue",
+        "crate" => "r#crate",
+        "do" => "r#do",
+        "dyn" => "r#dyn",
+        "else" => "r#else",
+        "enum" => "r#enum",
+        "extern" => "r#extern",
+        "false" => "r#false",
+        "final" => "r#final",
+        "fn" => "r#fn",
+        "for" => "r#for",
+        "if" => "r#if",
+        "impl" => "r#impl",
+        "in" => "r#in",
+        "let" => "r#let",
+        "macro" => "r#macro",
+        "match" => "r#match",
+        "mod" => "r#mod",
+        "move" => "r#move",
+        "mut" => "r#mut",
+        "override" => "r#override",
+        "priv" => "r#priv",
+        "pub" => "r#pub",
+        "ref" => "r#ref",
+        "return" => "r#return",
+        "static" => "r#static",
+        "struct" => "r#struct",
+        "trait" => "r#trait",
+        "true" => "r#true",
+        "try" => "r#try",
+        "type" => "r#type",
+        "typeof" => "r#typeof",
+        "unsafe" => "r#unsafe",
+        "unsized" => "r#unsized",
+        "use" => "r#use",
+        "virtual" => "r#virtual",
+        "where" => "r#where",
+        "while" => "r#while",
+        "yield" => "r#yield",
+    };
+
+    USE_RAW.get(ident).copied().unwrap_or(ident)
+}
+
+struct Words(&'static str);
+
+impl Default for Words {
+    fn default() -> Self {
+        Self(include_str!("../../askama_derive/src/generator.rs"))
+    }
+}
+
+impl Iterator for Words {
+    type Item = &'static str;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let mut pos = self.0;
+        loop {
+            if pos.is_empty() {
+                self.0 = "";
+                return None;
+            } else if matches!(pos.as_bytes()[0], b'_' | b'a'..=b'z' | b'A'..=b'Z') {
+                break;
+            }
+
+            let mut chars = pos.chars();
+            chars.next();
+            pos = chars.as_str();
+        }
+
+        let start = pos;
+        loop {
+            if pos.is_empty() {
+                self.0 = "";
+                return Some(start);
+            } else if !matches!(pos.as_bytes()[0], b'_' | b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9') {
+                self.0 = pos;
+                return Some(&start[..start.len() - pos.len()]);
+            }
+
+            let mut chars = pos.chars();
+            chars.next();
+            pos = chars.as_str();
+        }
+    }
+}
+
+impl FusedIterator for Words {}
author	René Kijewski <Kijewski@users.noreply.github.com>	2024-01-12 20:41:46 +0100
committer	GitHub <noreply@github.com>	2024-01-12 20:41:46 +0100
commit	3cddb918897383402a58a5d74b49500571144056 (patch)
tree	714d350e4e95db756f35da6ca95882ab8b2768f1
parent	b52274d6e8060fc7a05e5b7d854d95dd7084b24b (diff)
download	askama-3cddb918897383402a58a5d74b49500571144056.tar.gz askama-3cddb918897383402a58a5d74b49500571144056.tar.bz2 askama-3cddb918897383402a58a5d74b49500571144056.zip