aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLibravatar René Kijewski <Kijewski@users.noreply.github.com>2024-01-12 20:41:46 +0100
committerLibravatar GitHub <noreply@github.com>2024-01-12 20:41:46 +0100
commit3cddb918897383402a58a5d74b49500571144056 (patch)
tree714d350e4e95db756f35da6ca95882ab8b2768f1
parentb52274d6e8060fc7a05e5b7d854d95dd7084b24b (diff)
downloadaskama-3cddb918897383402a58a5d74b49500571144056.tar.gz
askama-3cddb918897383402a58a5d74b49500571144056.tar.bz2
askama-3cddb918897383402a58a5d74b49500571144056.zip
Generator: make `normalize_identifier` faster (#946)
`normalize_identifier` is called quite often in the generator, once for every variable name or path element that is written. This PR aims to speed up the function by * using a per-length input string length replacement map * binary searching the replacement map instead of a linear search Diffent, but functionally equivalent implementations were compared: ```text * linear search in one big map: 348.44 µs * binary search in one big map: 334.46 µs * linear search in a per-length map: 178.84 µs * binary search in a per-length map: 154.54 µs * perfect hashing: 170.87 µs ``` The winner of this competition is "binary search in a per-length map". It does not introduce new dependencies, but has the slight disadvantage that it uses one instance of `unsafe` code. I deem this disadvantage acceptable, though. Nb. It was also tested if a variant that only stores the replaced string would be faster. This "optimization" proved to be slower for all implementations except "binary search in a per-length map", for which it has the same runtime. Without a clear advantage to use the "optimized version", I chose to use the more easy to read slice of tuples variant. Obviously, for all measurements: YMMV.
-rw-r--r--askama_derive/src/generator.rs159
-rw-r--r--testing/Cargo.toml5
-rw-r--r--testing/benches/normalize_identifier.rs694
3 files changed, 804 insertions, 54 deletions
diff --git a/askama_derive/src/generator.rs b/askama_derive/src/generator.rs
index 2040cac..c1a8ebe 100644
--- a/askama_derive/src/generator.rs
+++ b/askama_derive/src/generator.rs
@@ -2040,60 +2040,111 @@ enum Writable<'a> {
// because they are not allowed to be raw identifiers, and *loop*
// because it's used something like a keyword in the template
// language.
-static USE_RAW: [(&str, &str); 47] = [
- ("as", "r#as"),
- ("break", "r#break"),
- ("const", "r#const"),
- ("continue", "r#continue"),
- ("crate", "r#crate"),
- ("else", "r#else"),
- ("enum", "r#enum"),
- ("extern", "r#extern"),
- ("false", "r#false"),
- ("fn", "r#fn"),
- ("for", "r#for"),
- ("if", "r#if"),
- ("impl", "r#impl"),
- ("in", "r#in"),
- ("let", "r#let"),
- ("match", "r#match"),
- ("mod", "r#mod"),
- ("move", "r#move"),
- ("mut", "r#mut"),
- ("pub", "r#pub"),
- ("ref", "r#ref"),
- ("return", "r#return"),
- ("static", "r#static"),
- ("struct", "r#struct"),
- ("trait", "r#trait"),
- ("true", "r#true"),
- ("type", "r#type"),
- ("unsafe", "r#unsafe"),
- ("use", "r#use"),
- ("where", "r#where"),
- ("while", "r#while"),
- ("async", "r#async"),
- ("await", "r#await"),
- ("dyn", "r#dyn"),
- ("abstract", "r#abstract"),
- ("become", "r#become"),
- ("box", "r#box"),
- ("do", "r#do"),
- ("final", "r#final"),
- ("macro", "r#macro"),
- ("override", "r#override"),
- ("priv", "r#priv"),
- ("typeof", "r#typeof"),
- ("unsized", "r#unsized"),
- ("virtual", "r#virtual"),
- ("yield", "r#yield"),
- ("try", "r#try"),
-];
-
fn normalize_identifier(ident: &str) -> &str {
- if let Some(word) = USE_RAW.iter().find(|x| x.0 == ident) {
- word.1
- } else {
- ident
+ // This table works for as long as the replacement string is the original string
+ // prepended with "r#". The strings get right-padded to the same length with b'_'.
+ // While the code does not need it, please keep the list sorted when adding new
+ // keywords.
+
+ // FIXME: Replace with `[core:ascii::Char; MAX_REPL_LEN]` once
+ // <https://github.com/rust-lang/rust/issues/110998> is stable.
+
+ const MAX_KW_LEN: usize = 8;
+ const MAX_REPL_LEN: usize = MAX_KW_LEN + 2;
+
+ const KW0: &[[u8; MAX_REPL_LEN]] = &[];
+ const KW1: &[[u8; MAX_REPL_LEN]] = &[];
+ const KW2: &[[u8; MAX_REPL_LEN]] = &[
+ *b"r#as______",
+ *b"r#do______",
+ *b"r#fn______",
+ *b"r#if______",
+ *b"r#in______",
+ ];
+ const KW3: &[[u8; MAX_REPL_LEN]] = &[
+ *b"r#box_____",
+ *b"r#dyn_____",
+ *b"r#for_____",
+ *b"r#let_____",
+ *b"r#mod_____",
+ *b"r#mut_____",
+ *b"r#pub_____",
+ *b"r#ref_____",
+ *b"r#try_____",
+ *b"r#use_____",
+ ];
+ const KW4: &[[u8; MAX_REPL_LEN]] = &[
+ *b"r#else____",
+ *b"r#enum____",
+ *b"r#impl____",
+ *b"r#move____",
+ *b"r#priv____",
+ *b"r#true____",
+ *b"r#type____",
+ ];
+ const KW5: &[[u8; MAX_REPL_LEN]] = &[
+ *b"r#async___",
+ *b"r#await___",
+ *b"r#break___",
+ *b"r#const___",
+ *b"r#crate___",
+ *b"r#false___",
+ *b"r#final___",
+ *b"r#macro___",
+ *b"r#match___",
+ *b"r#trait___",
+ *b"r#where___",
+ *b"r#while___",
+ *b"r#yield___",
+ ];
+ const KW6: &[[u8; MAX_REPL_LEN]] = &[
+ *b"r#become__",
+ *b"r#extern__",
+ *b"r#return__",
+ *b"r#static__",
+ *b"r#struct__",
+ *b"r#typeof__",
+ *b"r#unsafe__",
+ ];
+ const KW7: &[[u8; MAX_REPL_LEN]] = &[*b"r#unsized_", *b"r#virtual_"];
+ const KW8: &[[u8; MAX_REPL_LEN]] = &[*b"r#abstract", *b"r#continue", *b"r#override"];
+
+ const KWS: &[&[[u8; MAX_REPL_LEN]]] = &[KW0, KW1, KW2, KW3, KW4, KW5, KW6, KW7, KW8];
+
+ // Ensure that all strings are ASCII, because we use `from_utf8_unchecked()` further down.
+ const _: () = {
+ let mut i = 0;
+ while i < KWS.len() {
+ let mut j = 0;
+ while KWS[i].len() < j {
+ let mut k = 0;
+ while KWS[i][j].len() < k {
+ assert!(KWS[i][j][k].is_ascii());
+ k += 1;
+ }
+ j += 1;
+ }
+ i += 1;
+ }
+ };
+
+ if ident.len() > MAX_KW_LEN {
+ return ident;
}
+ let kws = KWS[ident.len()];
+
+ let mut padded_ident = [b'_'; MAX_KW_LEN];
+ padded_ident[..ident.len()].copy_from_slice(ident.as_bytes());
+
+ // Since the individual buckets are quite short, a linear search is faster than a binary search.
+ let replacement = match kws
+ .iter()
+ .find(|probe| padded_ident == <[u8; MAX_KW_LEN]>::try_from(&probe[2..]).unwrap())
+ {
+ Some(replacement) => replacement,
+ None => return ident,
+ };
+
+ // SAFETY: We know that the input byte slice is pure-ASCII.
+ unsafe { std::str::from_utf8_unchecked(&replacement[..ident.len() + 2]) }
}
diff --git a/testing/Cargo.toml b/testing/Cargo.toml
index 793de53..20ef72a 100644
--- a/testing/Cargo.toml
+++ b/testing/Cargo.toml
@@ -15,6 +15,7 @@ markdown = ["comrak", "askama/markdown"]
[dependencies]
askama = { path = "../askama", version = "0.12" }
comrak = { version = "0.20", default-features = false, optional = true }
+phf = { version = "0.11", features = ["macros" ]}
serde_json = { version = "1.0", optional = true }
[dev-dependencies]
@@ -25,3 +26,7 @@ version_check = "0.9"
[[bench]]
name = "all"
harness = false
+
+[[bench]]
+name = "normalize_identifier"
+harness = false
diff --git a/testing/benches/normalize_identifier.rs b/testing/benches/normalize_identifier.rs
new file mode 100644
index 0000000..d84701d
--- /dev/null
+++ b/testing/benches/normalize_identifier.rs
@@ -0,0 +1,694 @@
+use std::collections::HashSet;
+use std::iter::FusedIterator;
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+
+criterion_main!(benches);
+criterion_group!(benches, functions);
+
+fn functions(c: &mut Criterion) {
+ let words = Words::default().collect::<Vec<_>>();
+
+ macro_rules! bench_function {
+ ($($func:ident)*) => {
+ for word in words.iter().collect::<HashSet<_>>() {
+ let linear: &str = normalize_identifier_linear(word);
+ $(
+ assert_eq!(linear, $func(word));
+ )*
+ }
+
+ $(
+ c.bench_function(stringify!($func), |b| {
+ b.iter(|| {
+ for s in &words {
+ black_box($func(black_box(s)));
+ }
+ });
+ });
+ )*
+ };
+ }
+
+ bench_function! {
+ normalize_identifier_linear
+ normalize_identifier_linear_replacement_only
+ normalize_identifier_bisect
+ normalize_identifier_bisect_replacement_only
+ normalize_identifier_linear_by_len
+ normalize_identifier_linear_by_len_replacement_only
+ normalize_identifier_bisect_by_len
+ normalize_identifier_bisect_by_len_replacement_only
+ normalize_identifier_phf
+ }
+}
+
+fn normalize_identifier_linear(ident: &str) -> &str {
+ static USE_RAW: [(&str, &str); 47] = [
+ ("abstract", "r#abstract"),
+ ("as", "r#as"),
+ ("async", "r#async"),
+ ("await", "r#await"),
+ ("become", "r#become"),
+ ("box", "r#box"),
+ ("break", "r#break"),
+ ("const", "r#const"),
+ ("continue", "r#continue"),
+ ("crate", "r#crate"),
+ ("do", "r#do"),
+ ("dyn", "r#dyn"),
+ ("else", "r#else"),
+ ("enum", "r#enum"),
+ ("extern", "r#extern"),
+ ("false", "r#false"),
+ ("final", "r#final"),
+ ("fn", "r#fn"),
+ ("for", "r#for"),
+ ("if", "r#if"),
+ ("impl", "r#impl"),
+ ("in", "r#in"),
+ ("let", "r#let"),
+ ("macro", "r#macro"),
+ ("match", "r#match"),
+ ("mod", "r#mod"),
+ ("move", "r#move"),
+ ("mut", "r#mut"),
+ ("override", "r#override"),
+ ("priv", "r#priv"),
+ ("pub", "r#pub"),
+ ("ref", "r#ref"),
+ ("return", "r#return"),
+ ("static", "r#static"),
+ ("struct", "r#struct"),
+ ("trait", "r#trait"),
+ ("true", "r#true"),
+ ("try", "r#try"),
+ ("type", "r#type"),
+ ("typeof", "r#typeof"),
+ ("unsafe", "r#unsafe"),
+ ("unsized", "r#unsized"),
+ ("use", "r#use"),
+ ("virtual", "r#virtual"),
+ ("where", "r#where"),
+ ("while", "r#while"),
+ ("yield", "r#yield"),
+ ];
+
+ if let Some(word) = USE_RAW.iter().find(|x| x.0 == ident) {
+ word.1
+ } else {
+ ident
+ }
+}
+
+fn normalize_identifier_bisect(ident: &str) -> &str {
+ static USE_RAW: [(&str, &str); 47] = [
+ ("abstract", "r#abstract"),
+ ("as", "r#as"),
+ ("async", "r#async"),
+ ("await", "r#await"),
+ ("become", "r#become"),
+ ("box", "r#box"),
+ ("break", "r#break"),
+ ("const", "r#const"),
+ ("continue", "r#continue"),
+ ("crate", "r#crate"),
+ ("do", "r#do"),
+ ("dyn", "r#dyn"),
+ ("else", "r#else"),
+ ("enum", "r#enum"),
+ ("extern", "r#extern"),
+ ("false", "r#false"),
+ ("final", "r#final"),
+ ("fn", "r#fn"),
+ ("for", "r#for"),
+ ("if", "r#if"),
+ ("impl", "r#impl"),
+ ("in", "r#in"),
+ ("let", "r#let"),
+ ("macro", "r#macro"),
+ ("match", "r#match"),
+ ("mod", "r#mod"),
+ ("move", "r#move"),
+ ("mut", "r#mut"),
+ ("override", "r#override"),
+ ("priv", "r#priv"),
+ ("pub", "r#pub"),
+ ("ref", "r#ref"),
+ ("return", "r#return"),
+ ("static", "r#static"),
+ ("struct", "r#struct"),
+ ("trait", "r#trait"),
+ ("true", "r#true"),
+ ("try", "r#try"),
+ ("type", "r#type"),
+ ("typeof", "r#typeof"),
+ ("unsafe", "r#unsafe"),
+ ("unsized", "r#unsized"),
+ ("use", "r#use"),
+ ("virtual", "r#virtual"),
+ ("where", "r#where"),
+ ("while", "r#while"),
+ ("yield", "r#yield"),
+ ];
+
+ if let Ok(idx) = USE_RAW.binary_search_by(|(probe, _)| (*probe).cmp(ident)) {
+ USE_RAW[idx].1
+ } else {
+ ident
+ }
+}
+
+fn normalize_identifier_linear_replacement_only(ident: &str) -> &str {
+ static USE_RAW: [&str; 47] = [
+ "r#abstract",
+ "r#as",
+ "r#async",
+ "r#await",
+ "r#become",
+ "r#box",
+ "r#break",
+ "r#const",
+ "r#continue",
+ "r#crate",
+ "r#do",
+ "r#dyn",
+ "r#else",
+ "r#enum",
+ "r#extern",
+ "r#false",
+ "r#final",
+ "r#fn",
+ "r#for",
+ "r#if",
+ "r#impl",
+ "r#in",
+ "r#let",
+ "r#macro",
+ "r#match",
+ "r#mod",
+ "r#move",
+ "r#mut",
+ "r#override",
+ "r#priv",
+ "r#pub",
+ "r#ref",
+ "r#return",
+ "r#static",
+ "r#struct",
+ "r#trait",
+ "r#true",
+ "r#try",
+ "r#type",
+ "r#typeof",
+ "r#unsafe",
+ "r#unsized",
+ "r#use",
+ "r#virtual",
+ "r#where",
+ "r#while",
+ "r#yield",
+ ];
+
+ USE_RAW
+ .iter()
+ .find(|x| &x[2..] == ident)
+ .copied()
+ .unwrap_or(ident)
+}
+
+fn normalize_identifier_bisect_replacement_only(ident: &str) -> &str {
+ static USE_RAW: [&str; 47] = [
+ "r#abstract",
+ "r#as",
+ "r#async",
+ "r#await",
+ "r#become",
+ "r#box",
+ "r#break",
+ "r#const",
+ "r#continue",
+ "r#crate",
+ "r#do",
+ "r#dyn",
+ "r#else",
+ "r#enum",
+ "r#extern",
+ "r#false",
+ "r#final",
+ "r#fn",
+ "r#for",
+ "r#if",
+ "r#impl",
+ "r#in",
+ "r#let",
+ "r#macro",
+ "r#match",
+ "r#mod",
+ "r#move",
+ "r#mut",
+ "r#override",
+ "r#priv",
+ "r#pub",
+ "r#ref",
+ "r#return",
+ "r#static",
+ "r#struct",
+ "r#trait",
+ "r#true",
+ "r#try",
+ "r#type",
+ "r#typeof",
+ "r#unsafe",
+ "r#unsized",
+ "r#use",
+ "r#virtual",
+ "r#where",
+ "r#while",
+ "r#yield",
+ ];
+
+ if let Ok(idx) = USE_RAW.binary_search_by(|probe| probe[2..].cmp(ident)) {
+ USE_RAW[idx]
+ } else {
+ ident
+ }
+}
+
+fn normalize_identifier_linear_by_len(ident: &str) -> &str {
+ const KW0: &[([u8; 8], [u8; 10])] = &[];
+ const KW1: &[([u8; 8], [u8; 10])] = &[];
+ const KW2: &[([u8; 8], [u8; 10])] = &[
+ (*b"as______", *b"r#as______"),
+ (*b"do______", *b"r#do______"),
+ (*b"fn______", *b"r#fn______"),
+ (*b"if______", *b"r#if______"),
+ (*b"in______", *b"r#in______"),
+ ];
+ const KW3: &[([u8; 8], [u8; 10])] = &[
+ (*b"box_____", *b"r#box_____"),
+ (*b"dyn_____", *b"r#dyn_____"),
+ (*b"for_____", *b"r#for_____"),
+ (*b"let_____", *b"r#let_____"),
+ (*b"mod_____", *b"r#mod_____"),
+ (*b"mut_____", *b"r#mut_____"),
+ (*b"pub_____", *b"r#pub_____"),
+ (*b"ref_____", *b"r#ref_____"),
+ (*b"try_____", *b"r#try_____"),
+ (*b"use_____", *b"r#use_____"),
+ ];
+ const KW4: &[([u8; 8], [u8; 10])] = &[
+ (*b"else____", *b"r#else____"),
+ (*b"enum____", *b"r#enum____"),
+ (*b"impl____", *b"r#impl____"),
+ (*b"move____", *b"r#move____"),
+ (*b"priv____", *b"r#priv____"),
+ (*b"true____", *b"r#true____"),
+ (*b"type____", *b"r#type____"),
+ ];
+ const KW5: &[([u8; 8], [u8; 10])] = &[
+ (*b"async___", *b"r#async___"),
+ (*b"await___", *b"r#await___"),
+ (*b"break___", *b"r#break___"),
+ (*b"const___", *b"r#const___"),
+ (*b"crate___", *b"r#crate___"),
+ (*b"false___", *b"r#false___"),
+ (*b"final___", *b"r#final___"),
+ (*b"macro___", *b"r#macro___"),
+ (*b"match___", *b"r#match___"),
+ (*b"trait___", *b"r#trait___"),
+ (*b"where___", *b"r#where___"),
+ (*b"while___", *b"r#while___"),
+ (*b"yield___", *b"r#yield___"),
+ ];
+ const KW6: &[([u8; 8], [u8; 10])] = &[
+ (*b"become__", *b"r#become__"),
+ (*b"extern__", *b"r#extern__"),
+ (*b"return__", *b"r#return__"),
+ (*b"static__", *b"r#static__"),
+ (*b"struct__", *b"r#struct__"),
+ (*b"typeof__", *b"r#typeof__"),
+ (*b"unsafe__", *b"r#unsafe__"),
+ ];
+ const KW7: &[([u8; 8], [u8; 10])] = &[
+ (*b"unsized_", *b"r#unsized_"),
+ (*b"virtual_", *b"r#virtual_"),
+ ];
+ const KW8: &[([u8; 8], [u8; 10])] = &[
+ (*b"abstract", *b"r#abstract"),
+ (*b"continue", *b"r#continue"),
+ (*b"override", *b"r#override"),
+ ];
+ const KWS: &[&[([u8; 8], [u8; 10])]] = &[KW0, KW1, KW2, KW3, KW4, KW5, KW6, KW7, KW8];
+
+ if ident.len() > 8 {
+ return ident;
+ }
+ let kws = KWS[ident.len()];
+
+ let mut padded_ident = [b'_'; 8];
+ padded_ident[..ident.len()].copy_from_slice(ident.as_bytes());
+
+ let replacement = match kws.iter().find(|(kw, _)| *kw == padded_ident) {
+ Some((_, replacement)) => replacement,
+ None => return ident,
+ };
+
+ // SAFETY: We know that the input byte slice is pure-ASCII.
+ unsafe { std::str::from_utf8_unchecked(&replacement[..ident.len() + 2]) }
+}
+
+fn normalize_identifier_linear_by_len_replacement_only(ident: &str) -> &str {
+ const KW0: &[[u8; 10]] = &[];
+ const KW1: &[[u8; 10]] = &[];
+ const KW2: &[[u8; 10]] = &[
+ *b"r#as______",
+ *b"r#do______",
+ *b"r#fn______",
+ *b"r#if______",
+ *b"r#in______",
+ ];
+ const KW3: &[[u8; 10]] = &[
+ *b"r#box_____",
+ *b"r#dyn_____",
+ *b"r#for_____",
+ *b"r#let_____",
+ *b"r#mod_____",
+ *b"r#mut_____",
+ *b"r#pub_____",
+ *b"r#ref_____",
+ *b"r#try_____",
+ *b"r#use_____",
+ ];
+ const KW4: &[[u8; 10]] = &[
+ *b"r#else____",
+ *b"r#enum____",
+ *b"r#impl____",
+ *b"r#move____",
+ *b"r#priv____",
+ *b"r#true____",
+ *b"r#type____",
+ ];
+ const KW5: &[[u8; 10]] = &[
+ *b"r#async___",
+ *b"r#await___",
+ *b"r#break___",
+ *b"r#const___",
+ *b"r#crate___",
+ *b"r#false___",
+ *b"r#final___",
+ *b"r#macro___",
+ *b"r#match___",
+ *b"r#trait___",
+ *b"r#where___",
+ *b"r#while___",
+ *b"r#yield___",
+ ];
+ const KW6: &[[u8; 10]] = &[
+ *b"r#become__",
+ *b"r#extern__",
+ *b"r#return__",
+ *b"r#static__",
+ *b"r#struct__",
+ *b"r#typeof__",
+ *b"r#unsafe__",
+ ];
+ const KW7: &[[u8; 10]] = &[*b"r#unsized_", *b"r#virtual_"];
+ const KW8: &[[u8; 10]] = &[*b"r#abstract", *b"r#continue", *b"r#override"];
+ const KWS: &[&[[u8; 10]]] = &[KW0, KW1, KW2, KW3, KW4, KW5, KW6, KW7, KW8];
+
+ if ident.len() > 8 {
+ return ident;
+ }
+ let kws = KWS[ident.len()];
+
+ let mut padded_ident = [b'_'; 8];
+ padded_ident[..ident.len()].copy_from_slice(ident.as_bytes());
+
+ let replacement = match kws
+ .iter()
+ .find(|probe| padded_ident == <[u8; 8]>::try_from(&probe[2..]).unwrap())
+ {
+ Some(replacement) => replacement,
+ None => return ident,
+ };
+
+ // SAFETY: We know that the input byte slice is pure-ASCII.
+ unsafe { std::str::from_utf8_unchecked(&replacement[..ident.len() + 2]) }
+}
+
+fn normalize_identifier_bisect_by_len(ident: &str) -> &str {
+ const KW0: &[([u8; 8], [u8; 10])] = &[];
+ const KW1: &[([u8; 8], [u8; 10])] = &[];
+ const KW2: &[([u8; 8], [u8; 10])] = &[
+ (*b"as______", *b"r#as______"),
+ (*b"do______", *b"r#do______"),
+ (*b"fn______", *b"r#fn______"),
+ (*b"if______", *b"r#if______"),
+ (*b"in______", *b"r#in______"),
+ ];
+ const KW3: &[([u8; 8], [u8; 10])] = &[
+ (*b"box_____", *b"r#box_____"),
+ (*b"dyn_____", *b"r#dyn_____"),
+ (*b"for_____", *b"r#for_____"),
+ (*b"let_____", *b"r#let_____"),
+ (*b"mod_____", *b"r#mod_____"),
+ (*b"mut_____", *b"r#mut_____"),
+ (*b"pub_____", *b"r#pub_____"),
+ (*b"ref_____", *b"r#ref_____"),
+ (*b"try_____", *b"r#try_____"),
+ (*b"use_____", *b"r#use_____"),
+ ];
+ const KW4: &[([u8; 8], [u8; 10])] = &[
+ (*b"else____", *b"r#else____"),
+ (*b"enum____", *b"r#enum____"),
+ (*b"impl____", *b"r#impl____"),
+ (*b"move____", *b"r#move____"),
+ (*b"priv____", *b"r#priv____"),
+ (*b"true____", *b"r#true____"),
+ (*b"type____", *b"r#type____"),
+ ];
+ const KW5: &[([u8; 8], [u8; 10])] = &[
+ (*b"async___", *b"r#async___"),
+ (*b"await___", *b"r#await___"),
+ (*b"break___", *b"r#break___"),
+ (*b"const___", *b"r#const___"),
+ (*b"crate___", *b"r#crate___"),
+ (*b"false___", *b"r#false___"),
+ (*b"final___", *b"r#final___"),
+ (*b"macro___", *b"r#macro___"),
+ (*b"match___", *b"r#match___"),
+ (*b"trait___", *b"r#trait___"),
+ (*b"where___", *b"r#where___"),
+ (*b"while___", *b"r#while___"),
+ (*b"yield___", *b"r#yield___"),
+ ];
+ const KW6: &[([u8; 8], [u8; 10])] = &[
+ (*b"become__", *b"r#become__"),
+ (*b"extern__", *b"r#extern__"),
+ (*b"return__", *b"r#return__"),
+ (*b"static__", *b"r#static__"),
+ (*b"struct__", *b"r#struct__"),
+ (*b"typeof__", *b"r#typeof__"),
+ (*b"unsafe__", *b"r#unsafe__"),
+ ];
+ const KW7: &[([u8; 8], [u8; 10])] = &[
+ (*b"unsized_", *b"r#unsized_"),
+ (*b"virtual_", *b"r#virtual_"),
+ ];
+ const KW8: &[([u8; 8], [u8; 10])] = &[
+ (*b"abstract", *b"r#abstract"),
+ (*b"continue", *b"r#continue"),
+ (*b"override", *b"r#override"),
+ ];
+ const KWS: &[&[([u8; 8], [u8; 10])]] = &[KW0, KW1, KW2, KW3, KW4, KW5, KW6, KW7, KW8];
+
+ if ident.len() > 8 {
+ return ident;
+ }
+ let kws = KWS[ident.len()];
+
+ let mut padded_ident = [b'_'; 8];
+ padded_ident[..ident.len()].copy_from_slice(ident.as_bytes());
+
+ if let Ok(idx) = kws.binary_search_by(|(probe, _)| probe.cmp(&padded_ident)) {
+ // SAFETY: We know that the input byte slice is pure-ASCII.
+ unsafe { std::str::from_utf8_unchecked(&kws[idx].1[..ident.len() + 2]) }
+ } else {
+ ident
+ }
+}
+
+fn normalize_identifier_bisect_by_len_replacement_only(ident: &str) -> &str {
+ const KW0: &[[u8; 10]] = &[];
+ const KW1: &[[u8; 10]] = &[];
+ const KW2: &[[u8; 10]] = &[
+ *b"r#as______",
+ *b"r#do______",
+ *b"r#fn______",
+ *b"r#if______",
+ *b"r#in______",
+ ];
+ const KW3: &[[u8; 10]] = &[
+ *b"r#box_____",
+ *b"r#dyn_____",
+ *b"r#for_____",
+ *b"r#let_____",
+ *b"r#mod_____",
+ *b"r#mut_____",
+ *b"r#pub_____",
+ *b"r#ref_____",
+ *b"r#try_____",
+ *b"r#use_____",
+ ];
+ const KW4: &[[u8; 10]] = &[
+ *b"r#else____",
+ *b"r#enum____",
+ *b"r#impl____",
+ *b"r#move____",
+ *b"r#priv____",
+ *b"r#true____",
+ *b"r#type____",
+ ];
+ const KW5: &[[u8; 10]] = &[
+ *b"r#async___",
+ *b"r#await___",
+ *b"r#break___",
+ *b"r#const___",
+ *b"r#crate___",
+ *b"r#false___",
+ *b"r#final___",
+ *b"r#macro___",
+ *b"r#match___",
+ *b"r#trait___",
+ *b"r#where___",
+ *b"r#while___",
+ *b"r#yield___",
+ ];
+ const KW6: &[[u8; 10]] = &[
+ *b"r#become__",
+ *b"r#extern__",
+ *b"r#return__",
+ *b"r#static__",
+ *b"r#struct__",
+ *b"r#typeof__",
+ *b"r#unsafe__",
+ ];
+ const KW7: &[[u8; 10]] = &[*b"r#unsized_", *b"r#virtual_"];
+ const KW8: &[[u8; 10]] = &[*b"r#abstract", *b"r#continue", *b"r#override"];
+ const KWS: &[&[[u8; 10]]] = &[KW0, KW1, KW2, KW3, KW4, KW5, KW6, KW7, KW8];
+
+ if ident.len() > 8 {
+ return ident;
+ }
+ let kws = KWS[ident.len()];
+
+ let mut padded_ident = [b'_'; 8];
+ padded_ident[..ident.len()].copy_from_slice(ident.as_bytes());
+
+ let idx =
+ kws.binary_search_by(|probe| <[u8; 8]>::try_from(&probe[2..]).unwrap().cmp(&padded_ident));
+ match idx {
+ Ok(idx) => unsafe { std::str::from_utf8_unchecked(&kws[idx][..ident.len() + 2]) },
+ Err(_) => ident,
+ }
+}
+
+fn normalize_identifier_phf(ident: &str) -> &str {
+ static USE_RAW: phf::Map<&str, &str> = phf::phf_map! {
+ "abstract" => "r#abstract",
+ "as" => "r#as",
+ "async" => "r#async",
+ "await" => "r#await",
+ "become" => "r#become",
+ "box" => "r#box",
+ "break" => "r#break",
+ "const" => "r#const",
+ "continue" => "r#continue",
+ "crate" => "r#crate",
+ "do" => "r#do",
+ "dyn" => "r#dyn",
+ "else" => "r#else",
+ "enum" => "r#enum",
+ "extern" => "r#extern",
+ "false" => "r#false",
+ "final" => "r#final",
+ "fn" => "r#fn",
+ "for" => "r#for",
+ "if" => "r#if",
+ "impl" => "r#impl",
+ "in" => "r#in",
+ "let" => "r#let",
+ "macro" => "r#macro",
+ "match" => "r#match",
+ "mod" => "r#mod",
+ "move" => "r#move",
+ "mut" => "r#mut",
+ "override" => "r#override",
+ "priv" => "r#priv",
+ "pub" => "r#pub",
+ "ref" => "r#ref",
+ "return" => "r#return",
+ "static" => "r#static",
+ "struct" => "r#struct",
+ "trait" => "r#trait",
+ "true" => "r#true",
+ "try" => "r#try",
+ "type" => "r#type",
+ "typeof" => "r#typeof",
+ "unsafe" => "r#unsafe",
+ "unsized" => "r#unsized",
+ "use" => "r#use",
+ "virtual" => "r#virtual",
+ "where" => "r#where",
+ "while" => "r#while",
+ "yield" => "r#yield",
+ };
+
+ USE_RAW.get(ident).copied().unwrap_or(ident)
+}
+
+struct Words(&'static str);
+
+impl Default for Words {
+ fn default() -> Self {
+ Self(include_str!("../../askama_derive/src/generator.rs"))
+ }
+}
+
+impl Iterator for Words {
+ type Item = &'static str;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ let mut pos = self.0;
+ loop {
+ if pos.is_empty() {
+ self.0 = "";
+ return None;
+ } else if matches!(pos.as_bytes()[0], b'_' | b'a'..=b'z' | b'A'..=b'Z') {
+ break;
+ }
+
+ let mut chars = pos.chars();
+ chars.next();
+ pos = chars.as_str();
+ }
+
+ let start = pos;
+ loop {
+ if pos.is_empty() {
+ self.0 = "";
+ return Some(start);
+ } else if !matches!(pos.as_bytes()[0], b'_' | b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9') {
+ self.0 = pos;
+ return Some(&start[..start.len() - pos.len()]);
+ }
+
+ let mut chars = pos.chars();
+ chars.next();
+ pos = chars.as_str();
+ }
+ }
+}
+
+impl FusedIterator for Words {}