From 3cddb918897383402a58a5d74b49500571144056 Mon Sep 17 00:00:00 2001 From: René Kijewski Date: Fri, 12 Jan 2024 20:41:46 +0100 Subject: Generator: make `normalize_identifier` faster (#946) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `normalize_identifier` is called quite often in the generator, once for every variable name or path element that is written. This PR aims to speed up the function by * using a per-input-length replacement map * binary searching the replacement map instead of a linear search Different, but functionally equivalent implementations were compared: ```text * linear search in one big map: 348.44 µs * binary search in one big map: 334.46 µs * linear search in a per-length map: 178.84 µs * binary search in a per-length map: 154.54 µs * perfect hashing: 170.87 µs ``` The winner of this competition is "binary search in a per-length map". It does not introduce new dependencies, but has the slight disadvantage that it uses one instance of `unsafe` code. I deem this disadvantage acceptable, though. Nb. It was also tested whether a variant that only stores the replaced string would be faster. This "optimization" proved to be slower for all implementations except "binary search in a per-length map", for which it has the same runtime. Without a clear advantage to use the "optimized version", I chose to use the easier-to-read slice-of-tuples variant. Obviously, for all measurements: YMMV. 
--- testing/Cargo.toml | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'testing/Cargo.toml') diff --git a/testing/Cargo.toml b/testing/Cargo.toml index 793de53..20ef72a 100644 --- a/testing/Cargo.toml +++ b/testing/Cargo.toml @@ -15,6 +15,7 @@ markdown = ["comrak", "askama/markdown"] [dependencies] askama = { path = "../askama", version = "0.12" } comrak = { version = "0.20", default-features = false, optional = true } +phf = { version = "0.11", features = ["macros" ]} serde_json = { version = "1.0", optional = true } [dev-dependencies] @@ -25,3 +26,7 @@ version_check = "0.9" [[bench]] name = "all" harness = false + +[[bench]] +name = "normalize_identifier" +harness = false -- cgit