Parse paths and identifiers only once

In the old implementation, each variable in an expression would be parsed up to three times:

* Try to parse a path because it contains a leading double colon, or infix double colons.
* Try to parse it as a path again by scanning for an identifier that contains an upper case character.
* Fall back to scanning for any identifier.

This PR turns all three steps into one, without the need for backtracking.
author: René Kijewski <rene.kijewski@fu-berlin.de> 2023-07-14 12:46:57 +0200
committer: René Kijewski <Kijewski@users.noreply.github.com> 2023-08-03 00:02:04 +0200
commit: 95ff27c087f9fd77e76ed069220d7b32d150a84e (patch)
tree: 49f9037daeec0561960aeadc3c3d4a9503ce0bdb /askama_parser/src/lib.rs
parent: d38e2b4e2651b7c8b1a8bf614377011d046250f6 (diff)
download: askama-95ff27c087f9fd77e76ed069220d7b32d150a84e.tar.gz
askama-95ff27c087f9fd77e76ed069220d7b32d150a84e.tar.bz2
askama-95ff27c087f9fd77e76ed069220d7b32d150a84e.zip
1 files changed, 31 insertions, 24 deletions
diff --git a/askama_parser/src/lib.rs b/askama_parser/src/lib.rs
index 67f516c..1e48262 100644
--- a/askama_parser/src/lib.rs
+++ b/askama_parser/src/lib.rs
@@ -8,10 +8,10 @@ use nom::branch::alt;
 use nom::bytes::complete::{escaped, is_not, tag, take_till};
 use nom::character::complete::char;
 use nom::character::complete::{anychar, digit1};
-use nom::combinator::{cut, eof, map, opt, recognize, value};
+use nom::combinator::{cut, eof, map, opt, recognize};
 use nom::error::ErrorKind;
-use nom::multi::separated_list1;
-use nom::sequence::{delimited, pair, terminated, tuple};
+use nom::multi::many1;
+use nom::sequence::{delimited, pair, preceded, terminated, tuple};
 use nom::{error_position, AsChar, IResult, InputTakeAtPosition};
 
 pub mod expr;
@@ -216,31 +216,38 @@ fn char_lit(i: &str) -> IResult<&str, &str> {
     Ok((i, s.unwrap_or_default()))
 }
 
-fn path(i: &str) -> IResult<&str, Vec<&str>> {
-    let root = opt(value("", ws(tag("::"))));
-    let tail = separated_list1(ws(tag("::")), identifier);
+enum PathOrIdentifier<'a> {
+    Path(Vec<&'a str>),
+    Identifier(&'a str),
+}
 
-    match tuple((root, identifier, ws(tag("::")), tail))(i) {
-        Ok((i, (root, start, _, rest))) => {
-            let mut path = Vec::new();
-            path.extend(root);
+fn path_or_identifier(i: &str) -> IResult<&str, PathOrIdentifier<'_>> {
+    let root = ws(opt(tag("::")));
+    let tail = opt(many1(preceded(ws(tag("::")), identifier)));
+
+    let (i, (root, start, rest)) = tuple((root, identifier, tail))(i)?;
+    let rest = rest.as_deref().unwrap_or_default();
+
+    // The returned identifier can be assumed to be path if:
+    // - Contains both a lowercase and uppercase character, i.e. a type name like `None`
+    // - Doesn't contain any lowercase characters, i.e. it's a constant
+    // In short, if it contains any uppercase characters it's a path.
+    match (root, start, rest) {
+        (Some(_), start, tail) => {
+            let mut path = Vec::with_capacity(2 + tail.len());
+            path.push("");
             path.push(start);
             path.extend(rest);
-            Ok((i, path))
+            Ok((i, PathOrIdentifier::Path(path)))
         }
-        Err(err) => {
-            if let Ok((i, name)) = identifier(i) {
-                // The returned identifier can be assumed to be path if:
-                // - Contains both a lowercase and uppercase character, i.e. a type name like `None`
-                // - Doesn't contain any lowercase characters, i.e. it's a constant
-                // In short, if it contains any uppercase characters it's a path.
-                if name.contains(char::is_uppercase) {
-                    return Ok((i, vec![name]));
-                }
-            }
-
-            // If `identifier()` fails then just return the original error
-            Err(err)
+        (None, name, []) if !name.contains(char::is_uppercase) => {
+            Ok((i, PathOrIdentifier::Identifier(name)))
+        }
+        (None, start, tail) => {
+            let mut path = Vec::with_capacity(1 + tail.len());
+            path.push(start);
+            path.extend(rest);
+            Ok((i, PathOrIdentifier::Path(path)))
         }
     }
 }
author	René Kijewski <rene.kijewski@fu-berlin.de>	2023-07-14 12:46:57 +0200
committer	René Kijewski <Kijewski@users.noreply.github.com>	2023-08-03 00:02:04 +0200
commit	95ff27c087f9fd77e76ed069220d7b32d150a84e (patch)
tree	49f9037daeec0561960aeadc3c3d4a9503ce0bdb /askama_parser/src/lib.rs
parent	d38e2b4e2651b7c8b1a8bf614377011d046250f6 (diff)
download	askama-95ff27c087f9fd77e76ed069220d7b32d150a84e.tar.gz askama-95ff27c087f9fd77e76ed069220d7b32d150a84e.tar.bz2 askama-95ff27c087f9fd77e76ed069220d7b32d150a84e.zip