aboutsummaryrefslogtreecommitdiffstats
path: root/askama_parser/src/lib.rs
diff options
context:
space:
mode:
authorLibravatar René Kijewski <rene.kijewski@fu-berlin.de>2023-07-14 12:46:57 +0200
committerLibravatar René Kijewski <Kijewski@users.noreply.github.com>2023-08-03 00:02:04 +0200
commit95ff27c087f9fd77e76ed069220d7b32d150a84e (patch)
tree49f9037daeec0561960aeadc3c3d4a9503ce0bdb /askama_parser/src/lib.rs
parentd38e2b4e2651b7c8b1a8bf614377011d046250f6 (diff)
downloadaskama-95ff27c087f9fd77e76ed069220d7b32d150a84e.tar.gz
askama-95ff27c087f9fd77e76ed069220d7b32d150a84e.tar.bz2
askama-95ff27c087f9fd77e76ed069220d7b32d150a84e.zip
Parse paths and identifiers only once
In the old implementation each variable in an expression would be parsed up to three times: * Try to parse a path because it contains a leading double colon, or infix double colons. * Try to parse it as path again by scanning for an identifier that contains an upper case character. * Fall back to scanning for any identifier. This PR turns all three steps into one, without the need for backtracking.
Diffstat (limited to 'askama_parser/src/lib.rs')
-rw-r--r--askama_parser/src/lib.rs55
1 files changed, 31 insertions, 24 deletions
diff --git a/askama_parser/src/lib.rs b/askama_parser/src/lib.rs
index 67f516c..1e48262 100644
--- a/askama_parser/src/lib.rs
+++ b/askama_parser/src/lib.rs
@@ -8,10 +8,10 @@ use nom::branch::alt;
use nom::bytes::complete::{escaped, is_not, tag, take_till};
use nom::character::complete::char;
use nom::character::complete::{anychar, digit1};
-use nom::combinator::{cut, eof, map, opt, recognize, value};
+use nom::combinator::{cut, eof, map, opt, recognize};
use nom::error::ErrorKind;
-use nom::multi::separated_list1;
-use nom::sequence::{delimited, pair, terminated, tuple};
+use nom::multi::many1;
+use nom::sequence::{delimited, pair, preceded, terminated, tuple};
use nom::{error_position, AsChar, IResult, InputTakeAtPosition};
pub mod expr;
@@ -216,31 +216,38 @@ fn char_lit(i: &str) -> IResult<&str, &str> {
Ok((i, s.unwrap_or_default()))
}
-fn path(i: &str) -> IResult<&str, Vec<&str>> {
- let root = opt(value("", ws(tag("::"))));
- let tail = separated_list1(ws(tag("::")), identifier);
+enum PathOrIdentifier<'a> {
+ Path(Vec<&'a str>),
+ Identifier(&'a str),
+}
- match tuple((root, identifier, ws(tag("::")), tail))(i) {
- Ok((i, (root, start, _, rest))) => {
- let mut path = Vec::new();
- path.extend(root);
+fn path_or_identifier(i: &str) -> IResult<&str, PathOrIdentifier<'_>> {
+ let root = ws(opt(tag("::")));
+ let tail = opt(many1(preceded(ws(tag("::")), identifier)));
+
+ let (i, (root, start, rest)) = tuple((root, identifier, tail))(i)?;
+ let rest = rest.as_deref().unwrap_or_default();
+
+ // The returned identifier can be assumed to be path if:
+ // - Contains both a lowercase and uppercase character, i.e. a type name like `None`
+ // - Doesn't contain any lowercase characters, i.e. it's a constant
+ // In short, if it contains any uppercase characters it's a path.
+ match (root, start, rest) {
+ (Some(_), start, tail) => {
+ let mut path = Vec::with_capacity(2 + tail.len());
+ path.push("");
path.push(start);
path.extend(rest);
- Ok((i, path))
+ Ok((i, PathOrIdentifier::Path(path)))
}
- Err(err) => {
- if let Ok((i, name)) = identifier(i) {
- // The returned identifier can be assumed to be path if:
- // - Contains both a lowercase and uppercase character, i.e. a type name like `None`
- // - Doesn't contain any lowercase characters, i.e. it's a constant
- // In short, if it contains any uppercase characters it's a path.
- if name.contains(char::is_uppercase) {
- return Ok((i, vec![name]));
- }
- }
-
- // If `identifier()` fails then just return the original error
- Err(err)
+ (None, name, []) if !name.contains(char::is_uppercase) => {
+ Ok((i, PathOrIdentifier::Identifier(name)))
+ }
+ (None, start, tail) => {
+ let mut path = Vec::with_capacity(1 + tail.len());
+ path.push(start);
+ path.extend(rest);
+ Ok((i, PathOrIdentifier::Path(path)))
}
}
}