aboutsummaryrefslogtreecommitdiffstats
path: root/askama_parser
diff options
context:
space:
mode:
authorLibravatar René Kijewski <rene.kijewski@fu-berlin.de>2023-07-14 12:46:57 +0200
committerLibravatar René Kijewski <Kijewski@users.noreply.github.com>2023-08-03 00:02:04 +0200
commit95ff27c087f9fd77e76ed069220d7b32d150a84e (patch)
tree49f9037daeec0561960aeadc3c3d4a9503ce0bdb /askama_parser
parentd38e2b4e2651b7c8b1a8bf614377011d046250f6 (diff)
downloadaskama-95ff27c087f9fd77e76ed069220d7b32d150a84e.tar.gz
askama-95ff27c087f9fd77e76ed069220d7b32d150a84e.tar.bz2
askama-95ff27c087f9fd77e76ed069220d7b32d150a84e.zip
Parse paths and identifiers only once
In the old implementation, each variable in an expression would be parsed up to three times:
* Try to parse it as a path because it contains a leading double colon or infix double colons.
* Try to parse it as a path again by scanning for an identifier that contains an uppercase character.
* Fall back to scanning for any identifier.
This PR turns all three steps into one, without the need for backtracking.
Diffstat (limited to 'askama_parser')
-rw-r--r--askama_parser/src/expr.rs20
-rw-r--r--askama_parser/src/lib.rs55
-rw-r--r--askama_parser/src/node.rs15
3 files changed, 53 insertions, 37 deletions
diff --git a/askama_parser/src/expr.rs b/askama_parser/src/expr.rs
index 8331e01..248bc19 100644
--- a/askama_parser/src/expr.rs
+++ b/askama_parser/src/expr.rs
@@ -9,7 +9,10 @@ use nom::multi::{fold_many0, many0, separated_list0};
use nom::sequence::{pair, preceded, terminated, tuple};
use nom::{error_position, IResult};
-use super::{bool_lit, char_lit, identifier, not_ws, num_lit, path, str_lit, ws};
+use super::{
+ bool_lit, char_lit, identifier, not_ws, num_lit, path_or_identifier, str_lit, ws,
+ PathOrIdentifier,
+};
macro_rules! expr_prec_layer {
( $name:ident, $inner:ident, $op:expr ) => {
@@ -141,9 +144,8 @@ impl<'a> Expr<'a> {
Self::num,
Self::str,
Self::char,
- Self::path,
+ Self::path_or_var,
Self::array,
- Self::var,
Self::group,
))(i)
}
@@ -186,13 +188,11 @@ impl<'a> Expr<'a> {
)(i)
}
- fn path(i: &'a str) -> IResult<&'a str, Self> {
- let (i, path) = path(i)?;
- Ok((i, Self::Path(path)))
- }
-
- fn var(i: &'a str) -> IResult<&'a str, Self> {
- map(identifier, Self::Var)(i)
+ fn path_or_var(i: &'a str) -> IResult<&'a str, Self> {
+ map(path_or_identifier, |v| match v {
+ PathOrIdentifier::Path(v) => Self::Path(v),
+ PathOrIdentifier::Identifier(v) => Self::Var(v),
+ })(i)
}
fn str(i: &'a str) -> IResult<&'a str, Self> {
diff --git a/askama_parser/src/lib.rs b/askama_parser/src/lib.rs
index 67f516c..1e48262 100644
--- a/askama_parser/src/lib.rs
+++ b/askama_parser/src/lib.rs
@@ -8,10 +8,10 @@ use nom::branch::alt;
use nom::bytes::complete::{escaped, is_not, tag, take_till};
use nom::character::complete::char;
use nom::character::complete::{anychar, digit1};
-use nom::combinator::{cut, eof, map, opt, recognize, value};
+use nom::combinator::{cut, eof, map, opt, recognize};
use nom::error::ErrorKind;
-use nom::multi::separated_list1;
-use nom::sequence::{delimited, pair, terminated, tuple};
+use nom::multi::many1;
+use nom::sequence::{delimited, pair, preceded, terminated, tuple};
use nom::{error_position, AsChar, IResult, InputTakeAtPosition};
pub mod expr;
@@ -216,31 +216,38 @@ fn char_lit(i: &str) -> IResult<&str, &str> {
Ok((i, s.unwrap_or_default()))
}
-fn path(i: &str) -> IResult<&str, Vec<&str>> {
- let root = opt(value("", ws(tag("::"))));
- let tail = separated_list1(ws(tag("::")), identifier);
+enum PathOrIdentifier<'a> {
+ Path(Vec<&'a str>),
+ Identifier(&'a str),
+}
- match tuple((root, identifier, ws(tag("::")), tail))(i) {
- Ok((i, (root, start, _, rest))) => {
- let mut path = Vec::new();
- path.extend(root);
+fn path_or_identifier(i: &str) -> IResult<&str, PathOrIdentifier<'_>> {
+ let root = ws(opt(tag("::")));
+ let tail = opt(many1(preceded(ws(tag("::")), identifier)));
+
+ let (i, (root, start, rest)) = tuple((root, identifier, tail))(i)?;
+ let rest = rest.as_deref().unwrap_or_default();
+
+ // The returned identifier can be assumed to be path if:
+ // - Contains both a lowercase and uppercase character, i.e. a type name like `None`
+ // - Doesn't contain any lowercase characters, i.e. it's a constant
+ // In short, if it contains any uppercase characters it's a path.
+ match (root, start, rest) {
+ (Some(_), start, tail) => {
+ let mut path = Vec::with_capacity(2 + tail.len());
+ path.push("");
path.push(start);
path.extend(rest);
- Ok((i, path))
+ Ok((i, PathOrIdentifier::Path(path)))
}
- Err(err) => {
- if let Ok((i, name)) = identifier(i) {
- // The returned identifier can be assumed to be path if:
- // - Contains both a lowercase and uppercase character, i.e. a type name like `None`
- // - Doesn't contain any lowercase characters, i.e. it's a constant
- // In short, if it contains any uppercase characters it's a path.
- if name.contains(char::is_uppercase) {
- return Ok((i, vec![name]));
- }
- }
-
- // If `identifier()` fails then just return the original error
- Err(err)
+ (None, name, []) if !name.contains(char::is_uppercase) => {
+ Ok((i, PathOrIdentifier::Identifier(name)))
+ }
+ (None, start, tail) => {
+ let mut path = Vec::with_capacity(1 + tail.len());
+ path.push(start);
+ path.extend(rest);
+ Ok((i, PathOrIdentifier::Path(path)))
}
}
}
diff --git a/askama_parser/src/node.rs b/askama_parser/src/node.rs
index 4c21ad1..a1aa7e2 100644
--- a/askama_parser/src/node.rs
+++ b/askama_parser/src/node.rs
@@ -3,15 +3,17 @@ use std::str;
use nom::branch::alt;
use nom::bytes::complete::{tag, take_until};
use nom::character::complete::char;
-use nom::combinator::{complete, consumed, cut, eof, map, not, opt, peek, recognize, value};
+use nom::combinator::{
+ complete, consumed, cut, eof, map, map_res, not, opt, peek, recognize, value,
+};
use nom::error::{Error, ErrorKind};
use nom::multi::{fold_many0, many0, many1, separated_list0, separated_list1};
use nom::sequence::{delimited, pair, preceded, terminated, tuple};
use nom::{error_position, IResult};
use super::{
- bool_lit, char_lit, identifier, is_ws, keyword, num_lit, path, skip_till, str_lit, ws, Expr,
- State,
+ bool_lit, char_lit, identifier, is_ws, keyword, num_lit, path_or_identifier, skip_till,
+ str_lit, ws, Expr, PathOrIdentifier, State,
};
#[derive(Debug, PartialEq)]
@@ -161,6 +163,13 @@ impl<'a> Target<'a> {
return Ok((i, Self::Tuple(Vec::new(), targets)));
}
+ let path = |i| {
+ map_res(path_or_identifier, |v| match v {
+ PathOrIdentifier::Path(v) => Ok(v),
+ PathOrIdentifier::Identifier(v) => Err(v),
+ })(i)
+ };
+
// match structs
let (i, path) = opt(path)(i)?;
if let Some(path) = path {