diff options
author | René Kijewski <rene.kijewski@fu-berlin.de> | 2023-07-14 12:46:57 +0200 |
---|---|---|
committer | René Kijewski <Kijewski@users.noreply.github.com> | 2023-08-03 00:02:04 +0200 |
commit | 95ff27c087f9fd77e76ed069220d7b32d150a84e (patch) | |
tree | 49f9037daeec0561960aeadc3c3d4a9503ce0bdb /askama_parser | |
parent | d38e2b4e2651b7c8b1a8bf614377011d046250f6 (diff) | |
download | askama-95ff27c087f9fd77e76ed069220d7b32d150a84e.tar.gz askama-95ff27c087f9fd77e76ed069220d7b32d150a84e.tar.bz2 askama-95ff27c087f9fd77e76ed069220d7b32d150a84e.zip |
Parse paths and identifiers only once
In the old implementation each variable in an expression would be parsed
up to three times:
* Try to parse it as a path with a leading double colon or infix double
colons.
* Try to parse it as a path again by scanning for an identifier that
contains an uppercase character.
* Fall back to scanning for any identifier.
This PR turns all three steps into one, without the need for
backtracking.
Diffstat (limited to 'askama_parser')
-rw-r--r-- | askama_parser/src/expr.rs | 20 | ||||
-rw-r--r-- | askama_parser/src/lib.rs | 55 | ||||
-rw-r--r-- | askama_parser/src/node.rs | 15 |
3 files changed, 53 insertions, 37 deletions
diff --git a/askama_parser/src/expr.rs b/askama_parser/src/expr.rs index 8331e01..248bc19 100644 --- a/askama_parser/src/expr.rs +++ b/askama_parser/src/expr.rs @@ -9,7 +9,10 @@ use nom::multi::{fold_many0, many0, separated_list0}; use nom::sequence::{pair, preceded, terminated, tuple}; use nom::{error_position, IResult}; -use super::{bool_lit, char_lit, identifier, not_ws, num_lit, path, str_lit, ws}; +use super::{ + bool_lit, char_lit, identifier, not_ws, num_lit, path_or_identifier, str_lit, ws, + PathOrIdentifier, +}; macro_rules! expr_prec_layer { ( $name:ident, $inner:ident, $op:expr ) => { @@ -141,9 +144,8 @@ impl<'a> Expr<'a> { Self::num, Self::str, Self::char, - Self::path, + Self::path_or_var, Self::array, - Self::var, Self::group, ))(i) } @@ -186,13 +188,11 @@ impl<'a> Expr<'a> { )(i) } - fn path(i: &'a str) -> IResult<&'a str, Self> { - let (i, path) = path(i)?; - Ok((i, Self::Path(path))) - } - - fn var(i: &'a str) -> IResult<&'a str, Self> { - map(identifier, Self::Var)(i) + fn path_or_var(i: &'a str) -> IResult<&'a str, Self> { + map(path_or_identifier, |v| match v { + PathOrIdentifier::Path(v) => Self::Path(v), + PathOrIdentifier::Identifier(v) => Self::Var(v), + })(i) } fn str(i: &'a str) -> IResult<&'a str, Self> { diff --git a/askama_parser/src/lib.rs b/askama_parser/src/lib.rs index 67f516c..1e48262 100644 --- a/askama_parser/src/lib.rs +++ b/askama_parser/src/lib.rs @@ -8,10 +8,10 @@ use nom::branch::alt; use nom::bytes::complete::{escaped, is_not, tag, take_till}; use nom::character::complete::char; use nom::character::complete::{anychar, digit1}; -use nom::combinator::{cut, eof, map, opt, recognize, value}; +use nom::combinator::{cut, eof, map, opt, recognize}; use nom::error::ErrorKind; -use nom::multi::separated_list1; -use nom::sequence::{delimited, pair, terminated, tuple}; +use nom::multi::many1; +use nom::sequence::{delimited, pair, preceded, terminated, tuple}; use nom::{error_position, AsChar, IResult, InputTakeAtPosition}; pub mod 
expr; @@ -216,31 +216,38 @@ fn char_lit(i: &str) -> IResult<&str, &str> { Ok((i, s.unwrap_or_default())) } -fn path(i: &str) -> IResult<&str, Vec<&str>> { - let root = opt(value("", ws(tag("::")))); - let tail = separated_list1(ws(tag("::")), identifier); +enum PathOrIdentifier<'a> { + Path(Vec<&'a str>), + Identifier(&'a str), +} - match tuple((root, identifier, ws(tag("::")), tail))(i) { - Ok((i, (root, start, _, rest))) => { - let mut path = Vec::new(); - path.extend(root); +fn path_or_identifier(i: &str) -> IResult<&str, PathOrIdentifier<'_>> { + let root = ws(opt(tag("::"))); + let tail = opt(many1(preceded(ws(tag("::")), identifier))); + + let (i, (root, start, rest)) = tuple((root, identifier, tail))(i)?; + let rest = rest.as_deref().unwrap_or_default(); + + // The returned identifier can be assumed to be path if: + // - Contains both a lowercase and uppercase character, i.e. a type name like `None` + // - Doesn't contain any lowercase characters, i.e. it's a constant + // In short, if it contains any uppercase characters it's a path. + match (root, start, rest) { + (Some(_), start, tail) => { + let mut path = Vec::with_capacity(2 + tail.len()); + path.push(""); path.push(start); path.extend(rest); - Ok((i, path)) + Ok((i, PathOrIdentifier::Path(path))) } - Err(err) => { - if let Ok((i, name)) = identifier(i) { - // The returned identifier can be assumed to be path if: - // - Contains both a lowercase and uppercase character, i.e. a type name like `None` - // - Doesn't contain any lowercase characters, i.e. it's a constant - // In short, if it contains any uppercase characters it's a path. 
- if name.contains(char::is_uppercase) { - return Ok((i, vec![name])); - } - } - - // If `identifier()` fails then just return the original error - Err(err) + (None, name, []) if !name.contains(char::is_uppercase) => { + Ok((i, PathOrIdentifier::Identifier(name))) + } + (None, start, tail) => { + let mut path = Vec::with_capacity(1 + tail.len()); + path.push(start); + path.extend(rest); + Ok((i, PathOrIdentifier::Path(path))) } } } diff --git a/askama_parser/src/node.rs b/askama_parser/src/node.rs index 4c21ad1..a1aa7e2 100644 --- a/askama_parser/src/node.rs +++ b/askama_parser/src/node.rs @@ -3,15 +3,17 @@ use std::str; use nom::branch::alt; use nom::bytes::complete::{tag, take_until}; use nom::character::complete::char; -use nom::combinator::{complete, consumed, cut, eof, map, not, opt, peek, recognize, value}; +use nom::combinator::{ + complete, consumed, cut, eof, map, map_res, not, opt, peek, recognize, value, +}; use nom::error::{Error, ErrorKind}; use nom::multi::{fold_many0, many0, many1, separated_list0, separated_list1}; use nom::sequence::{delimited, pair, preceded, terminated, tuple}; use nom::{error_position, IResult}; use super::{ - bool_lit, char_lit, identifier, is_ws, keyword, num_lit, path, skip_till, str_lit, ws, Expr, - State, + bool_lit, char_lit, identifier, is_ws, keyword, num_lit, path_or_identifier, skip_till, + str_lit, ws, Expr, PathOrIdentifier, State, }; #[derive(Debug, PartialEq)] @@ -161,6 +163,13 @@ impl<'a> Target<'a> { return Ok((i, Self::Tuple(Vec::new(), targets))); } + let path = |i| { + map_res(path_or_identifier, |v| match v { + PathOrIdentifier::Path(v) => Ok(v), + PathOrIdentifier::Identifier(v) => Err(v), + })(i) + }; + // match structs let (i, path) = opt(path)(i)?; if let Some(path) = path { |