aboutsummaryrefslogtreecommitdiffstats
path: root/askama_parser/src/lib.rs
diff options
context:
space:
mode:
authorLibravatar Dirkjan Ochtman <dirkjan@ochtman.nl>2023-07-01 16:00:06 +0200
committerLibravatar Dirkjan Ochtman <dirkjan@ochtman.nl>2023-07-31 10:27:15 +0200
commite40e93796f45d34d038c6a20c4b034eb3b384b12 (patch)
treeff717ff9781c943f87715d7c8385a9af01fde3a9 /askama_parser/src/lib.rs
parentbdb6c0b89df5099fe6cf2e79e04e0efd21f64aa8 (diff)
downloadaskama-e40e93796f45d34d038c6a20c4b034eb3b384b12.tar.gz
askama-e40e93796f45d34d038c6a20c4b034eb3b384b12.tar.bz2
askama-e40e93796f45d34d038c6a20c4b034eb3b384b12.zip
Extract askama_parser crate
Diffstat (limited to 'askama_parser/src/lib.rs')
-rw-r--r--askama_parser/src/lib.rs384
1 files changed, 384 insertions, 0 deletions
diff --git a/askama_parser/src/lib.rs b/askama_parser/src/lib.rs
new file mode 100644
index 0000000..336004e
--- /dev/null
+++ b/askama_parser/src/lib.rs
@@ -0,0 +1,384 @@
+#![deny(unreachable_pub)]
+#![deny(elided_lifetimes_in_paths)]
+
+use std::cell::Cell;
+use std::{fmt, str};
+
+use nom::branch::alt;
+use nom::bytes::complete::{escaped, is_not, tag, take_till};
+use nom::character::complete::char;
+use nom::character::complete::{anychar, digit1};
+use nom::combinator::{eof, map, not, opt, recognize, value};
+use nom::error::ErrorKind;
+use nom::multi::separated_list1;
+use nom::sequence::{delimited, pair, tuple};
+use nom::{error_position, AsChar, IResult, InputTakeAtPosition};
+
+pub use self::expr::Expr;
+pub use self::node::{Cond, CondTest, Loop, Macro, Node, Target, When, Whitespace, Ws};
+
+mod expr;
+mod node;
+#[cfg(test)]
+mod tests;
+
+struct State<'a> {
+ syntax: &'a Syntax<'a>,
+ loop_depth: Cell<usize>,
+}
+
+impl<'a> State<'a> {
+ fn new(syntax: &'a Syntax<'a>) -> State<'a> {
+ State {
+ syntax,
+ loop_depth: Cell::new(0),
+ }
+ }
+
+ fn enter_loop(&self) {
+ self.loop_depth.set(self.loop_depth.get() + 1);
+ }
+
+ fn leave_loop(&self) {
+ self.loop_depth.set(self.loop_depth.get() - 1);
+ }
+
+ fn is_in_loop(&self) -> bool {
+ self.loop_depth.get() > 0
+ }
+}
+
+impl From<char> for Whitespace {
+ fn from(c: char) -> Self {
+ match c {
+ '+' => Self::Preserve,
+ '-' => Self::Suppress,
+ '~' => Self::Minimize,
+ _ => panic!("unsupported `Whitespace` conversion"),
+ }
+ }
+}
+
+mod _parsed {
+ use std::mem;
+
+ use super::{parse, Node, ParseError, Syntax};
+
+ pub struct Parsed {
+ #[allow(dead_code)]
+ source: String,
+ nodes: Vec<Node<'static>>,
+ }
+
+ impl Parsed {
+ pub fn new(source: String, syntax: &Syntax<'_>) -> Result<Self, ParseError> {
+ // Self-referential borrowing: `self` will keep the source alive as `String`,
+ // internally we will transmute it to `&'static str` to satisfy the compiler.
+ // However, we only expose the nodes with a lifetime limited to `self`.
+ let src = unsafe { mem::transmute::<&str, &'static str>(source.as_str()) };
+ let nodes = match parse(src, syntax) {
+ Ok(nodes) => nodes,
+ Err(e) => return Err(e),
+ };
+
+ Ok(Self { source, nodes })
+ }
+
+ // The return value's lifetime must be limited to `self` to uphold the unsafe invariant.
+ pub fn nodes(&self) -> &[Node<'_>] {
+ &self.nodes
+ }
+ }
+}
+
+pub use _parsed::Parsed;
+
+pub fn parse<'a>(src: &'a str, syntax: &Syntax<'_>) -> Result<Vec<Node<'a>>, ParseError> {
+ match Node::parse(src, &State::new(syntax)) {
+ Ok((left, res)) => {
+ if !left.is_empty() {
+ Err(ParseError(format!("unable to parse template:\n\n{left:?}")))
+ } else {
+ Ok(res)
+ }
+ }
+
+ Err(nom::Err::Error(err)) | Err(nom::Err::Failure(err)) => {
+ let nom::error::Error { input, .. } = err;
+ let offset = src.len() - input.len();
+ let (source_before, source_after) = src.split_at(offset);
+
+ let source_after = match source_after.char_indices().enumerate().take(41).last() {
+ Some((40, (i, _))) => format!("{:?}...", &source_after[..i]),
+ _ => format!("{source_after:?}"),
+ };
+
+ let (row, last_line) = source_before.lines().enumerate().last().unwrap();
+ let column = last_line.chars().count();
+
+ let msg = format!(
+ "problems parsing template source at row {}, column {} near:\n{}",
+ row + 1,
+ column,
+ source_after,
+ );
+
+ Err(ParseError(msg))
+ }
+
+ Err(nom::Err::Incomplete(_)) => Err(ParseError("parsing incomplete".into())),
+ }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct ParseError(String);
+
+impl std::error::Error for ParseError {}
+
+impl fmt::Display for ParseError {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ self.0.fmt(f)
+ }
+}
+
+fn is_ws(c: char) -> bool {
+ matches!(c, ' ' | '\t' | '\r' | '\n')
+}
+
+fn not_ws(c: char) -> bool {
+ !is_ws(c)
+}
+
+fn ws<'a, O>(
+ inner: impl FnMut(&'a str) -> IResult<&'a str, O>,
+) -> impl FnMut(&'a str) -> IResult<&'a str, O> {
+ delimited(take_till(not_ws), inner, take_till(not_ws))
+}
+
+fn split_ws_parts(s: &str) -> Node<'_> {
+ let trimmed_start = s.trim_start_matches(is_ws);
+ let len_start = s.len() - trimmed_start.len();
+ let trimmed = trimmed_start.trim_end_matches(is_ws);
+ Node::Lit(&s[..len_start], trimmed, &trimmed_start[trimmed.len()..])
+}
+
+/// Skips input until `end` was found, but does not consume it.
+/// Returns tuple that would be returned when parsing `end`.
+fn skip_till<'a, O>(
+ end: impl FnMut(&'a str) -> IResult<&'a str, O>,
+) -> impl FnMut(&'a str) -> IResult<&'a str, (&'a str, O)> {
+ enum Next<O> {
+ IsEnd(O),
+ NotEnd(char),
+ }
+ let mut next = alt((map(end, Next::IsEnd), map(anychar, Next::NotEnd)));
+ move |start: &'a str| {
+ let mut i = start;
+ loop {
+ let (j, is_end) = next(i)?;
+ match is_end {
+ Next::IsEnd(lookahead) => return Ok((i, (j, lookahead))),
+ Next::NotEnd(_) => i = j,
+ }
+ }
+ }
+}
+
+fn keyword<'a>(k: &'a str) -> impl FnMut(&'a str) -> IResult<&'a str, &'a str> {
+ move |i: &'a str| -> IResult<&'a str, &'a str> {
+ let (j, v) = identifier(i)?;
+ if k == v {
+ Ok((j, v))
+ } else {
+ Err(nom::Err::Error(error_position!(i, ErrorKind::Tag)))
+ }
+ }
+}
+
+fn identifier(input: &str) -> IResult<&str, &str> {
+ recognize(pair(identifier_start, opt(identifier_tail)))(input)
+}
+
+fn identifier_start(s: &str) -> IResult<&str, &str> {
+ s.split_at_position1_complete(
+ |c| !(c.is_alpha() || c == '_' || c >= '\u{0080}'),
+ nom::error::ErrorKind::Alpha,
+ )
+}
+
+fn identifier_tail(s: &str) -> IResult<&str, &str> {
+ s.split_at_position1_complete(
+ |c| !(c.is_alphanum() || c == '_' || c >= '\u{0080}'),
+ nom::error::ErrorKind::Alpha,
+ )
+}
+
+fn bool_lit(i: &str) -> IResult<&str, &str> {
+ alt((keyword("false"), keyword("true")))(i)
+}
+
+fn num_lit(i: &str) -> IResult<&str, &str> {
+ recognize(pair(digit1, opt(pair(char('.'), digit1))))(i)
+}
+
+fn str_lit(i: &str) -> IResult<&str, &str> {
+ let (i, s) = delimited(
+ char('"'),
+ opt(escaped(is_not("\\\""), '\\', anychar)),
+ char('"'),
+ )(i)?;
+ Ok((i, s.unwrap_or_default()))
+}
+
+fn char_lit(i: &str) -> IResult<&str, &str> {
+ let (i, s) = delimited(
+ char('\''),
+ opt(escaped(is_not("\\\'"), '\\', anychar)),
+ char('\''),
+ )(i)?;
+ Ok((i, s.unwrap_or_default()))
+}
+
+fn nested_parenthesis(i: &str) -> IResult<&str, ()> {
+ let mut nested = 0;
+ let mut last = 0;
+ let mut in_str = false;
+ let mut escaped = false;
+
+ for (i, b) in i.chars().enumerate() {
+ if !(b == '(' || b == ')') || !in_str {
+ match b {
+ '(' => nested += 1,
+ ')' => {
+ if nested == 0 {
+ last = i;
+ break;
+ }
+ nested -= 1;
+ }
+ '"' => {
+ if in_str {
+ if !escaped {
+ in_str = false;
+ }
+ } else {
+ in_str = true;
+ }
+ }
+ '\\' => {
+ escaped = !escaped;
+ }
+ _ => (),
+ }
+ }
+
+ if escaped && b != '\\' {
+ escaped = false;
+ }
+ }
+
+ if nested == 0 {
+ Ok((&i[last..], ()))
+ } else {
+ Err(nom::Err::Error(error_position!(
+ i,
+ ErrorKind::SeparatedNonEmptyList
+ )))
+ }
+}
+
+fn path(i: &str) -> IResult<&str, Vec<&str>> {
+ let root = opt(value("", ws(tag("::"))));
+ let tail = separated_list1(ws(tag("::")), identifier);
+
+ match tuple((root, identifier, ws(tag("::")), tail))(i) {
+ Ok((i, (root, start, _, rest))) => {
+ let mut path = Vec::new();
+ path.extend(root);
+ path.push(start);
+ path.extend(rest);
+ Ok((i, path))
+ }
+ Err(err) => {
+ if let Ok((i, name)) = identifier(i) {
+ // The returned identifier can be assumed to be path if:
+ // - Contains both a lowercase and uppercase character, i.e. a type name like `None`
+ // - Doesn't contain any lowercase characters, i.e. it's a constant
+ // In short, if it contains any uppercase characters it's a path.
+ if name.contains(char::is_uppercase) {
+ return Ok((i, vec![name]));
+ }
+ }
+
+ // If `identifier()` fails then just return the original error
+ Err(err)
+ }
+ }
+}
+
+fn take_content<'a>(i: &'a str, s: &State<'_>) -> IResult<&'a str, Node<'a>> {
+ let p_start = alt((
+ tag(s.syntax.block_start),
+ tag(s.syntax.comment_start),
+ tag(s.syntax.expr_start),
+ ));
+
+ let (i, _) = not(eof)(i)?;
+ let (i, content) = opt(recognize(skip_till(p_start)))(i)?;
+ let (i, content) = match content {
+ Some("") => {
+ // {block,comment,expr}_start follows immediately.
+ return Err(nom::Err::Error(error_position!(i, ErrorKind::TakeUntil)));
+ }
+ Some(content) => (i, content),
+ None => ("", i), // there is no {block,comment,expr}_start: take everything
+ };
+ Ok((i, split_ws_parts(content)))
+}
+
+fn tag_block_start<'a>(i: &'a str, s: &State<'_>) -> IResult<&'a str, &'a str> {
+ tag(s.syntax.block_start)(i)
+}
+
+fn tag_block_end<'a>(i: &'a str, s: &State<'_>) -> IResult<&'a str, &'a str> {
+ tag(s.syntax.block_end)(i)
+}
+
+fn tag_comment_start<'a>(i: &'a str, s: &State<'_>) -> IResult<&'a str, &'a str> {
+ tag(s.syntax.comment_start)(i)
+}
+
+fn tag_comment_end<'a>(i: &'a str, s: &State<'_>) -> IResult<&'a str, &'a str> {
+ tag(s.syntax.comment_end)(i)
+}
+
+fn tag_expr_start<'a>(i: &'a str, s: &State<'_>) -> IResult<&'a str, &'a str> {
+ tag(s.syntax.expr_start)(i)
+}
+
+fn tag_expr_end<'a>(i: &'a str, s: &State<'_>) -> IResult<&'a str, &'a str> {
+ tag(s.syntax.expr_end)(i)
+}
+
+#[derive(Debug)]
+pub struct Syntax<'a> {
+ pub block_start: &'a str,
+ pub block_end: &'a str,
+ pub expr_start: &'a str,
+ pub expr_end: &'a str,
+ pub comment_start: &'a str,
+ pub comment_end: &'a str,
+}
+
+impl Default for Syntax<'static> {
+ fn default() -> Self {
+ Self {
+ block_start: "{%",
+ block_end: "%}",
+ expr_start: "{{",
+ expr_end: "}}",
+ comment_start: "{#",
+ comment_end: "#}",
+ }
+ }
+}