diff options
author | Dirkjan Ochtman <dirkjan@ochtman.nl> | 2023-07-01 16:00:06 +0200 |
---|---|---|
committer | Dirkjan Ochtman <dirkjan@ochtman.nl> | 2023-07-31 10:27:15 +0200 |
commit | e40e93796f45d34d038c6a20c4b034eb3b384b12 (patch) | |
tree | ff717ff9781c943f87715d7c8385a9af01fde3a9 /askama_derive/src/parser/mod.rs | |
parent | bdb6c0b89df5099fe6cf2e79e04e0efd21f64aa8 (diff) | |
download | askama-e40e93796f45d34d038c6a20c4b034eb3b384b12.tar.gz askama-e40e93796f45d34d038c6a20c4b034eb3b384b12.tar.bz2 askama-e40e93796f45d34d038c6a20c4b034eb3b384b12.zip |
Extract askama_parser crate
Diffstat (limited to 'askama_derive/src/parser/mod.rs')
-rw-r--r-- | askama_derive/src/parser/mod.rs | 381 |
1 files changed, 0 insertions, 381 deletions
diff --git a/askama_derive/src/parser/mod.rs b/askama_derive/src/parser/mod.rs deleted file mode 100644 index 8da96f5..0000000 --- a/askama_derive/src/parser/mod.rs +++ /dev/null @@ -1,381 +0,0 @@ -use std::cell::Cell; -use std::{fmt, str}; - -use nom::branch::alt; -use nom::bytes::complete::{escaped, is_not, tag, take_till}; -use nom::character::complete::char; -use nom::character::complete::{anychar, digit1}; -use nom::combinator::{eof, map, not, opt, recognize, value}; -use nom::error::ErrorKind; -use nom::multi::separated_list1; -use nom::sequence::{delimited, pair, tuple}; -use nom::{error_position, AsChar, IResult, InputTakeAtPosition}; - -pub(crate) use self::expr::Expr; -pub(crate) use self::node::{Cond, CondTest, Loop, Macro, Node, Target, When, Whitespace, Ws}; - -mod expr; -mod node; -#[cfg(test)] -mod tests; - -struct State<'a> { - syntax: &'a Syntax<'a>, - loop_depth: Cell<usize>, -} - -impl<'a> State<'a> { - fn new(syntax: &'a Syntax<'a>) -> State<'a> { - State { - syntax, - loop_depth: Cell::new(0), - } - } - - fn enter_loop(&self) { - self.loop_depth.set(self.loop_depth.get() + 1); - } - - fn leave_loop(&self) { - self.loop_depth.set(self.loop_depth.get() - 1); - } - - fn is_in_loop(&self) -> bool { - self.loop_depth.get() > 0 - } -} - -impl From<char> for Whitespace { - fn from(c: char) -> Self { - match c { - '+' => Self::Preserve, - '-' => Self::Suppress, - '~' => Self::Minimize, - _ => panic!("unsupported `Whitespace` conversion"), - } - } -} - -mod _parsed { - use std::mem; - - use super::{parse, Node, ParseError, Syntax}; - - pub(crate) struct Parsed { - #[allow(dead_code)] - source: String, - nodes: Vec<Node<'static>>, - } - - impl Parsed { - pub(crate) fn new(source: String, syntax: &Syntax<'_>) -> Result<Self, ParseError> { - // Self-referential borrowing: `self` will keep the source alive as `String`, - // internally we will transmute it to `&'static str` to satisfy the compiler. - // However, we only expose the nodes with a lifetime limited to `self`. - let src = unsafe { mem::transmute::<&str, &'static str>(source.as_str()) }; - let nodes = match parse(src, syntax) { - Ok(nodes) => nodes, - Err(e) => return Err(e), - }; - - Ok(Self { source, nodes }) - } - - // The return value's lifetime must be limited to `self` to uphold the unsafe invariant. - pub(crate) fn nodes(&self) -> &[Node<'_>] { - &self.nodes - } - } -} - -pub(crate) use _parsed::Parsed; - -pub(crate) fn parse<'a>(src: &'a str, syntax: &Syntax<'_>) -> Result<Vec<Node<'a>>, ParseError> { - match Node::parse(src, &State::new(syntax)) { - Ok((left, res)) => { - if !left.is_empty() { - Err(ParseError(format!("unable to parse template:\n\n{left:?}"))) - } else { - Ok(res) - } - } - - Err(nom::Err::Error(err)) | Err(nom::Err::Failure(err)) => { - let nom::error::Error { input, .. } = err; - let offset = src.len() - input.len(); - let (source_before, source_after) = src.split_at(offset); - - let source_after = match source_after.char_indices().enumerate().take(41).last() { - Some((40, (i, _))) => format!("{:?}...", &source_after[..i]), - _ => format!("{source_after:?}"), - }; - - let (row, last_line) = source_before.lines().enumerate().last().unwrap(); - let column = last_line.chars().count(); - - let msg = format!( - "problems parsing template source at row {}, column {} near:\n{}", - row + 1, - column, - source_after, - ); - - Err(ParseError(msg)) - } - - Err(nom::Err::Incomplete(_)) => Err(ParseError("parsing incomplete".into())), - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub(crate) struct ParseError(String); - -impl std::error::Error for ParseError {} - -impl fmt::Display for ParseError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.0.fmt(f) - } -} - -fn is_ws(c: char) -> bool { - matches!(c, ' ' | '\t' | '\r' | '\n') -} - -fn not_ws(c: char) -> bool { - !is_ws(c) -} - -fn ws<'a, O>( - inner: impl FnMut(&'a str) -> IResult<&'a str, O>, -) -> impl FnMut(&'a str) -> IResult<&'a str, O> { - delimited(take_till(not_ws), inner, take_till(not_ws)) -} - -fn split_ws_parts(s: &str) -> Node<'_> { - let trimmed_start = s.trim_start_matches(is_ws); - let len_start = s.len() - trimmed_start.len(); - let trimmed = trimmed_start.trim_end_matches(is_ws); - Node::Lit(&s[..len_start], trimmed, &trimmed_start[trimmed.len()..]) -} - -/// Skips input until `end` was found, but does not consume it. -/// Returns tuple that would be returned when parsing `end`. -fn skip_till<'a, O>( - end: impl FnMut(&'a str) -> IResult<&'a str, O>, -) -> impl FnMut(&'a str) -> IResult<&'a str, (&'a str, O)> { - enum Next<O> { - IsEnd(O), - NotEnd(char), - } - let mut next = alt((map(end, Next::IsEnd), map(anychar, Next::NotEnd))); - move |start: &'a str| { - let mut i = start; - loop { - let (j, is_end) = next(i)?; - match is_end { - Next::IsEnd(lookahead) => return Ok((i, (j, lookahead))), - Next::NotEnd(_) => i = j, - } - } - } -} - -fn keyword<'a>(k: &'a str) -> impl FnMut(&'a str) -> IResult<&'a str, &'a str> { - move |i: &'a str| -> IResult<&'a str, &'a str> { - let (j, v) = identifier(i)?; - if k == v { - Ok((j, v)) - } else { - Err(nom::Err::Error(error_position!(i, ErrorKind::Tag))) - } - } -} - -fn identifier(input: &str) -> IResult<&str, &str> { - recognize(pair(identifier_start, opt(identifier_tail)))(input) -} - -fn identifier_start(s: &str) -> IResult<&str, &str> { - s.split_at_position1_complete( - |c| !(c.is_alpha() || c == '_' || c >= '\u{0080}'), - nom::error::ErrorKind::Alpha, - ) -} - -fn identifier_tail(s: &str) -> IResult<&str, &str> { - s.split_at_position1_complete( - |c| !(c.is_alphanum() || c == '_' || c >= '\u{0080}'), - nom::error::ErrorKind::Alpha, - ) -} - -fn bool_lit(i: &str) -> IResult<&str, &str> { - alt((keyword("false"), keyword("true")))(i) -} - -fn num_lit(i: &str) -> IResult<&str, &str> { - recognize(pair(digit1, opt(pair(char('.'), digit1))))(i) -} - -fn str_lit(i: &str) -> IResult<&str, &str> { - let (i, s) = delimited( - char('"'), - opt(escaped(is_not("\\\""), '\\', anychar)), - char('"'), - )(i)?; - Ok((i, s.unwrap_or_default())) -} - -fn char_lit(i: &str) -> IResult<&str, &str> { - let (i, s) = delimited( - char('\''), - opt(escaped(is_not("\\\'"), '\\', anychar)), - char('\''), - )(i)?; - Ok((i, s.unwrap_or_default())) -} - -fn nested_parenthesis(i: &str) -> IResult<&str, ()> { - let mut nested = 0; - let mut last = 0; - let mut in_str = false; - let mut escaped = false; - - for (i, b) in i.chars().enumerate() { - if !(b == '(' || b == ')') || !in_str { - match b { - '(' => nested += 1, - ')' => { - if nested == 0 { - last = i; - break; - } - nested -= 1; - } - '"' => { - if in_str { - if !escaped { - in_str = false; - } - } else { - in_str = true; - } - } - '\\' => { - escaped = !escaped; - } - _ => (), - } - } - - if escaped && b != '\\' { - escaped = false; - } - } - - if nested == 0 { - Ok((&i[last..], ())) - } else { - Err(nom::Err::Error(error_position!( - i, - ErrorKind::SeparatedNonEmptyList - ))) - } -} - -fn path(i: &str) -> IResult<&str, Vec<&str>> { - let root = opt(value("", ws(tag("::")))); - let tail = separated_list1(ws(tag("::")), identifier); - - match tuple((root, identifier, ws(tag("::")), tail))(i) { - Ok((i, (root, start, _, rest))) => { - let mut path = Vec::new(); - path.extend(root); - path.push(start); - path.extend(rest); - Ok((i, path)) - } - Err(err) => { - if let Ok((i, name)) = identifier(i) { - // The returned identifier can be assumed to be path if: - // - Contains both a lowercase and uppercase character, i.e. a type name like `None` - // - Doesn't contain any lowercase characters, i.e. it's a constant - // In short, if it contains any uppercase characters it's a path. - if name.contains(char::is_uppercase) { - return Ok((i, vec![name])); - } - } - - // If `identifier()` fails then just return the original error - Err(err) - } - } -} - -fn take_content<'a>(i: &'a str, s: &State<'_>) -> IResult<&'a str, Node<'a>> { - let p_start = alt(( - tag(s.syntax.block_start), - tag(s.syntax.comment_start), - tag(s.syntax.expr_start), - )); - - let (i, _) = not(eof)(i)?; - let (i, content) = opt(recognize(skip_till(p_start)))(i)?; - let (i, content) = match content { - Some("") => { - // {block,comment,expr}_start follows immediately. - return Err(nom::Err::Error(error_position!(i, ErrorKind::TakeUntil))); - } - Some(content) => (i, content), - None => ("", i), // there is no {block,comment,expr}_start: take everything - }; - Ok((i, split_ws_parts(content))) -} - -fn tag_block_start<'a>(i: &'a str, s: &State<'_>) -> IResult<&'a str, &'a str> { - tag(s.syntax.block_start)(i) -} - -fn tag_block_end<'a>(i: &'a str, s: &State<'_>) -> IResult<&'a str, &'a str> { - tag(s.syntax.block_end)(i) -} - -fn tag_comment_start<'a>(i: &'a str, s: &State<'_>) -> IResult<&'a str, &'a str> { - tag(s.syntax.comment_start)(i) -} - -fn tag_comment_end<'a>(i: &'a str, s: &State<'_>) -> IResult<&'a str, &'a str> { - tag(s.syntax.comment_end)(i) -} - -fn tag_expr_start<'a>(i: &'a str, s: &State<'_>) -> IResult<&'a str, &'a str> { - tag(s.syntax.expr_start)(i) -} - -fn tag_expr_end<'a>(i: &'a str, s: &State<'_>) -> IResult<&'a str, &'a str> { - tag(s.syntax.expr_end)(i) -} - -#[derive(Debug)] -pub(crate) struct Syntax<'a> { - pub(crate) block_start: &'a str, - pub(crate) block_end: &'a str, - pub(crate) expr_start: &'a str, - pub(crate) expr_end: &'a str, - pub(crate) comment_start: &'a str, - pub(crate) comment_end: &'a str, -} - -impl Default for Syntax<'static> { - fn default() -> Self { - Self { - block_start: "{%", - block_end: "%}", - expr_start: "{{", - expr_end: "}}", - comment_start: "{#", - comment_end: "#}", - } - } -} |