diff options
| author | 2024-06-21 15:25:27 +0100 | |
|---|---|---|
| committer | 2024-06-21 15:25:27 +0100 | |
| commit | 9307f48d174c7657ac01444694e1cd96139bd800 (patch) | |
| tree | a1276b2e373d2fa3089fcf51f4bde15fc9a4af45 /src | |
| parent | ea98ddced71df70915df9a5ca50a8e67feedb0d3 (diff) | |
| download | peanuts-9307f48d174c7657ac01444694e1cd96139bd800.tar.gz peanuts-9307f48d174c7657ac01444694e1cd96139bd800.tar.bz2 peanuts-9307f48d174c7657ac01444694e1cd96139bd800.zip | |
WIP: more parsers
Diffstat (limited to '')
| -rw-r--r-- | src/parser.rs | 178 | 
1 files changed, 105 insertions, 73 deletions
| diff --git a/src/parser.rs b/src/parser.rs index 2382f68..bec5313 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2,7 +2,7 @@ use std::char;  use nom::{      branch::{alt, permutation}, -    bytes::streaming::{is_a, is_not, tag, take, take_till, take_until}, +    bytes::streaming::{is_a, is_not, tag, tag_no_case, take, take_till, take_until},      character::{          complete::one_of,          streaming::{alpha1, char, digit1, none_of, satisfy}, @@ -16,13 +16,6 @@ use nom::{  // parser: parses tokens from lexer into events -type Comment<'s> = &'s str; - -struct PI<'s> { -    target: &'s str, -    instruction: Option<&'s str>, -} -  enum ContentItem<'s> {      CharData(&'s str),      Element(Element<'s>), @@ -197,79 +190,85 @@ pub fn pubid_char(input: &str) -> IResult<&str, PubidChar> {      )  } -// TODO: wtf why doesn't this work how do i do thisjj  type CharData<'s> = &'s str;  /// [14]   	CharData	   ::=   	[^<&]* - ([^<&]* ']]>' [^<&]*)  pub fn char_data(input: &str) -> IResult<&str, CharData> { -    // tag(map( -    //     peek(alt(( -    //         map_parser( -    //             peek(take_until("]]>")), -    //             nom::bytes::complete::take_till(|c| c == '<' || c == '&'), -    //         ), -    //         map_parser( -    //             peek(take_till(|c| c == '<' || c == '&')), -    //             nom::bytes::complete::take_until("]]>"), -    //         ), -    //     ))), -    //     |(first, _)| first, -    // ))(input) - -    // map( -    //     tuple((is_not("<&]"), peek(alt((tag("<"), tag("&"), tag("]]>")))))), -    //     |(first, _)| first, -    // )(input) -    // map( -    //     tuple((recognize(many0(none_of("<&"))), opt(peek(tag("]]>"))))), -    //     |(first, _)| first, -    // )(input) -    // alt((recognize(many0(none_of("<&"))), take_until("]]>")))(input) -    let tagg: &str; -    if let Ok((_, tagg1)) = peek(take_until::<&str, &str, Error<&str>>("]]>"))(input) { -        if let Ok((_, tagg2)) = -            peek::<&str, &str, Error<&str>, _>(take_till(|c: char| c == '<' || c == '&'))(input) -        { -            if tagg1.len() < tagg2.len() { -                tagg = tagg1 -            } else { -                tagg = tagg2 -            } -        } else { -            tagg = tagg1; -        } -    } else { -        (_, tagg) = peek(take_till(|c| c == '<' || c == '&'))(input)? -    } -    tag(tagg)(input) -    // let mut len = 0; -    // let ch  = input.chars().collect::<Vec<_>>(); -    // for (idx, char) in ch.as_ref().into_iter().enumerate() { -    //     match char { -    //         '<' | '&' =>                 break, -    //         ']' => { -    //             if idx <= ch.len() - 3 {} -    //         }, -    //         _ => todo!(), -    //     } -    // } -    // while let Some(char) = chars.next() { -    //     if char == '<' || char == '&' { -    //         break; -    //     } else if char == ']' { -    //         if let Some(next) = chars.peek() { -    //             if next == ']' { -    //                 if let Some(next) = chars.next_if_eq() {} -    //             } +    recognize(many_till( +        none_of("<&"), +        peek(alt((recognize(one_of("<&")), tag("]]>")))), +    ))(input) + +    // let tagg: &str; +    // if let Ok((_, tagg1)) = peek(take_until::<&str, &str, Error<&str>>("]]>"))(input) { +    //     if let Ok((_, tagg2)) = +    //         peek::<&str, &str, Error<&str>, _>(take_till(|c: char| c == '<' || c == '&'))(input) +    //     { +    //         if tagg1.len() < tagg2.len() { +    //             tagg = tagg1 +    //         } else { +    //             tagg = tagg2      //         } +    //     } else { +    //         tagg = tagg1;      //     } -    //     len += 1; +    // } else { +    //     (_, tagg) = peek(take_till(|c| c == '<' || c == '&'))(input)?      // } -    // todo!() +    // tag(tagg)(input) +      // recognize(many0(permutation((none_of("<&"), not(tag("]]>"))))))(input)      // recognize(many0(not(alt((tag("<"), tag("&"), tag("]]>"))))))(input)      // take_till(|c| c == '<' || c == '&').and_then(take_until("]]>"))(input)  } +type Comment<'s> = &'s str; +/// Comment	   ::=   	'<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' +pub fn comment(input: &str) -> IResult<&str, Comment> { +    delimited( +        tag("<!--"), +        recognize(many_till(xmlchar, peek(tag("--")))), +        tag("-->"), +    )(input) +} + +struct PI<'s> { +    target: &'s str, +    instruction: Option<&'s str>, +} +/// [16]   	PI	   ::=   	'<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' +pub fn pi(input: &str) -> IResult<&str, PI> { +    let (rest, (target, instruction)) = delimited( +        tag("<?"), +        pair( +            pi_target, +            opt(recognize(pair(s, many_till(xmlchar, peek(tag("?>")))))), +        ), +        tag("?>"), +    )(input)?; +    Ok(( +        rest, +        PI { +            target, +            instruction, +        }, +    )) +} + +type PITarget<'s> = &'s str; +/// [17]   	PITarget	   ::=   	Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) +pub fn pi_target(input: &str) -> IResult<&str, PITarget> { +    let (rest, name) = name(input)?; +    if name.to_lowercase() == "xml" { +        return Err(Err::Error(Error { +            input, +            // TODO: check if better error to return +            code: ErrorKind::Tag, +        })); +    } else { +        return Ok((rest, name)); +    } +} +  type Prolog<'s> = (      Option<XMLDecl>,      Vec<Misc<'s>>, @@ -320,8 +319,8 @@ pub fn version_info(input: &str) -> IResult<&str, VersionInfo> {  }  /// [25]   	Eq	   ::=   	S? '=' S? -pub fn eq(input: &str) -> IResult<&str, (Option<&str>, char, Option<&str>)> { -    tuple((opt(s), char('='), opt(s)))(input) +pub fn eq(input: &str) -> IResult<&str, &str> { +    recognize(tuple((opt(s), char('='), opt(s))))(input)  }  #[derive(Clone)] @@ -363,9 +362,42 @@ mod tests {          assert_eq!(Ok(("&ghi", "abc]>def")), char_data("abc]>def&ghi"));          assert_eq!(              Err(Err::Incomplete(nom::Needed::Size( -                NonZero::new(1usize).unwrap() +                NonZero::new(3usize).unwrap()              ))),              char_data("abcdefghi")          );      } + +    #[test] +    fn test_comment() { +        assert_eq!(Ok(("", "")), comment("<!---->")); +        assert_eq!(Ok(("", "asdf")), comment("<!--asdf-->")); +        assert_eq!(Ok(("", "as-df")), comment("<!--as-df-->")); +        assert_eq!( +            Err(Err::Incomplete(nom::Needed::Size( +                NonZero::new(2usize).unwrap() +            ))), +            comment("<!--asdf") +        ); +    } + +    #[test] +    fn test_pi_target() { +        assert_eq!(Ok((" ", "asdf")), pi_target("asdf ")); +        assert_eq!(Ok((" ", "xmlasdf")), pi_target("xmlasdf ")); +        assert_eq!( +            Err(Err::Error(Error { +                input: "xml ", +                code: ErrorKind::Tag +            })), +            pi_target("xml ") +        ); +        assert_eq!( +            Err(Err::Error(Error { +                input: "xMl ", +                code: ErrorKind::Tag +            })), +            pi_target("xMl ") +        ); +    }  } | 
