diff options
-rw-r--r-- | src/parser.rs | 178 |
1 files changed, 105 insertions, 73 deletions
diff --git a/src/parser.rs b/src/parser.rs index 2382f68..bec5313 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2,7 +2,7 @@ use std::char; use nom::{ branch::{alt, permutation}, - bytes::streaming::{is_a, is_not, tag, take, take_till, take_until}, + bytes::streaming::{is_a, is_not, tag, tag_no_case, take, take_till, take_until}, character::{ complete::one_of, streaming::{alpha1, char, digit1, none_of, satisfy}, @@ -16,13 +16,6 @@ use nom::{ // parser: parses tokens from lexer into events -type Comment<'s> = &'s str; - -struct PI<'s> { - target: &'s str, - instruction: Option<&'s str>, -} - enum ContentItem<'s> { CharData(&'s str), Element(Element<'s>), @@ -197,79 +190,85 @@ pub fn pubid_char(input: &str) -> IResult<&str, PubidChar> { ) } -// TODO: wtf why doesn't this work how do i do thisjj type CharData<'s> = &'s str; /// [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) pub fn char_data(input: &str) -> IResult<&str, CharData> { - // tag(map( - // peek(alt(( - // map_parser( - // peek(take_until("]]>")), - // nom::bytes::complete::take_till(|c| c == '<' || c == '&'), - // ), - // map_parser( - // peek(take_till(|c| c == '<' || c == '&')), - // nom::bytes::complete::take_until("]]>"), - // ), - // ))), - // |(first, _)| first, - // ))(input) - - // map( - // tuple((is_not("<&]"), peek(alt((tag("<"), tag("&"), tag("]]>")))))), - // |(first, _)| first, - // )(input) - // map( - // tuple((recognize(many0(none_of("<&"))), opt(peek(tag("]]>"))))), - // |(first, _)| first, - // )(input) - // alt((recognize(many0(none_of("<&"))), take_until("]]>")))(input) - let tagg: &str; - if let Ok((_, tagg1)) = peek(take_until::<&str, &str, Error<&str>>("]]>"))(input) { - if let Ok((_, tagg2)) = - peek::<&str, &str, Error<&str>, _>(take_till(|c: char| c == '<' || c == '&'))(input) - { - if tagg1.len() < tagg2.len() { - tagg = tagg1 - } else { - tagg = tagg2 - } - } else { - tagg = tagg1; - } - } else { - (_, tagg) = peek(take_till(|c| c == '<' || c == '&'))(input)? - } - tag(tagg)(input) - // let mut len = 0; - // let ch = input.chars().collect::<Vec<_>>(); - // for (idx, char) in ch.as_ref().into_iter().enumerate() { - // match char { - // '<' | '&' => break, - // ']' => { - // if idx <= ch.len() - 3 {} - // }, - // _ => todo!(), - // } - // } - // while let Some(char) = chars.next() { - // if char == '<' || char == '&' { - // break; - // } else if char == ']' { - // if let Some(next) = chars.peek() { - // if next == ']' { - // if let Some(next) = chars.next_if_eq() {} - // } + recognize(many_till( + none_of("<&"), + peek(alt((recognize(one_of("<&")), tag("]]>")))), + ))(input) + + // let tagg: &str; + // if let Ok((_, tagg1)) = peek(take_until::<&str, &str, Error<&str>>("]]>"))(input) { + // if let Ok((_, tagg2)) = + // peek::<&str, &str, Error<&str>, _>(take_till(|c: char| c == '<' || c == '&'))(input) + // { + // if tagg1.len() < tagg2.len() { + // tagg = tagg1 + // } else { + // tagg = tagg2 // } + // } else { + // tagg = tagg1; // } - // len += 1; + // } else { + // (_, tagg) = peek(take_till(|c| c == '<' || c == '&'))(input)? // } - // todo!() + // tag(tagg)(input) + // recognize(many0(permutation((none_of("<&"), not(tag("]]>"))))))(input) // recognize(many0(not(alt((tag("<"), tag("&"), tag("]]>"))))))(input) // take_till(|c| c == '<' || c == '&').and_then(take_until("]]>"))(input) } +type Comment<'s> = &'s str; +/// Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' +pub fn comment(input: &str) -> IResult<&str, Comment> { + delimited( + tag("<!--"), + recognize(many_till(xmlchar, peek(tag("--")))), + tag("-->"), + )(input) +} + +struct PI<'s> { + target: &'s str, + instruction: Option<&'s str>, +} +/// [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' +pub fn pi(input: &str) -> IResult<&str, PI> { + let (rest, (target, instruction)) = delimited( + tag("<?"), + pair( + pi_target, + opt(recognize(pair(s, many_till(xmlchar, peek(tag("?>")))))), + ), + tag("?>"), + )(input)?; + Ok(( + rest, + PI { + target, + instruction, + }, + )) +} + +type PITarget<'s> = &'s str; +/// [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) +pub fn pi_target(input: &str) -> IResult<&str, PITarget> { + let (rest, name) = name(input)?; + if name.to_lowercase() == "xml" { + return Err(Err::Error(Error { + input, + // TODO: check if better error to return + code: ErrorKind::Tag, + })); + } else { + return Ok((rest, name)); + } +} + type Prolog<'s> = ( Option<XMLDecl>, Vec<Misc<'s>>, @@ -320,8 +319,8 @@ pub fn version_info(input: &str) -> IResult<&str, VersionInfo> { } /// [25] Eq ::= S? '=' S? -pub fn eq(input: &str) -> IResult<&str, (Option<&str>, char, Option<&str>)> { - tuple((opt(s), char('='), opt(s)))(input) +pub fn eq(input: &str) -> IResult<&str, &str> { + recognize(tuple((opt(s), char('='), opt(s))))(input) } #[derive(Clone)] @@ -363,9 +362,42 @@ mod tests { assert_eq!(Ok(("&ghi", "abc]>def")), char_data("abc]>def&ghi")); assert_eq!( Err(Err::Incomplete(nom::Needed::Size( - NonZero::new(1usize).unwrap() + NonZero::new(3usize).unwrap() ))), char_data("abcdefghi") ); } + + #[test] + fn test_comment() { + assert_eq!(Ok(("", "")), comment("<!---->")); + assert_eq!(Ok(("", "asdf")), comment("<!--asdf-->")); + assert_eq!(Ok(("", "as-df")), comment("<!--as-df-->")); + assert_eq!( + Err(Err::Incomplete(nom::Needed::Size( + NonZero::new(2usize).unwrap() + ))), + comment("<!--asdf") + ); + } + + #[test] + fn test_pi_target() { + assert_eq!(Ok((" ", "asdf")), pi_target("asdf ")); + assert_eq!(Ok((" ", "xmlasdf")), pi_target("xmlasdf ")); + assert_eq!( + Err(Err::Error(Error { + input: "xml ", + code: ErrorKind::Tag + })), + pi_target("xml ") + ); + assert_eq!( + Err(Err::Error(Error { + input: "xMl ", + code: ErrorKind::Tag + })), + pi_target("xMl ") + ); + } } |