about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/parser.rs 178
1 files changed, 105 insertions, 73 deletions
diff --git a/src/parser.rs b/src/parser.rs
index 2382f68..bec5313 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -2,7 +2,7 @@ use std::char;
use nom::{
branch::{alt, permutation},
- bytes::streaming::{is_a, is_not, tag, take, take_till, take_until},
+ bytes::streaming::{is_a, is_not, tag, tag_no_case, take, take_till, take_until},
character::{
complete::one_of,
streaming::{alpha1, char, digit1, none_of, satisfy},
@@ -16,13 +16,6 @@ use nom::{
// parser: parses tokens from lexer into events
-type Comment<'s> = &'s str;
-
-struct PI<'s> {
- target: &'s str,
- instruction: Option<&'s str>,
-}
-
enum ContentItem<'s> {
CharData(&'s str),
Element(Element<'s>),
@@ -197,79 +190,85 @@ pub fn pubid_char(input: &str) -> IResult<&str, PubidChar> {
)
}
-// TODO: wtf why doesn't this work how do i do thisjj
type CharData<'s> = &'s str;
/// [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
pub fn char_data(input: &str) -> IResult<&str, CharData> {
- // tag(map(
- // peek(alt((
- // map_parser(
- // peek(take_until("]]>")),
- // nom::bytes::complete::take_till(|c| c == '<' || c == '&'),
- // ),
- // map_parser(
- // peek(take_till(|c| c == '<' || c == '&')),
- // nom::bytes::complete::take_until("]]>"),
- // ),
- // ))),
- // |(first, _)| first,
- // ))(input)
-
- // map(
- // tuple((is_not("<&]"), peek(alt((tag("<"), tag("&"), tag("]]>")))))),
- // |(first, _)| first,
- // )(input)
- // map(
- // tuple((recognize(many0(none_of("<&"))), opt(peek(tag("]]>"))))),
- // |(first, _)| first,
- // )(input)
- // alt((recognize(many0(none_of("<&"))), take_until("]]>")))(input)
- let tagg: &str;
- if let Ok((_, tagg1)) = peek(take_until::<&str, &str, Error<&str>>("]]>"))(input) {
- if let Ok((_, tagg2)) =
- peek::<&str, &str, Error<&str>, _>(take_till(|c: char| c == '<' || c == '&'))(input)
- {
- if tagg1.len() < tagg2.len() {
- tagg = tagg1
- } else {
- tagg = tagg2
- }
- } else {
- tagg = tagg1;
- }
- } else {
- (_, tagg) = peek(take_till(|c| c == '<' || c == '&'))(input)?
- }
- tag(tagg)(input)
- // let mut len = 0;
- // let ch = input.chars().collect::<Vec<_>>();
- // for (idx, char) in ch.as_ref().into_iter().enumerate() {
- // match char {
- // '<' | '&' => break,
- // ']' => {
- // if idx <= ch.len() - 3 {}
- // },
- // _ => todo!(),
- // }
- // }
- // while let Some(char) = chars.next() {
- // if char == '<' || char == '&' {
- // break;
- // } else if char == ']' {
- // if let Some(next) = chars.peek() {
- // if next == ']' {
- // if let Some(next) = chars.next_if_eq() {}
- // }
+ recognize(many_till(
+ none_of("<&"),
+ peek(alt((recognize(one_of("<&")), tag("]]>")))),
+ ))(input)
+
+ // let tagg: &str;
+ // if let Ok((_, tagg1)) = peek(take_until::<&str, &str, Error<&str>>("]]>"))(input) {
+ // if let Ok((_, tagg2)) =
+ // peek::<&str, &str, Error<&str>, _>(take_till(|c: char| c == '<' || c == '&'))(input)
+ // {
+ // if tagg1.len() < tagg2.len() {
+ // tagg = tagg1
+ // } else {
+ // tagg = tagg2
// }
+ // } else {
+ // tagg = tagg1;
// }
- // len += 1;
+ // } else {
+ // (_, tagg) = peek(take_till(|c| c == '<' || c == '&'))(input)?
// }
- // todo!()
+ // tag(tagg)(input)
+
// recognize(many0(permutation((none_of("<&"), not(tag("]]>"))))))(input)
// recognize(many0(not(alt((tag("<"), tag("&"), tag("]]>"))))))(input)
// take_till(|c| c == '<' || c == '&').and_then(take_until("]]>"))(input)
}
+/// Text found between the `<!--` and `-->` delimiters of a comment.
+type Comment<'s> = &'s str;
+/// [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
+///
+/// Yields the comment text without its delimiters. The text is collected by
+/// repeated `xmlchar` matches and ends in front of the first `--`, which must
+/// then be the start of the closing `-->`.
+pub fn comment(input: &str) -> IResult<&str, Comment> {
+    let (after_open, _) = tag("<!--")(input)?;
+    // `peek` leaves the `--` unconsumed so the closing tag below can match it.
+    let (after_text, text) = recognize(many_till(xmlchar, peek(tag("--"))))(after_open)?;
+    let (rest, _) = tag("-->")(after_text)?;
+    Ok((rest, text))
+}
+
+/// A processing instruction: its target name plus the optional instruction text.
+struct PI<'s> {
+    target: &'s str,
+    // As produced by `pi`, this slice still begins with the `s` match that
+    // separates it from the target.
+    instruction: Option<&'s str>,
+}
+/// [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
+///
+/// Parses `<?`, a `pi_target`, an optional instruction and the closing `?>`.
+/// The instruction is `None` when nothing matching `s` follows the target.
+pub fn pi(input: &str) -> IResult<&str, PI> {
+    let (after_open, _) = tag("<?")(input)?;
+    let (after_target, target) = pi_target(after_open)?;
+    // `recognize` returns everything the pair consumed, i.e. the `s` match
+    // together with the text running up to (but not including) `?>`.
+    let (after_text, instruction) =
+        opt(recognize(pair(s, many_till(xmlchar, peek(tag("?>"))))))(after_target)?;
+    let (rest, _) = tag("?>")(after_text)?;
+    Ok((rest, PI { target, instruction }))
+}
+
+/// Name of the application a processing instruction is addressed to.
+type PITarget<'s> = &'s str;
+/// [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
+///
+/// Parses a `name` and rejects it when it is exactly `xml` in any mix of
+/// upper and lower case; longer names that merely start with `xml` are
+/// accepted. Errors from `name` itself are propagated unchanged.
+///
+/// # Errors
+///
+/// Returns `Err::Error` with `ErrorKind::Tag`, positioned at the start of
+/// `input`, for the reserved name.
+pub fn pi_target(input: &str) -> IResult<&str, PITarget> {
+    let (rest, target) = name(input)?;
+    // The reserved word is pure ASCII, so an ASCII case-insensitive comparison
+    // is sufficient and avoids allocating a lowercased copy of every target.
+    if target.eq_ignore_ascii_case("xml") {
+        return Err(Err::Error(Error {
+            input,
+            // TODO: check if better error to return
+            code: ErrorKind::Tag,
+        }));
+    }
+    Ok((rest, target))
+}
+
type Prolog<'s> = (
Option<XMLDecl>,
Vec<Misc<'s>>,
@@ -320,8 +319,8 @@ pub fn version_info(input: &str) -> IResult<&str, VersionInfo> {
}
/// [25] Eq ::= S? '=' S?
-pub fn eq(input: &str) -> IResult<&str, (Option<&str>, char, Option<&str>)> {
- tuple((opt(s), char('='), opt(s)))(input)
+pub fn eq(input: &str) -> IResult<&str, &str> {
+ recognize(tuple((opt(s), char('='), opt(s))))(input)
}
#[derive(Clone)]
@@ -363,9 +362,42 @@ mod tests {
assert_eq!(Ok(("&ghi", "abc]>def")), char_data("abc]>def&ghi"));
assert_eq!(
Err(Err::Incomplete(nom::Needed::Size(
- NonZero::new(1usize).unwrap()
+ NonZero::new(3usize).unwrap()
))),
char_data("abcdefghi")
);
}
+
+    /// `comment` strips the delimiters and reports `Incomplete` while the
+    /// comment is still unterminated.
+    #[test]
+    fn test_comment() {
+        let complete_cases = [
+            ("<!---->", ""),
+            ("<!--asdf-->", "asdf"),
+            ("<!--as-df-->", "as-df"),
+        ];
+        for (source, text) in complete_cases {
+            assert_eq!(Ok(("", text)), comment(source));
+        }
+
+        // The input ends before any `--`, so the parser asks for more data.
+        let missing = NonZero::new(2usize).unwrap();
+        assert_eq!(
+            Err(Err::Incomplete(nom::Needed::Size(missing))),
+            comment("<!--asdf")
+        );
+    }
+
+    /// `pi_target` accepts ordinary names and rejects `xml` in any letter case.
+    #[test]
+    fn test_pi_target() {
+        // Names that merely begin with "xml" are still valid targets.
+        for (source, target) in [("asdf ", "asdf"), ("xmlasdf ", "xmlasdf")] {
+            assert_eq!(Ok((" ", target)), pi_target(source));
+        }
+
+        // The reserved name is refused, with the error located at the
+        // untouched input.
+        for reserved in ["xml ", "xMl "] {
+            let rejection = Err(Err::Error(Error {
+                input: reserved,
+                code: ErrorKind::Tag,
+            }));
+            assert_eq!(rejection, pi_target(reserved));
+        }
+    }
}