diff options
Diffstat (limited to 'src/parser.rs')
-rw-r--r-- | src/parser.rs | 77 |
1 files changed, 44 insertions, 33 deletions
diff --git a/src/parser.rs b/src/parser.rs index 518aad4..07d48c6 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2,10 +2,7 @@ use std::char; use nom::{ branch::alt, - bytes::{ - complete::take_until, - streaming::{is_a, tag, take}, - }, + bytes::streaming::{is_a, tag, take, take_till, take_until}, character::{ complete::one_of, streaming::{char, digit1, none_of, satisfy}, @@ -19,11 +16,6 @@ use nom::{ // parser: parses tokens from lexer into events -enum Misc<'s> { - Comment(Comment<'s>), - PI(PI<'s>), -} - type Comment<'s> = &'s str; struct PI<'s> { @@ -34,46 +26,44 @@ struct PI<'s> { enum ContentItem<'s> { CharData(&'s str), Element(Element<'s>), - Reference(Reference<'s>), - CDSect(CDSect<'s>), + // Reference(Reference<'s>), + // CDSect(CDSect<'s>), } type Content<'s> = Option<Vec<ContentItem<'s>>>; -struct Element<'s> { - name: &'s str, - attributes: Vec<Attribute<'s>>, - content: Content<'s>, -} - struct Attribute<'s> { key: &'s str, value: &'s str, } -// type VersionNum<'s> = &'s str; /// Contains only latin characters or dash after first char type EncName<'s> = &'s str; -// struct XMLDecl<'s> { -// version_info: VersionNum<'s>, -// encoding_decl: Option<EncName<'s>>, -// sd_decl: Option<bool>, -// } - struct DoctypeDecl<'s> { name: &'s str, - // TODO + // TODO: doctype declaration parsing } - +/// pub fn doctypedecl(input: &str) -> IResult<&str, DoctypeDecl> { todo!() } +struct Element<'s> { + name: &'s str, + attributes: Vec<Attribute<'s>>, + content: Content<'s>, +} +/// Element pub fn element(input: &str) -> IResult<&str, Element> { todo!() } +enum Misc<'s> { + Comment(Comment<'s>), + PI(PI<'s>), +} +/// Misc pub fn misc(input: &str) -> IResult<&str, Misc> { todo!() } @@ -210,7 +200,7 @@ pub fn pubid_char(input: &str) -> IResult<&str, PubidChar> { type CharData<'s> = &'s str; /// [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) pub fn char_data(input: &str) -> IResult<&str, CharData> { - take_until()(input) + recognize(take_until("]]>").and_then(take_till(|c| c == '<' || c == '&')))(input) } type Prolog<'s> = ( @@ -229,23 +219,23 @@ pub fn prolog(input: &str) -> IResult<&str, Prolog> { struct XMLDecl { version_info: VersionInfo, - encoding_decl: Option<EncodingDecl>, - sd_decl: Option<SDDecl>, + // encoding_decl: Option<EncodingDecl>, + // sd_decl: Option<SDDecl>, } /// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' pub fn xml_decl(input: &str) -> IResult<&str, XMLDecl> { // (VersionInfo, Option<EncodingDecl>, Option<SDDecl>) - let (leftover, (version_info, encoding_decl, sd_decl)) = delimited( + let (leftover, (version_info /* encoding_decl, sd_decl */,)) = delimited( tag("<?xml"), - tuple((version_info, opt(encoding_decl), opt(sd_decl))), + tuple((version_info /* opt(encoding_decl), opt(sd_decl) */,)), tag("?>"), )(input)?; Ok(( leftover, XMLDecl { version_info, - encoding_decl, - sd_decl, + // encoding_decl, + // sd_decl, }, )) } @@ -282,3 +272,24 @@ pub fn version_num(input: &str) -> IResult<&str, VersionNum> { )), )(input) } + +pub fn reference(input: &str) -> IResult<&str, char> { + todo!() +} + +pub fn pe_reference(input: &str) -> IResult<&str, char> { + todo!() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_char_data() { + assert_eq!(Ok(("&def]]>ghi", "abc")), char_data("abc&def]]>ghi")); + assert_eq!(Ok(("]]>ghi", "abcdef")), char_data("abcdef]]>ghi")); + assert_eq!(Ok(("&defghi", "abc")), char_data("abc&defghi")); + assert_eq!(Ok(("", "abcdefghi")), char_data("abcdefghi")); + } +} |