diff options
Diffstat (limited to 'src/xml/parsers.rs')
-rw-r--r-- | src/xml/parsers.rs | 131 |
1 files changed, 77 insertions, 54 deletions
diff --git a/src/xml/parsers.rs b/src/xml/parsers.rs index 6326593..93ff5b1 100644 --- a/src/xml/parsers.rs +++ b/src/xml/parsers.rs @@ -15,18 +15,18 @@ use nom::{ use crate::xml::NSAttName; use super::{ - AttDef, AttDefName, AttType, AttValue, AttlistDecl, Attribute, CDEnd, CDSect, CDStart, CData, - Char, CharData, CharRef, Children, ChildrenKind, Choice, Comment, ConditionalSect, Content, - ContentItem, Contentspec, Cp, CpKind, DeclSep, DefaultAttName, DefaultDecl, DoctypeDecl, - Document, ETag, Element, Elementdecl, EmptyElemTag, EncName, EncodingDecl, EntityDecl, - EntityDef, EntityRef, EntityValue, EnumeratedType, Enumeration, Eq, ExtParsedEnt, ExtSubset, - ExtSubsetDecl, ExtSubsetDeclaration, ExternalID, GEDecl, Ignore, IgnoreSect, - IgnoreSectContents, IncludeSect, IntSubset, IntSubsetDeclaration, LiteralData, LocalPart, - MarkupDecl, Misc, Mixed, NCName, NDataDecl, Name, NameChar, NameStartChar, Names, Nmtoken, - Nmtokens, NotationDecl, NotationDeclID, NotationType, Occurence, PEDecl, PEDef, PEReference, - PITarget, Prefix, PrefixedAttName, PrefixedName, Prolog, PubidChar, PubidLiteral, PublicID, - QName, Reference, SDDecl, STag, Seq, StringType, SystemLiteral, TextDecl, TokenizedType, - UnprefixedName, VersionInfo, VersionNum, XMLDecl, PI, S, + AttDef, AttDefName, AttType, AttValue, AttValueData, AttlistDecl, Attribute, CDEnd, CDSect, + CDStart, CData, Char, CharData, CharRef, Children, ChildrenKind, Choice, Comment, + ConditionalSect, Content, ContentItem, Contentspec, Cp, CpKind, DeclSep, DefaultAttName, + DefaultDecl, DoctypeDecl, Document, ETag, Element, Elementdecl, EmptyElemTag, EncName, + EncodingDecl, EntityDecl, EntityDef, EntityRef, EntityValue, EntityValueData, EnumeratedType, + Enumeration, Eq, ExtParsedEnt, ExtSubset, ExtSubsetDecl, ExtSubsetDeclaration, ExternalID, + GEDecl, Ignore, IgnoreSect, IgnoreSectContents, IncludeSect, IntSubset, IntSubsetDeclaration, + LocalPart, MarkupDecl, Misc, Mixed, NCName, NDataDecl, Name, NameChar, NameStartChar, Names, + Nmtoken, Nmtokens, NotationDecl, NotationDeclID, NotationType, Occurence, PEDecl, PEDef, + PEReference, PITarget, Prefix, PrefixedAttName, PrefixedName, Prolog, PubidChar, PubidLiteral, + PublicID, QName, Reference, SDDecl, STag, Seq, StringType, SystemLiteral, TextDecl, + TokenizedType, UnprefixedName, VersionInfo, VersionNum, XMLDecl, PI, S, }; pub trait Parser<'s, T> { @@ -141,9 +141,11 @@ impl Parser<'_, Char> for Char { } /// [3] S ::= (#x20 | #x9 | #xD | #xA)+ -impl<'s> Parser<'s, S<'s>> for S<'s> { - fn parse(input: &'s str) -> IResult<&str, S<'s>> { - map(is_a("\u{20}\u{9}\u{D}\u{A}"), |s| S(s))(input) +impl Parser<'_, S> for S { + fn parse(input: &str) -> IResult<&str, S> { + // TODO?: whitespacing + // map(is_a("\u{20}\u{9}\u{D}\u{A}"), |s| S(s))(input) + value(S, is_a("\u{20}\u{9}\u{D}\u{A}"))(input) } } @@ -221,43 +223,46 @@ impl<'s> Parser<'s, Nmtokens<'s>> for Nmtokens<'s> { /// | "'" ([^%&'] | PEReference | Reference)* "'" impl<'s> Parser<'s, EntityValue<'s>> for EntityValue<'s> { fn parse(input: &'s str) -> IResult<&str, EntityValue<'s>> { - map( - alt(( + alt(( + map( delimited( char('"'), many0(alt(( map( recognize(many_till(take(1usize), peek(one_of("%&\"")))), - |string| LiteralData::String(string), + |string| EntityValueData::String(string), ), map(PEReference::parse, |pe_reference| { - LiteralData::PEReference(pe_reference) + EntityValueData::PEReference(pe_reference) }), map(Reference::parse, |reference| { - LiteralData::Reference(reference) + EntityValueData::Reference(reference) }), ))), char('"'), ), + |entity_value| EntityValue::DoubleQuoted(entity_value), + ), + map( delimited( char('\''), many0(alt(( map( recognize(many_till(take(1usize), peek(one_of("%&'")))), - |string| LiteralData::String(string), + |string| EntityValueData::String(string), ), map(PEReference::parse, |pe_reference| { - LiteralData::PEReference(pe_reference) + EntityValueData::PEReference(pe_reference) }), map(Reference::parse, |reference| { - LiteralData::Reference(reference) + EntityValueData::Reference(reference) }), ))), char('\''), ), - )), - |entity_value| EntityValue(entity_value), - )(input) + |entity_value| EntityValue::SingleQuoted(entity_value), + ), + ))(input) } } @@ -265,67 +270,76 @@ impl<'s> Parser<'s, EntityValue<'s>> for EntityValue<'s> { /// | "'" ([^<&'] | Reference)* "'" impl<'s> Parser<'s, AttValue<'s>> for AttValue<'s> { fn parse(input: &'s str) -> IResult<&str, AttValue<'s>> { - map( - alt(( + alt(( + map( delimited( char('"'), many0(alt(( map( recognize(many_till(take(1usize), peek(one_of("%&\"")))), - |string| LiteralData::String(string), + |string| AttValueData::String(string), ), map(Reference::parse, |reference| { - LiteralData::Reference(reference) + AttValueData::Reference(reference) }), ))), char('"'), ), + |att_value| AttValue::DoubleQuoted(att_value), + ), + map( delimited( char('\''), many0(alt(( map( recognize(many_till(take(1usize), peek(one_of("%&'")))), - |string| LiteralData::String(string), + |string| AttValueData::String(string), ), map(Reference::parse, |reference| { - LiteralData::Reference(reference) + AttValueData::Reference(reference) }), ))), char('\''), ), - )), - |att_value| AttValue(att_value), - )(input) + |att_value| AttValue::SingleQuoted(att_value), + ), + ))(input) } } /// [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") impl<'s> Parser<'s, SystemLiteral<'s>> for SystemLiteral<'s> { fn parse(input: &'s str) -> IResult<&str, SystemLiteral<'s>> { - map( - alt(( + alt(( + map( delimited(char('"'), recognize(many0(none_of("\""))), char('"')), + |system_literal| SystemLiteral::DoubleQuoted(system_literal), + ), + map( delimited(char('\''), recognize(many0(none_of("'"))), char('\'')), - )), - |system_literal| SystemLiteral(system_literal), - )(input) + |system_literal| SystemLiteral::SingleQuoted(system_literal), + ), + ))(input) } } /// [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" impl<'s> Parser<'s, PubidLiteral<'s>> for PubidLiteral<'s> { fn parse(input: &'s str) -> IResult<&str, PubidLiteral<'s>> { - map( - alt(( + alt(( + map( delimited(char('"'), recognize(many0(PubidChar::parse)), char('"')), + |pubid_literal| PubidLiteral::DoubleQuoted(pubid_literal), + ), + map( delimited( char('\''), recognize(many0(recognize(not(char('\''))).and_then(PubidChar::parse))), char('\''), ), - )), - |pubid_literal| PubidLiteral(pubid_literal), - )(input) + |pubid_literal| PubidLiteral::SingleQuoted(pubid_literal), + ), + ))(input) } } @@ -477,15 +491,18 @@ impl<'s> Parser<'s, XMLDecl<'s>> for XMLDecl<'s> { /// [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') impl Parser<'_, VersionInfo> for VersionInfo { fn parse(input: &'_ str) -> IResult<&str, VersionInfo> { - map( - preceded( - tuple((S::parse, tag("version"), Eq::parse)), - alt(( + preceded( + tuple((S::parse, tag("version"), Eq::parse)), + alt(( + map( delimited(char('\''), VersionNum::parse, char('\'')), + |version_info| VersionInfo::SingleQuoted(version_info), + ), + map( delimited(char('"'), VersionNum::parse, char('"')), - )), - ), - |version_num| VersionInfo(version_num), + |version_info| VersionInfo::DoubleQuoted(version_info), + ), + )), )(input) } } @@ -639,12 +656,18 @@ impl Parser<'_, SDDecl> for SDDecl { alt(( delimited( char('\''), - alt((value(true, tag("yes")), value(false, tag("no")))), + alt(( + value(SDDecl::SingleQuoted(true), tag("yes")), + value(SDDecl::SingleQuoted(false), tag("no")), + )), char('\''), ), delimited( char('"'), - alt((value(true, tag("yes")), value(false, tag("no")))), + alt(( + value(SDDecl::DoubleQuoted(true), tag("yes")), + value(SDDecl::DoubleQuoted(false), tag("no")), + )), char('"'), ), )), |