diff options
author | 2024-10-31 20:45:46 +0000 | |
---|---|---|
committer | 2024-10-31 20:45:46 +0000 | |
commit | c41da2c89e2c188d9238b2c3e1555ed81335c81d (patch) | |
tree | 353d39b9f9191847c63f389485d1af36f30cdf3c | |
parent | 6b471061157ee1873d7ac4f3e30cd501d27dcb5a (diff) | |
download | peanuts-c41da2c89e2c188d9238b2c3e1555ed81335c81d.tar.gz peanuts-c41da2c89e2c188d9238b2c3e1555ed81335c81d.tar.bz2 peanuts-c41da2c89e2c188d9238b2c3e1555ed81335c81d.zip |
refactor parsers into Parser trait implementations
-rw-r--r-- | src/xml/mod.rs | 1240 | ||||
-rw-r--r-- | src/xml/parsers.rs | 1502 |
2 files changed, 1653 insertions, 1089 deletions
diff --git a/src/xml/mod.rs b/src/xml/mod.rs index 47c1779..b1d6ea5 100644 --- a/src/xml/mod.rs +++ b/src/xml/mod.rs @@ -1,185 +1,92 @@ use std::char; -use nom::{ - branch::alt, - bytes::streaming::{is_a, tag, take, take_while}, - character::{ - complete::one_of, - streaming::{char, none_of, satisfy}, - }, - combinator::{map, not, opt, peek, recognize, value}, - error::{Error, ErrorKind}, - multi::{many0, many1, many_till}, - sequence::{delimited, pair, preceded, separated_pair, terminated, tuple}, - Err, IResult, Parser, -}; - -// parser: parses tokens from lexer into events -// no well formedness, validity, or data model, simple translation of input into rust types -// output is a rust representation of the input xml -// types could be used for xml production too? - mod parsers; +/// [1] NSAttName ::= PrefixedAttName | DefaultAttName #[derive(Clone, Debug)] pub enum NSAttName<'s> { PrefixedAttName(PrefixedAttName<'s>), DefaultAttName, } -/// [1] NSAttName ::= PrefixedAttName | DefaultAttName -pub fn ns_att_name(input: &str) -> IResult<&str, NSAttName> { - alt(( - map(prefixed_att_name, |prefixed_att_name| { - NSAttName::PrefixedAttName(prefixed_att_name) - }), - value(NSAttName::DefaultAttName, default_att_name), - ))(input) -} +/// [2] PrefixedAttName ::= 'xmlns:' NCName #[derive(Clone, Debug)] pub struct PrefixedAttName<'s>(NCName<'s>); -/// [2] PrefixedAttName ::= 'xmlns:' NCName -pub fn prefixed_att_name(input: &str) -> IResult<&str, PrefixedAttName> { - map(preceded(tag("xmlns:"), nc_name), |nc_name| { - PrefixedAttName(nc_name) - })(input) -} +/// [3] DefaultAttName ::= 'xmlns'; #[derive(Clone, Debug)] pub struct DefaultAttName; -/// [3] DefaultAttName ::= 'xmlns'; -pub fn default_att_name(input: &str) -> IResult<&str, DefaultAttName> { - value(DefaultAttName, tag("xmlns"))(input) -} +/// [4] NCName ::= Name - (Char* ':' Char*) #[derive(Clone, Debug)] pub struct NCName<'s>(&'s str); -/// [4] NCName ::= Name - (Char* ':' Char*) -pub fn nc_name(input: &str) -> IResult<&str, NCName> { - map( - recognize(pair( - recognize(name_start_char).and_then(satisfy(|c| c != ':')), - many_till(name_char, peek(char(':'))), - )), - |nc_name| NCName(nc_name), - )(input) -} +/// [7] QName ::= PrefixedName | UnprefixedName #[derive(Clone, Debug)] pub enum QName<'s> { PrefixedName(PrefixedName<'s>), UnprefixedName(UnprefixedName<'s>), } -/// [7] QName ::= PrefixedName | UnprefixedName -pub fn q_name(input: &str) -> IResult<&str, QName> { - alt(( - map(prefixed_name, |prefixed_name| { - QName::PrefixedName(prefixed_name) - }), - map(unprefixed_name, |unprefixed_name| { - QName::UnprefixedName(unprefixed_name) - }), - ))(input) -} +/// [8] PrefixedName ::= Prefix ':' LocalPart #[derive(Clone, Debug)] pub struct PrefixedName<'s> { prefix: Prefix<'s>, local_part: LocalPart<'s>, } -/// [8] PrefixedName ::= Prefix ':' LocalPart -pub fn prefixed_name(input: &str) -> IResult<&str, PrefixedName> { - map( - separated_pair(prefix, char(':'), local_part), - |(prefix, local_part)| PrefixedName { prefix, local_part }, - )(input) -} +/// [9] UnprefixedName ::= LocalPart #[derive(Clone, Debug)] pub struct UnprefixedName<'s>(LocalPart<'s>); -/// [9] UnprefixedName ::= LocalPart -pub fn unprefixed_name(input: &str) -> IResult<&str, UnprefixedName> { - map(local_part, |local_part| UnprefixedName(local_part))(input) -} +/// [10] Prefix ::= NCName #[derive(Clone, Debug)] pub struct Prefix<'s>(NCName<'s>); -/// [10] Prefix ::= NCName -pub fn prefix(input: &str) -> IResult<&str, Prefix> { - map(nc_name, |nc_name| Prefix(nc_name))(input) -} +/// [11] LocalPart ::= NCName #[derive(Clone, Debug)] pub struct LocalPart<'s>(NCName<'s>); -/// [11] LocalPart ::= NCName -pub fn local_part(input: &str) -> IResult<&str, LocalPart> { - map(nc_name, |nc_name| LocalPart(nc_name))(input) -} // xml spec -pub type Document<'s> = (Prolog<'s>, Element<'s>, Vec<Misc<'s>>); /// [1] document ::= prolog element Misc* -pub fn document(input: &str) -> IResult<&str, Document> { - tuple((prolog, element, many0(misc)))(input) -} +pub type Document<'s> = (Prolog<'s>, Element<'s>, Vec<Misc<'s>>); -pub type Char = char; /// [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] /* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */ -pub fn xmlchar(input: &str) -> IResult<&str, Char> { - satisfy( - |c| matches!(c, '\u{9}' | '\u{A}' | '\u{D}' | '\u{20}'..='\u{D7FF}' | '\u{E000}'..='\u{FFFD}' | '\u{10000}'..='\u{10FFFF}'), - )(input) -} +#[repr(transparent)] +pub struct Char(char); -pub type S<'s> = &'s str; /// [3] S ::= (#x20 | #x9 | #xD | #xA)+ -pub fn s(input: &str) -> IResult<&str, S> { - is_a("\u{20}\u{9}\u{D}\u{A}")(input) -} +#[repr(transparent)] +pub struct S<'s>(&'s str); -pub type NameStartChar = char; /// [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] -pub fn name_start_char(input: &str) -> IResult<&str, NameStartChar> { - satisfy( - |c| matches!(c, ':' | 'A'..='Z' | '_' | 'a'..='z' | '\u{C0}'..='\u{D6}' | '\u{D8}'..='\u{F6}' | '\u{F8}'..='\u{2FF}' | '\u{370}'..='\u{37D}' | '\u{37F}'..='\u{1FFF}' | '\u{200C}'..='\u{200D}' | '\u{2070}'..='\u{218F}' | '\u{2C00}'..='\u{2FEF}' | '\u{3001}'..='\u{D7FF}' | '\u{F900}'..='\u{FDCF}' | '\u{FDF0}'..='\u{FFFD}' | '\u{10000}'..='\u{EFFFF}'), - )(input) -} +#[repr(transparent)] +pub struct NameStartChar(char); -pub type NameChar = char; /// [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040] -pub fn name_char(input: &str) -> IResult<&str, NameChar> { - alt(( - name_start_char, - satisfy( - |c| matches!(c, '-' | '.' | '0'..='9' | '\u{B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}'), - ), - ))(input) -} +#[repr(transparent)] +pub struct NameChar(char); -pub type Name<'s> = &'s str; /// [5] Name ::= NameStartChar (NameChar)* -pub fn name(input: &str) -> IResult<&str, Name> { - recognize(pair(name_start_char, many0(name_char)))(input) -} +#[derive(Debug, Clone, PartialEq, Eq)] +#[repr(transparent)] +pub struct Name<'s>(&'s str); -pub type Names<'s> = &'s str; /// [6] Names ::= Name (#x20 Name)* -pub fn names(input: &str) -> IResult<&str, Names> { - recognize(pair(name, many0(pair(char('\u{20}'), name))))(input) -} +#[repr(transparent)] +// TODO: turn into vec +pub struct Names<'s>(&'s str); -pub type Nmtoken<'s> = &'s str; /// [7] Nmtoken ::= (NameChar)+ -pub fn nmtoken(input: &str) -> IResult<&str, Nmtoken> { - recognize(many1(name_char))(input) -} +#[derive(Debug, Clone)] +#[repr(transparent)] +pub struct Nmtoken<'s>(&'s str); -pub type Nmtokens<'s> = &'s str; /// [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* -pub fn nmtokens(input: &str) -> IResult<&str, Nmtokens> { - recognize(pair(nmtoken, many0(pair(char('\u{20}'), nmtoken))))(input) -} +#[repr(transparent)] +// TODO: turn into vec +pub struct Nmtokens<'s>(&'s str); #[derive(Clone, Debug)] pub enum LiteralData<'s> { @@ -187,275 +94,103 @@ pub enum LiteralData<'s> { PEReference(PEReference<'s>), Reference(Reference<'s>), } - -pub type EntityValue<'s> = Vec<LiteralData<'s>>; /// [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' /// | "'" ([^%&'] | PEReference | Reference)* "'" -pub fn entity_value(input: &str) -> IResult<&str, EntityValue> { - alt(( - delimited( - char('"'), - many0(alt(( - map( - recognize(many_till(take(1usize), peek(one_of("%&\"")))), - |string| LiteralData::String(string), - ), - map(pe_reference, |pe_reference| { - LiteralData::PEReference(pe_reference) - }), - map(reference, |reference| LiteralData::Reference(reference)), - ))), - char('"'), - ), - delimited( - char('\''), - many0(alt(( - map( - recognize(many_till(take(1usize), peek(one_of("%&'")))), - |string| LiteralData::String(string), - ), - map(pe_reference, |pe_reference| { - LiteralData::PEReference(pe_reference) - }), - map(reference, |reference| LiteralData::Reference(reference)), - ))), - char('\''), - ), - ))(input) -} +#[derive(Debug)] +#[repr(transparent)] +pub struct EntityValue<'s>(Vec<LiteralData<'s>>); -pub type AttValue<'s> = Vec<LiteralData<'s>>; /// [10] AttValue ::= '"' ([^<&"] | Reference)* '"' /// | "'" ([^<&'] | Reference)* "'" -pub fn att_value(input: &str) -> IResult<&str, AttValue> { - alt(( - delimited( - char('"'), - many0(alt(( - map( - recognize(many_till(take(1usize), peek(one_of("%&\"")))), - |string| LiteralData::String(string), - ), - map(reference, |reference| LiteralData::Reference(reference)), - ))), - char('"'), - ), - delimited( - char('\''), - many0(alt(( - map( - recognize(many_till(take(1usize), peek(one_of("%&'")))), - |string| LiteralData::String(string), - ), - map(reference, |reference| LiteralData::Reference(reference)), - ))), - char('\''), - ), - ))(input) -} +#[derive(Clone, Debug)] +#[repr(transparent)] +pub struct AttValue<'s>(Vec<LiteralData<'s>>); -pub type SystemLiteral<'s> = &'s str; /// [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") -pub fn system_literal(input: &str) -> IResult<&str, SystemLiteral> { - alt(( - delimited(char('"'), recognize(many0(none_of("\""))), char('"')), - delimited(char('\''), recognize(many0(none_of("'"))), char('\'')), - ))(input) -} +#[derive(Debug)] +#[repr(transparent)] +pub struct SystemLiteral<'s>(&'s str); -pub type PubidLiteral<'s> = &'s str; /// [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" -pub fn pubid_literal(input: &str) -> IResult<&str, PubidLiteral> { - alt(( - delimited(char('"'), recognize(many0(pubid_char)), char('"')), - delimited( - char('\''), - recognize(many0(recognize(not(char('\''))).and_then(pubid_char))), - char('\''), - ), - ))(input) -} +#[derive(Debug)] +#[repr(transparent)] +pub struct PubidLiteral<'s>(&'s str); -pub type PubidChar<'s> = char; /// [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] -pub fn pubid_char(input: &str) -> IResult<&str, PubidChar> { - satisfy(|c| matches!(c, '\u{20}' | '\u{D}' | '\u{A}' | 'a'..='z' | 'A'..='Z' | '0'..='9'))( - input, - ) -} +#[repr(transparent)] +pub struct PubidChar(char); -pub type CharData<'s> = &'s str; /// [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) -pub fn char_data(input: &str) -> IResult<&str, CharData> { - recognize(many_till( - none_of("<&"), - peek(alt((recognize(one_of("<&")), tag("]]>")))), - ))(input) - - // let tagg: &str; - // if let Ok((_, tagg1)) = peek(take_until::<&str, &str, Error<&str>>("]]>"))(input) { - // if let Ok((_, tagg2)) = - // peek::<&str, &str, Error<&str>, _>(take_till(|c: char| c == '<' || c == '&'))(input) - // { - // if tagg1.len() < tagg2.len() { - // tagg = tagg1 - // } else { - // tagg = tagg2 - // } - // } else { - // tagg = tagg1; - // } - // } else { - // (_, tagg) = peek(take_till(|c| c == '<' || c == '&'))(input)? - // } - // tag(tagg)(input) - - // recognize(many0(permutation((none_of("<&"), not(tag("]]>"))))))(input) - // recognize(many0(not(alt((tag("<"), tag("&"), tag("]]>"))))))(input) - // take_till(|c| c == '<' || c == '&').and_then(take_until("]]>"))(input) -} +#[derive(Clone, Debug, PartialEq, Eq)] +#[repr(transparent)] +pub struct CharData<'s>(&'s str); -pub type Comment<'s> = &'s str; -/// Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' -pub fn comment(input: &str) -> IResult<&str, Comment> { - delimited( - tag("<!--"), - recognize(many_till(xmlchar, peek(tag("--")))), - tag("-->"), - )(input) -} +/// [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' +#[derive(Clone, Debug, PartialEq, Eq)] +#[repr(transparent)] +pub struct Comment<'s>(&'s str); +/// [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' #[derive(Clone, Debug)] pub struct PI<'s> { - target: &'s str, + target: PITarget<'s>, instruction: Option<&'s str>, } -/// [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' -pub fn pi(input: &str) -> IResult<&str, PI> { - map( - delimited( - tag("<?"), - pair( - pi_target, - opt(recognize(pair(s, many_till(xmlchar, peek(tag("?>")))))), - ), - tag("?>"), - ), - |(target, instruction)| PI { - target, - instruction, - }, - )(input) -} -pub type PITarget<'s> = &'s str; /// [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) -pub fn pi_target(input: &str) -> IResult<&str, PITarget> { - let (rest, name) = name(input)?; - if name.to_lowercase() == "xml" { - return Err(Err::Error(Error { - input, - // TODO: check if better error to return - code: ErrorKind::Tag, - })); - } else { - return Ok((rest, name)); - } -} +#[derive(Clone, Debug, PartialEq, Eq)] +#[repr(transparent)] +pub struct PITarget<'s>(Name<'s>); -pub type CDSect<'s> = (CDStart<'s>, CData<'s>, CDEnd<'s>); /// [18] CDSect ::= CDStart CData CDEnd -pub fn cd_sect(input: &str) -> IResult<&str, CDSect> { - tuple((cd_start, cdata, cd_end))(input) -} +#[derive(Clone, Debug, PartialEq, Eq)] +#[repr(transparent)] +pub struct CDSect<'s>(CData<'s>); -pub type CDStart<'s> = &'s str; /// [19] CDStart ::= '<![CDATA[' -pub fn cd_start(input: &str) -> IResult<&str, CDStart> { - tag("<![CDATA[")(input) -} +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct CDStart; -pub type CData<'s> = &'s str; /// [20] CData ::= (Char* - (Char* ']]>' Char*)) -pub fn cdata(input: &str) -> IResult<&str, CData> { - recognize(many_till(xmlchar, peek(tag("]]>"))))(input) -} +#[derive(Clone, Debug, PartialEq, Eq)] +#[repr(transparent)] +pub struct CData<'s>(&'s str); -pub type CDEnd<'s> = &'s str; /// [21] CDEnd ::= ']]>' -pub fn cd_end(input: &str) -> IResult<&str, CDEnd> { - tag("]]>")(input) -} +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct CDEnd; +/// [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? pub type Prolog<'s> = ( Option<XMLDecl<'s>>, Vec<Misc<'s>>, Option<(DoctypeDecl<'s>, Vec<Misc<'s>>)>, ); -/// [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? -pub fn prolog(input: &str) -> IResult<&str, Prolog> { - tuple(( - opt(xml_decl), - many0(misc), - opt(tuple((doctypedecl, many0(misc)))), - ))(input) -} +/// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' #[derive(Debug)] pub struct XMLDecl<'s> { version_info: VersionInfo, encoding_decl: Option<EncodingDecl<'s>>, sd_decl: Option<SDDecl>, } -/// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' -pub fn xml_decl(input: &str) -> IResult<&str, XMLDecl> { - map( - delimited( - tag("<?xml"), - tuple((version_info, opt(encoding_decl), opt(sd_decl))), - pair(opt(s), tag("?>")), - ), - |(version_info, encoding_decl, sd_decl)| XMLDecl { - version_info, - encoding_decl, - sd_decl, - }, - )(input) -} -pub type VersionInfo = VersionNum; /// [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') -pub fn version_info(input: &str) -> IResult<&str, VersionInfo> { - preceded( - tuple((s, tag("version"), eq)), - alt(( - delimited(char('\''), version_num, char('\'')), - delimited(char('"'), version_num, char('"')), - )), - )(input) -} +#[derive(Debug)] +pub struct VersionInfo(VersionNum); /// [25] Eq ::= S? '=' S? -pub fn eq(input: &str) -> IResult<&str, &str> { - recognize(tuple((opt(s), char('='), opt(s))))(input) -} +#[derive(Clone)] +pub struct Eq; +/// [26] VersionNum ::= '1.' [0-9]+ #[derive(Clone, Debug)] pub enum VersionNum { One, OneDotOne, } -/// [26] VersionNum ::= '1.' [0-9]+ -pub fn version_num(input: &str) -> IResult<&str, VersionNum> { - preceded( - tag("1."), - alt(( - value(VersionNum::One, char('0')), - value(VersionNum::OneDotOne, char('1')), - )), - )(input) -} +/// [27] Misc ::= Comment | PI | S #[derive(Clone, Debug)] pub enum Misc<'s> { Comment(Comment<'s>), @@ -463,80 +198,33 @@ pub enum Misc<'s> { // TODO: how to deal with whitespace S, } -/// [27] Misc ::= Comment | PI | S -pub fn misc(input: &str) -> IResult<&str, Misc> { - alt(( - map(comment, |comment| Misc::Comment(comment)), - map(pi, |pi| Misc::PI(pi)), - value(Misc::S, s), - ))(input) -} +/// [16] doctypedecl ::= '<!DOCTYPE' S QName (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>' +/// [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' #[derive(Debug)] pub struct DoctypeDecl<'s> { name: QName<'s>, external_id: Option<ExternalID<'s>>, int_subset: Option<IntSubset<'s>>, } -/// [16] doctypedecl ::= '<!DOCTYPE' S QName (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>' -/// [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' -pub fn doctypedecl(input: &str) -> IResult<&str, DoctypeDecl> { - map( - delimited( - pair(tag("<!DOCTYPE"), s), - tuple(( - q_name, - opt(preceded(s, external_id)), - preceded( - opt(s), - opt(terminated( - delimited(tag("["), int_subset, tag("]")), - opt(s), - )), - ), - )), - tag(">"), - ), - |(name, external_id, int_subset)| DoctypeDecl { - name, - external_id, - int_subset, - }, - )(input) -} +/// [28a] DeclSep ::= PEReference | S #[derive(Clone, Debug)] pub enum DeclSep<'s> { PEReference(PEReference<'s>), // TODO: tackle whitespace S, } -/// [28a] DeclSep ::= PEReference | S -pub fn decl_sep(input: &str) -> IResult<&str, DeclSep> { - alt(( - map(pe_reference, |pe_reference| { - DeclSep::PEReference(pe_reference) - }), - value(DeclSep::S, s), - ))(input) -} #[derive(Debug)] pub enum IntSubsetDeclaration<'s> { MarkupDecl(MarkupDecl<'s>), DeclSep(DeclSep<'s>), } -type IntSubset<'s> = Vec<IntSubsetDeclaration<'s>>; /// [28b] intSubset ::= (markupdecl | DeclSep)* -pub fn int_subset(input: &str) -> IResult<&str, IntSubset> { - many0(alt(( - map(markup_decl, |markup_decl| { - IntSubsetDeclaration::MarkupDecl(markup_decl) - }), - map(decl_sep, |decl_sep| IntSubsetDeclaration::DeclSep(decl_sep)), - )))(input) -} +pub type IntSubset<'s> = Vec<IntSubsetDeclaration<'s>>; +/// [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment #[derive(Debug)] pub enum MarkupDecl<'s> { Elementdecl(Elementdecl<'s>), @@ -546,117 +234,44 @@ pub enum MarkupDecl<'s> { PI(PI<'s>), Comment(Comment<'s>), } -/// [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment -pub fn markup_decl(input: &str) -> IResult<&str, MarkupDecl> { - alt(( - map(elementdecl, |elementdecl| { - MarkupDecl::Elementdecl(elementdecl) - }), - map(attlist_decl, |attlist_decl| { - MarkupDecl::AttlistDecl(attlist_decl) - }), - map(entity_decl, |entity_decl| { - MarkupDecl::EntityDecl(entity_decl) - }), - map(notation_decl, |notation_decl| { - MarkupDecl::NotationDecl(notation_decl) - }), - map(pi, |pi| MarkupDecl::PI(pi)), - map(comment, |comment| MarkupDecl::Comment(comment)), - ))(input) -} +/// [30] extSubset ::= TextDecl? extSubsetDecl pub struct ExtSubset<'s> { text_decl: Option<TextDecl<'s>>, ext_subset_decl: ExtSubsetDecl<'s>, } -/// [30] extSubset ::= TextDecl? extSubsetDecl -pub fn ext_subset(input: &str) -> IResult<&str, ExtSubset> { - map( - pair(opt(text_decl), ext_subset_decl), - |(text_decl, ext_subset_decl)| ExtSubset { - text_decl, - ext_subset_decl, - }, - )(input) -} pub enum ExtSubsetDeclaration<'s> { MarkupDecl(MarkupDecl<'s>), ConditionalSect(ConditionalSect<'s>), DeclSep(DeclSep<'s>), } -type ExtSubsetDecl<'s> = Vec<ExtSubsetDeclaration<'s>>; /// [31] extSubsetDecl ::= ( markupdecl | conditionalSect | DeclSep)* -pub fn ext_subset_decl(input: &str) -> IResult<&str, ExtSubsetDecl> { - many0(alt(( - map(markup_decl, |markup_decl| { - ExtSubsetDeclaration::MarkupDecl(markup_decl) - }), - map(conditional_sect, |conditional_sect| { - ExtSubsetDeclaration::ConditionalSect(conditional_sect) - }), - map(decl_sep, |decl_sep| ExtSubsetDeclaration::DeclSep(decl_sep)), - )))(input) -} +type ExtSubsetDecl<'s> = Vec<ExtSubsetDeclaration<'s>>; -pub type SDDecl = bool; /// [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) -pub fn sd_decl(input: &str) -> IResult<&str, SDDecl> { - preceded( - tuple((s, tag("standalone"), eq)), - alt(( - delimited( - char('\''), - alt((value(true, tag("yes")), value(false, tag("no")))), - char('\''), - ), - delimited( - char('"'), - alt((value(true, tag("yes")), value(false, tag("no")))), - char('"'), - ), - )), - )(input) -} +pub type SDDecl = bool; // (Productions 33 through 38 have been removed.) +/// [39] element ::= EmptyElemTag | STag content ETag #[derive(Debug, Clone)] pub enum Element<'s> { Empty(EmptyElemTag<'s>), NotEmpty(STag<'s>, Content<'s>, ETag<'s>), } -/// [39] element ::= EmptyElemTag | STag content ETag -pub fn element(input: &str) -> IResult<&str, Element> { - alt(( - map(empty_elem_tag, |empty_elem_tag| { - Element::Empty(empty_elem_tag) - }), - map(tuple((s_tag, content, e_tag)), |(s_tag, content, e_tag)| { - Element::NotEmpty(s_tag, content, e_tag) - }), - ))(input) -} +/// [12] STag ::= '<' QName (S Attribute)* S? '>' +/// [40] STag ::= '<' Name (S Attribute)* S? '>' #[derive(Debug, Clone)] pub struct STag<'s> { name: QName<'s>, attributes: Vec<Attribute<'s>>, } -/// [12] STag ::= '<' QName (S Attribute)* S? '>' -/// [40] STag ::= '<' Name (S Attribute)* S? '>' -pub fn s_tag(input: &str) -> IResult<&str, STag> { - map( - delimited( - tag("<"), - pair(q_name, many0(preceded(s, attribute))), - pair(opt(s), tag(">")), - ), - |(name, attributes)| STag { name, attributes }, - )(input) -} +/// [15] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue +// pub type Attribute<'s> = (Name<'s>, AttValue<'s>); +/// [41] Attribute ::= Name Eq AttValue #[derive(Debug, Clone)] pub enum Attribute<'s> { NamespaceDeclaration { @@ -668,36 +283,13 @@ pub enum Attribute<'s> { value: AttValue<'s>, }, } -/// [15] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue -pub fn attribute(input: &str) -> IResult<&str, Attribute> { - alt(( - map( - separated_pair(ns_att_name, eq, att_value), - |(ns_name, value)| Attribute::NamespaceDeclaration { ns_name, value }, - ), - map(separated_pair(q_name, eq, att_value), |(name, value)| { - Attribute::Attribute { name, value } - }), - ))(input) -} -// pub type Attribute<'s> = (Name<'s>, AttValue<'s>); -/// [41] Attribute ::= Name Eq AttValue -// pub fn attribute(input: &str) -> IResult<&str, Attribute> { -// separated_pair(name, eq, att_value)(input) -// } +/// [13] ETag ::= '</' QName S? '>' +/// [42] ETag ::= '</' Name S? '>' #[derive(Debug, Clone)] pub struct ETag<'s> { name: QName<'s>, } -/// [13] ETag ::= '</' QName S? '>' -/// [42] ETag ::= '</' Name S? '>' -pub fn e_tag(input: &str) -> IResult<&str, ETag> { - map( - delimited(tag("</"), q_name, pair(opt(s), tag(">"))), - |name| ETag { name }, - )(input) -} #[derive(Debug, Clone)] pub enum ContentItem<'s> { @@ -708,68 +300,31 @@ pub enum ContentItem<'s> { PI(PI<'s>), Comment(Comment<'s>), } +/// [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* #[derive(Debug, Clone)] pub struct Content<'s> { char_data: Option<CharData<'s>>, content: Vec<(ContentItem<'s>, Option<CharData<'s>>)>, } -/// [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* -pub fn content(input: &str) -> IResult<&str, Content> { - map( - pair( - opt(char_data), - many0(pair( - alt(( - map(element, |element| ContentItem::Element(element)), - map(reference, |reference| ContentItem::Reference(reference)), - map(cd_sect, |cd_sect| ContentItem::CDSect(cd_sect)), - map(pi, |pi| ContentItem::PI(pi)), - map(comment, |comment| ContentItem::Comment(comment)), - )), - opt(char_data), - )), - ), - |(char_data, content)| Content { char_data, content }, - )(input) -} +/// [14] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' +/// [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' [WFC: Unique Att Spec] #[derive(Debug, Clone)] pub struct EmptyElemTag<'s> { name: QName<'s>, attributes: Vec<Attribute<'s>>, } -/// [14] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' -/// [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' [WFC: Unique Att Spec] -pub fn empty_elem_tag(input: &str) -> IResult<&str, EmptyElemTag> { - map( - delimited( - tag("<"), - pair(q_name, many0(preceded(s, attribute))), - pair(opt(s), tag("/>")), - ), - |(name, attributes)| EmptyElemTag { name, attributes }, - )(input) -} +/// [17] elementdecl ::= '<!ELEMENT' S QName S contentspec S? '>' +/// [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>' #[derive(Debug)] pub struct Elementdecl<'s> { name: QName<'s>, contentspec: Contentspec<'s>, } -/// [17] elementdecl ::= '<!ELEMENT' S QName S contentspec S? '>' -/// [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>' -pub fn elementdecl(input: &str) -> IResult<&str, Elementdecl> { - map( - delimited( - pair(tag("<!ELEMENT"), s), - separated_pair(q_name, s, contentspec), - pair(opt(s), tag(">")), - ), - |(name, contentspec)| Elementdecl { name, contentspec }, - )(input) -} // TODO: casings??? +/// [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children #[derive(Clone, Debug)] pub enum Contentspec<'s> { Empty, @@ -777,16 +332,8 @@ pub enum Contentspec<'s> { Mixed(Mixed<'s>), Children(Children<'s>), } -/// [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children -pub fn contentspec(input: &str) -> IResult<&str, Contentspec> { - alt(( - value(Contentspec::Empty, tag("EMPTY")), - value(Contentspec::Any, tag("ANY")), - map(mixed, |mixed| Contentspec::Mixed(mixed)), - map(children, |children| Contentspec::Children(children)), - ))(input) -} +/// Occurence ::= ('?' | '*' | '+')? #[derive(Clone, Debug)] pub enum Occurence { Once, @@ -794,46 +341,18 @@ pub enum Occurence { Many0, Many1, } -/// Occurence ::= ('?' | '*' | '+')? -pub fn occurence(input: &str) -> IResult<&str, Occurence> { - map( - opt(alt((tag("?"), tag("*"), tag("+")))), - |occurence| match occurence { - Some("?") => Occurence::Optional, - Some("*") => Occurence::Many0, - Some("+") => Occurence::Many1, - _ => Occurence::Once, - }, - )(input) -} #[derive(Clone, Debug)] pub enum ChildrenKind<'s> { Choice(Choice<'s>), Seq(Seq<'s>), } +/// [47] children ::= (choice | seq) ('?' | '*' | '+')? #[derive(Clone, Debug)] pub struct Children<'s> { kind: ChildrenKind<'s>, occurence: Occurence, } -/// [47] children ::= (choice | seq) ('?' | '*' | '+')? -pub fn children(input: &str) -> IResult<&str, Children> { - map( - pair( - alt(( - map(choice, |choice| ChildrenKind::Choice(choice)), - map(seq, |seq| ChildrenKind::Seq(seq)), - )), - occurence, - ), - |(kind, occurence)| Children { kind, occurence }, - )(input) - // alt(( - // map(pair(choice, occurence), |(choice, occurence)| Children::Choice(choice, occurence)), - // map(pair(seq, occurence), |(seq, occurence)| Children::Seq(seq, occurence)) - // ))(input) -} #[derive(Clone, Debug)] pub enum CpKind<'s> { @@ -841,165 +360,63 @@ pub enum CpKind<'s> { Choice(Choice<'s>), Seq(Seq<'s>), } +/// [18] cp ::= (QName | choice | seq) ('?' | '*' | '+')? +/// [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? #[derive(Clone, Debug)] pub struct Cp<'s> { kind: CpKind<'s>, occurence: Occurence, } -/// [18] cp ::= (QName | choice | seq) ('?' | '*' | '+')? -/// [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? -pub fn cp(input: &str) -> IResult<&str, Cp> { - map( - pair( - alt(( - map(q_name, |name| CpKind::Name(name)), - map(choice, |choice| CpKind::Choice(choice)), - map(seq, |seq| CpKind::Seq(seq)), - )), - occurence, - ), - |(kind, occurence)| Cp { kind, occurence }, - )(input) -} +/// [49] choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')' #[derive(Clone, Debug)] pub struct Choice<'s>(Vec<Cp<'s>>); -/// [49] choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')' -pub fn choice(input: &str) -> IResult<&str, Choice> { - map( - delimited( - pair(tag("("), opt(s)), - pair(cp, many1(preceded(tuple((opt(s), tag("|"), opt(s))), cp))), - pair(opt(s), tag(")")), - ), - |(head, tail)| { - let choice = vec![vec![head], tail].concat(); - Choice(choice) - }, - )(input) -} +/// [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' #[derive(Clone, Debug)] pub struct Seq<'s>(Vec<Cp<'s>>); -/// [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' -pub fn seq(input: &str) -> IResult<&str, Seq> { - map( - delimited( - pair(tag("("), opt(s)), - pair(cp, many0(preceded(tuple((opt(s), tag(","), opt(s))), cp))), - pair(opt(s), tag(")")), - ), - |(head, tail)| { - let seq = vec![vec![head], tail].concat(); - Seq(seq) - }, - )(input) -} // always contains #PCDATA -#[derive(Clone, Debug)] -pub struct Mixed<'s>(Vec<QName<'s>>); /// [19] Mixed ::= '(' S? '#PCDATA' (S? '|' S? QName)* S? ')*' | '(' S? '#PCDATA' S? ')' /// [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')' -pub fn mixed(input: &str) -> IResult<&str, Mixed> { - alt(( - map( - delimited( - tuple((tag("("), s, tag("#PCDATA"))), - many0(preceded(tuple((opt(s), tag("|"), opt(s))), q_name)), - pair(opt(s), tag(")*")), - ), - |names| Mixed(names), - ), - value( - Mixed(Vec::new()), - tuple((tag("("), opt(s), tag("#PCDATA"), opt(s), tag(")"))), - ), - ))(input) -} +#[derive(Clone, Debug)] +pub struct Mixed<'s>(Vec<QName<'s>>); +/// [20] AttlistDecl ::= '<!ATTLIST' S QName AttDef* S? '>' +/// [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' #[derive(Debug)] pub struct AttlistDecl<'s> { element_type: QName<'s>, att_defs: Vec<AttDef<'s>>, } -/// [20] AttlistDecl ::= '<!ATTLIST' S QName AttDef* S? '>' -/// [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' -pub fn attlist_decl(input: &str) -> IResult<&str, AttlistDecl> { - map( - delimited( - pair(tag("<!ATTLIST"), s), - pair(q_name, many0(att_def)), - pair(opt(s), tag(">")), - ), - |(element_type, att_defs)| AttlistDecl { - element_type, - att_defs, - }, - )(input) -} #[derive(Debug)] pub enum AttDefName<'s> { QName(QName<'s>), NSAttName(NSAttName<'s>), } +/// [21] AttDef ::= S (QName | NSAttName) S AttType S DefaultDecl +/// [53] AttDef ::= S Name S AttType S DefaultDecl #[derive(Debug)] pub struct AttDef<'s> { name: AttDefName<'s>, att_type: AttType<'s>, default_decl: DefaultDecl<'s>, } -/// [21] AttDef ::= S (QName | NSAttName) S AttType S DefaultDecl -/// [53] AttDef ::= S Name S AttType S DefaultDecl -pub fn att_def(input: &str) -> IResult<&str, AttDef> { - map( - tuple(( - preceded( - s, - alt(( - map(q_name, |q_name| AttDefName::QName(q_name)), - map(ns_att_name, |ns_att_name| { - AttDefName::NSAttName(ns_att_name) - }), - )), - ), - preceded(s, att_type), - preceded(s, default_decl), - )), - |(name, att_type, default_decl)| AttDef { - name, - att_type, - default_decl, - }, - )(input) -} +/// [54] AttType ::= StringType | TokenizedType | EnumeratedType #[derive(Clone, Debug)] pub enum AttType<'s> { StringType, TokenizedType(TokenizedType), EnumeratedType(EnumeratedType<'s>), } -/// [54] AttType ::= StringType | TokenizedType | EnumeratedType -pub fn att_type(input: &str) -> IResult<&str, AttType> { - alt(( - value(AttType::StringType, string_type), - map(tokenized_type, |tokenized_type| { - AttType::TokenizedType(tokenized_type) - }), - map(enumerated_type, |enumerated_type| { - AttType::EnumeratedType(enumerated_type) - }), - ))(input) -} -pub type StringType<'s> = &'s str; /// [55] StringType ::= 'CDATA' -pub fn string_type(input: &str) -> IResult<&str, StringType> { - tag("CDATA")(input) -} +#[derive(Clone)] +pub struct StringType; +/// [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' #[derive(Clone, Debug)] pub enum TokenizedType { ID, @@ -1010,265 +427,97 @@ pub enum TokenizedType { NMToken, NMTokens, } -/// [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' -pub fn tokenized_type(input: &str) -> IResult<&str, TokenizedType> { - alt(( - value(TokenizedType::ID, tag("ID")), - // TODO: check if this is required - // try idrefs first to avoid losing 'S' - value(TokenizedType::IDRefs, tag("IDREFS")), - value(TokenizedType::IDRef, tag("IDREF")), - value(TokenizedType::Entity, tag("ENTITY")), - value(TokenizedType::Entities, tag("ENTITIES")), - // same here - value(TokenizedType::NMTokens, tag("NMTOKENS")), - value(TokenizedType::NMToken, tag("NMTOKEN")), - ))(input) -} +/// [57] EnumeratedType ::= NotationType | Enumeration #[derive(Debug, Clone)] pub enum EnumeratedType<'s> { NotationType(NotationType<'s>), Enumeration(Enumeration<'s>), } -/// [57] EnumeratedType ::= NotationType | Enumeration -pub fn enumerated_type(input: &str) -> IResult<&str, EnumeratedType> { - alt(( - map(notation_type, |notation_type| { - EnumeratedType::NotationType(notation_type) - }), - map(enumeration, |enumeration| { - EnumeratedType::Enumeration(enumeration) - }), - ))(input) -} +/// [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' #[derive(Debug, Clone)] pub struct NotationType<'s>(Vec<Name<'s>>); -/// [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' -pub fn notation_type(input: &str) -> IResult<&str, NotationType> { - map( - delimited( - tuple((tag("NOTATION"), s, tag("("), opt(s))), - pair( - name, - many0(preceded(tuple((opt(s), tag("|"), opt(s))), name)), - ), - pair(opt(s), tag(")")), - ), - |(head, tail)| { - let notation_type = vec![vec![head], tail].concat(); - NotationType(notation_type) - }, - )(input) -} +/// [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' #[derive(Debug, Clone)] pub struct Enumeration<'s>(Vec<Nmtoken<'s>>); -/// [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' -pub fn enumeration(input: &str) -> IResult<&str, Enumeration> { - map( - delimited( - pair(tag("("), opt(s)), - pair( - nmtoken, - many0(preceded(tuple((opt(s), tag("|"), opt(s))), nmtoken)), - ), - pair(opt(s), tag(")")), - ), - |(head, tail)| { - let enumeration = vec![vec![head], tail].concat(); - Enumeration(enumeration) - }, - )(input) -} +/// [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) #[derive(Debug, Clone)] pub enum DefaultDecl<'s> { Required, Implied, Fixed(AttValue<'s>), } -/// [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) -pub fn default_decl(input: &str) -> IResult<&str, DefaultDecl> { - alt(( - value(DefaultDecl::Required, tag("#REQUIRED")), - value(DefaultDecl::Implied, tag("#IMPLIED")), - map( - preceded(opt(pair(tag("#FIXED"), s)), att_value), - |att_value| DefaultDecl::Fixed(att_value), - ), - ))(input) -} +/// [61] conditionalSect ::= includeSect | ignoreSect pub enum ConditionalSect<'s> { IncludeSect(IncludeSect<'s>), IgnoreSect(IgnoreSect<'s>), } -/// [61] conditionalSect ::= includeSect | ignoreSect -pub fn conditional_sect(input: &str) -> IResult<&str, ConditionalSect> { - alt(( - map(include_sect, |include_sect| { - ConditionalSect::IncludeSect(include_sect) - }), - map(ignore_sect, |ignore_sect| { - ConditionalSect::IgnoreSect(ignore_sect) - }), - ))(input) -} -pub struct IncludeSect<'s>(ExtSubsetDecl<'s>); /// [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' -pub fn include_sect(input: &str) -> IResult<&str, IncludeSect> { - map( - delimited( - tuple((tag("<!["), opt(s), tag("INCLUDE"), opt(s), tag("["))), - ext_subset_decl, - tag("]]>"), - ), - |ext_subset_decl| IncludeSect(ext_subset_decl), - )(input) -} +pub struct IncludeSect<'s>(ExtSubsetDecl<'s>); -pub struct IgnoreSect<'s>(Vec<IgnoreSectContents<'s>>); /// [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>' -pub fn ignore_sect(input: &str) -> IResult<&str, IgnoreSect> { - map( - delimited( - tuple((tag("<!["), opt(s), tag("IGNORE"), opt(s), tag("["))), - many0(ignore_sect_contents), - tag("]]>"), - ), - |ignore_sect_contents| IgnoreSect(ignore_sect_contents), - )(input) -} +pub struct IgnoreSect<'s>(Vec<IgnoreSectContents<'s>>); +/// [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* pub struct IgnoreSectContents<'s> { // TODO: what the fuck does this mean ignore: Ignore<'s>, ignore_list: Vec<(IgnoreSectContents<'s>, Ignore<'s>)>, } -/// [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* -pub fn ignore_sect_contents(input: &str) -> IResult<&str, IgnoreSectContents> { - map( - pair( - ignore, - many0(tuple(( - delimited(tag("<!["), ignore_sect_contents, tag("]]>")), - ignore, - ))), - ), - |(ignore, ignore_list)| IgnoreSectContents { - ignore, - ignore_list, - }, - )(input) -} -pub type Ignore<'s> = &'s str; /// [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) -pub fn ignore(input: &str) -> IResult<&str, Ignore> { - recognize(many_till(xmlchar, peek(alt((tag("<!["), tag("]]>"))))))(input) -} +pub struct Ignore<'s>(&'s str); +/// [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' #[derive(Clone, Debug)] pub enum CharRef<'s> { Decimal(&'s str), Hexadecimal(&'s str), } -/// [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' -pub fn char_ref(input: &str) -> IResult<&str, CharRef> { - alt(( - delimited( - tag("&#"), - map(take_while(|c| matches!(c, '0'..='9')), |decimal| { - CharRef::Decimal(decimal) - }), - tag(";"), - ), - delimited( - tag("&#x"), - map( - take_while(|c| matches!(c, '0'..='9' | 'a'..='f' | 'A'..='F' )), - |hexadecimal| CharRef::Hexadecimal(hexadecimal), - ), - tag(";"), - ), - ))(input) -} +/// [67] Reference ::= EntityRef | CharRef #[derive(Clone, Debug)] pub enum Reference<'s> { EntityRef(EntityRef<'s>), CharRef(CharRef<'s>), } -/// [67] Reference ::= EntityRef | CharRef -pub fn reference(input: &str) -> IResult<&str, Reference> { - alt(( - map(entity_ref, |entity_ref| Reference::EntityRef(entity_ref)), - map(char_ref, |char_ref| Reference::CharRef(char_ref)), - ))(input) -} -pub type EntityRef<'s> = &'s str; /// [68] EntityRef ::= '&' Name ';' -pub fn entity_ref(input: &str) -> IResult<&str, EntityRef> { - delimited(tag("&"), name, tag(";"))(input) -} +#[derive(Clone, Debug)] +pub struct EntityRef<'s>(Name<'s>); -pub type PEReference<'s> = &'s str; /// [69] PEReference ::= '%' Name ';' -pub fn pe_reference(input: &str) -> IResult<&str, PEReference> { - delimited(tag("%"), name, tag(";"))(input) -} +#[derive(Clone, Debug)] +#[repr(transparent)] +pub struct PEReference<'s>(Name<'s>); +/// [70] EntityDecl ::= GEDecl | PEDecl #[derive(Debug)] pub enum EntityDecl<'s> { GEDecl(GEDecl<'s>), PEDecl(PEDecl<'s>), } -/// [70] EntityDecl ::= GEDecl | PEDecl -pub fn entity_decl(input: &str) -> IResult<&str, EntityDecl> { - alt(( - map(ge_decl, |ge_decl| EntityDecl::GEDecl(ge_decl)), - map(pe_decl, |pe_decl| EntityDecl::PEDecl(pe_decl)), - ))(input) -} +/// [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' #[derive(Debug)] pub struct GEDecl<'s> { name: Name<'s>, entity_def: EntityDef<'s>, } -/// [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' -pub fn ge_decl(input: &str) -> IResult<&str, GEDecl> { - map( - delimited( - pair(tag("<!ENTITY"), s), - separated_pair(name, s, entity_def), - pair(opt(s), tag(">")), - ), - |(name, entity_def)| GEDecl { name, entity_def }, - )(input) -} +/// [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' #[derive(Debug)] pub struct PEDecl<'s> { name: Name<'s>, pe_def: PEDef<'s>, } -/// [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' -pub fn pe_decl(input: &str) -> IResult<&str, PEDecl> { - map( - delimited( - tuple((tag("<!ENTITY"), s, tag("%"), s)), - separated_pair(name, s, pe_def), - pair(opt(s), tag(">")), - ), - |(name, pe_def)| PEDecl { name, pe_def }, - )(input) -} +/// [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) #[derive(Debug)] pub enum EntityDef<'s> { EntityValue(EntityValue<'s>), @@ -1277,249 +526,62 @@ pub enum EntityDef<'s> { ndata_decl: Option<NDataDecl<'s>>, }, } -/// [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) -pub fn entity_def(input: &str) -> IResult<&str, EntityDef> { - alt(( - map(entity_value, |entity_value| { - EntityDef::EntityValue(entity_value) - }), - map( - pair(external_id, opt(ndata_decl)), - |(external_id, ndata_decl)| EntityDef::ExternalID { - external_id, - ndata_decl, - }, - ), - ))(input) -} +/// [74] PEDef ::= EntityValue | ExternalID #[derive(Debug)] pub enum PEDef<'s> { EntityValue(EntityValue<'s>), ExternalID(ExternalID<'s>), } -/// [74] PEDef ::= EntityValue | ExternalID -pub fn pe_def(input: &str) -> IResult<&str, PEDef> { - alt(( - map(entity_value, |entity_value| { - PEDef::EntityValue(entity_value) - }), - map(external_id, |external_id| PEDef::ExternalID(external_id)), - ))(input) -} +/// [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral #[derive(Debug)] pub enum ExternalID<'s> { SYSTEM { - system_identifier: &'s str, + system_identifier: SystemLiteral<'s>, }, PUBLIC { - public_identifier: &'s str, - system_identifier: &'s str, + public_identifier: PubidLiteral<'s>, + system_identifier: SystemLiteral<'s>, }, } -/// [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral -// pub fn external_id(input: &str) -> IResult<&str, ExternalID> { -pub fn external_id(input: &str) -> IResult<&str, ExternalID> { - alt(( - map( - preceded(pair(tag("SYSTEM"), s), system_literal), - |system_identifier| ExternalID::SYSTEM { system_identifier }, - ), - map( - preceded( - pair(tag("PUBLIC"), s), - separated_pair(pubid_literal, s, system_literal), - ), - |(public_identifier, system_identifier)| ExternalID::PUBLIC { - public_identifier, - system_identifier, - }, - ), - ))(input) -} -pub type NDataDecl<'s> = &'s str; /// [76] NDataDecl ::= S 'NDATA' S Name -pub fn ndata_decl(input: &str) -> IResult<&str, NDataDecl> { - preceded(tuple((s, tag("NDATA"), s)), name)(input) -} +#[derive(Debug)] +pub struct NDataDecl<'s>(Name<'s>); +/// [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' pub struct TextDecl<'s> { version_info: Option<VersionInfo>, encoding_decl: EncodingDecl<'s>, } -/// [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' -pub fn text_decl(input: &str) -> IResult<&str, TextDecl> { - map( - delimited( - tag("<?xml"), - pair(opt(version_info), terminated(encoding_decl, opt(s))), - tag("?>"), - ), - |(version_info, encoding_decl)| TextDecl { - version_info, - encoding_decl, - }, - )(input) -} +/// [78] extParsedEnt ::= TextDecl? content pub struct ExtParsedEnt<'s> { text_decl: Option<TextDecl<'s>>, content: Content<'s>, } -/// [78] extParsedEnt ::= TextDecl? content -pub fn ext_parsed_ent(input: &str) -> IResult<&str, ExtParsedEnt> { - map(pair(opt(text_decl), content), |(text_decl, content)| { - ExtParsedEnt { text_decl, content } - })(input) -} -pub type EncodingDecl<'s> = EncName<'s>; /// [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName -pub fn encoding_decl(input: &str) -> IResult<&str, EncodingDecl> { - preceded( - tuple((s, tag("encoding"), eq)), - alt(( - delimited(char('"'), enc_name, char('"')), - delimited(char('\''), enc_name, char('\'')), - )), - )(input) -} +#[derive(Debug)] +pub struct EncodingDecl<'s>(EncName<'s>); -pub type EncName<'s> = &'s str; /// [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* -pub fn enc_name(input: &str) -> IResult<&str, EncName> { - recognize(pair( - satisfy(|c| matches!(c, 'A'..='Z' | 'a'..='z' )), - many0(satisfy( - |c| matches!(c, 'A'..='Z' | 'a'..='z' | '0'..='9' | '.' | '_' | '-' ), - )), - ))(input) -} - #[derive(Debug)] -pub struct NotationDecl<'s> { - name: &'s str, - id: NotationDeclID<'s>, -} +pub struct EncName<'s>(&'s str); + #[derive(Debug)] pub enum NotationDeclID<'s> { External(ExternalID<'s>), Public(PublicID<'s>), } /// [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' -pub fn notation_decl(input: &str) -> IResult<&str, NotationDecl> { - map( - delimited( - pair(tag("<!NOTATION"), s), - separated_pair( - name, - s, - alt(( - map(external_id, |external_id| { - NotationDeclID::External(external_id) - }), - map(public_id, |public_id| NotationDeclID::Public(public_id)), - )), - ), - pair(opt(s), tag(">")), - ), - |(name, id)| NotationDecl { name, id }, - )(input) +#[derive(Debug)] +pub struct NotationDecl<'s> { + name: Name<'s>, + id: NotationDeclID<'s>, } -pub type PublicID<'s> = &'s str; /// [83] PublicID ::= 'PUBLIC' S PubidLiteral -pub fn public_id(input: &str) -> IResult<&str, PublicID> { - preceded(pair(tag("PUBLIC"), s), pubid_literal)(input) -} - -#[cfg(test)] -mod tests { - use std::num::NonZero; - - use super::*; - - #[test] - fn test_char_data() { - assert_eq!(Ok(("&def]]>ghi", "abc")), char_data("abc&def]]>ghi")); - assert_eq!(Ok(("]]>ghi", "abcdef")), char_data("abcdef]]>ghi")); - assert_eq!(Ok(("&defghi", "abc")), char_data("abc&defghi")); - assert_eq!(Ok(("]]>def&ghi", "abc")), char_data("abc]]>def&ghi")); - assert_eq!(Ok(("&ghi", "abc]>def")), char_data("abc]>def&ghi")); - assert_eq!( - Err(Err::Incomplete(nom::Needed::Size( - NonZero::new(3usize).unwrap() - ))), - char_data("abcdefghi") - ); - } - - #[test] - fn test_comment() { - assert_eq!(Ok(("", "")), comment("<!---->")); - assert_eq!(Ok(("", "asdf")), comment("<!--asdf-->")); - assert_eq!(Ok(("", "as-df")), comment("<!--as-df-->")); - assert_eq!( - Err(Err::Incomplete(nom::Needed::Size( - NonZero::new(2usize).unwrap() - ))), - comment("<!--asdf") - ); - } - - #[test] - fn test_pi_target() { - assert_eq!(Ok((" ", "asdf")), pi_target("asdf ")); - assert_eq!(Ok((" ", "xmlasdf")), pi_target("xmlasdf ")); - assert_eq!( - Err(Err::Error(Error { - input: "xml ", - code: ErrorKind::Tag - })), - pi_target("xml ") - ); - assert_eq!( - Err(Err::Error(Error { - input: "xMl ", - code: ErrorKind::Tag - })), - pi_target("xMl ") - ); - } - - #[test] - fn test_cd_sect() { - assert_eq!( - Ok(( - "", - ("<![CDATA[", "<greeting>Hello, world!</greeting>", "]]>") - )), - cd_sect("<![CDATA[<greeting>Hello, world!</greeting>]]>") - ) - } - - #[test] - fn test_cd_start() { - assert_eq!(Ok(("asdf", "<![CDATA[")), cd_start("<![CDATA[asdf")) - } - - #[test] - fn test_cdata() { - assert_eq!(Ok(("]]>asdf", "asdf")), cdata("asdf]]>asdf")); - assert_eq!( - Ok(("]]>asdf", "<![CDATA[asdf")), - cdata("<![CDATA[asdf]]>asdf") - ); - assert_eq!( - Ok(("]]>asdf", "<greeting>Hello, world!</greeting>")), - cdata("<greeting>Hello, world!</greeting>]]>asdf") - ) - } - - #[test] - fn test_cd_end() { - assert_eq!(Ok(("asdf", "]]>")), cd_end("]]>asdf")) - } -} +#[derive(Debug)] +pub struct PublicID<'s>(PubidLiteral<'s>); diff --git a/src/xml/parsers.rs b/src/xml/parsers.rs index 8b13789..135f5c7 100644 --- a/src/xml/parsers.rs +++ b/src/xml/parsers.rs @@ -1 +1,1503 @@ +use nom::{ + branch::alt, + bytes::streaming::{is_a, tag, take, take_while}, + character::{ + complete::one_of, + streaming::{char, none_of, satisfy}, + }, + combinator::{map, not, opt, peek, recognize, value}, + error::{Error, ErrorKind}, + multi::{many0, many1, many_till}, + sequence::{delimited, pair, preceded, separated_pair, terminated, tuple}, + Err, IResult, Parser as NomParser, +}; +use crate::xml::NSAttName; + +use super::{ + AttDef, AttDefName, AttType, AttValue, AttlistDecl, Attribute, CDEnd, CDSect, CDStart, CData, + Char, CharData, CharRef, Children, ChildrenKind, Choice, Comment, ConditionalSect, Content, + ContentItem, Contentspec, Cp, CpKind, DeclSep, DefaultAttName, DefaultDecl, DoctypeDecl, + Document, ETag, Element, Elementdecl, EmptyElemTag, EncName, EncodingDecl, EntityDecl, + EntityDef, EntityRef, EntityValue, EnumeratedType, Enumeration, Eq, ExtParsedEnt, ExtSubset, + ExtSubsetDecl, ExtSubsetDeclaration, ExternalID, GEDecl, Ignore, IgnoreSect, + IgnoreSectContents, IncludeSect, IntSubset, IntSubsetDeclaration, LiteralData, LocalPart, + MarkupDecl, Misc, Mixed, NCName, NDataDecl, Name, NameChar, NameStartChar, Names, Nmtoken, + Nmtokens, NotationDecl, NotationDeclID, NotationType, Occurence, PEDecl, PEDef, PEReference, + PITarget, Prefix, PrefixedAttName, PrefixedName, Prolog, PubidChar, PubidLiteral, PublicID, + QName, Reference, SDDecl, STag, Seq, StringType, SystemLiteral, TextDecl, TokenizedType, + UnprefixedName, VersionInfo, VersionNum, XMLDecl, PI, S, +}; + +pub trait Parser<'s, T> { + fn parse(input: &'s str) -> IResult<&str, T>; +} + +/// [1] NSAttName ::= PrefixedAttName | DefaultAttName +impl<'s> Parser<'s, NSAttName<'s>> for NSAttName<'s> { + fn parse(input: &'s str) -> IResult<&str, NSAttName<'s>> { + alt(( + map(PrefixedAttName::parse, |prefixed_att_name| { + NSAttName::PrefixedAttName(prefixed_att_name) + }), + value(NSAttName::DefaultAttName, DefaultAttName::parse), + ))(input) + } +} + +/// [2] PrefixedAttName ::= 'xmlns:' NCName +impl<'s> Parser<'s, PrefixedAttName<'s>> for PrefixedAttName<'s> { + fn parse(input: &'s str) -> IResult<&str, PrefixedAttName<'s>> { + map(preceded(tag("xmlns:"), NCName::parse), |nc_name| { + PrefixedAttName(nc_name) + })(input) + } +} + +/// [3] DefaultAttName ::= 'xmlns'; +impl Parser<'_, DefaultAttName> for DefaultAttName { + fn parse(input: &str) -> IResult<&str, DefaultAttName> { + value(DefaultAttName, tag("xmlns"))(input) + } +} + +/// [4] NCName ::= Name - (Char* ':' Char*) +impl<'s> Parser<'s, NCName<'s>> for NCName<'s> { + fn parse(input: &'s str) -> IResult<&str, NCName<'s>> { + map( + recognize(pair( + recognize(NameStartChar::parse).and_then(satisfy(|c| c != ':')), + many_till(NameChar::parse, peek(char(':'))), + )), + |nc_name| NCName(nc_name), + )(input) + } +} + +/// [7] QName ::= PrefixedName | UnprefixedName +impl<'s> Parser<'s, QName<'s>> for QName<'s> { + fn parse(input: &'s str) -> IResult<&str, QName<'s>> { + alt(( + map(PrefixedName::parse, |prefixed_name| { + QName::PrefixedName(prefixed_name) + }), + map(UnprefixedName::parse, |unprefixed_name| { + QName::UnprefixedName(unprefixed_name) + }), + ))(input) + } +} + +/// [8] PrefixedName ::= Prefix ':' LocalPart +impl<'s> Parser<'s, PrefixedName<'s>> for PrefixedName<'s> { + fn parse(input: &'s str) -> IResult<&str, PrefixedName<'s>> { + map( + separated_pair(Prefix::parse, char(':'), LocalPart::parse), + |(prefix, local_part)| PrefixedName { prefix, local_part }, + )(input) + } +} + +/// [9] UnprefixedName ::= LocalPart +impl<'s> Parser<'s, UnprefixedName<'s>> for UnprefixedName<'s> { + fn parse(input: &'s str) -> IResult<&str, UnprefixedName<'s>> { + map(LocalPart::parse, |local_part| UnprefixedName(local_part))(input) + } +} + +/// [10] Prefix ::= NCName +impl<'s> Parser<'s, Prefix<'s>> for Prefix<'s> { + fn parse(input: &'s str) -> IResult<&str, Prefix<'s>> { + map(NCName::parse, |nc_name| Prefix(nc_name))(input) + } +} + +/// [11] LocalPart ::= NCName +impl<'s> Parser<'s, LocalPart<'s>> for LocalPart<'s> { + fn parse(input: &'s str) -> IResult<&str, LocalPart<'s>> { + map(NCName::parse, |nc_name| LocalPart(nc_name))(input) + } +} + +// xml spec + +/// [1] document ::= prolog element Misc* +impl<'s> Parser<'s, Document<'s>> for Document<'s> { + fn parse(input: &'s str) -> IResult<&str, Document<'s>> { + tuple((Prolog::parse, Element::parse, many0(Misc::parse)))(input) + } +} + +/// [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] /* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */ +impl Parser<'_, Char> for Char { + fn parse(input: &str) -> IResult<&str, Char> { + map( + satisfy( + |c| matches!(c, '\u{9}' | '\u{A}' | '\u{D}' | '\u{20}'..='\u{D7FF}' | '\u{E000}'..='\u{FFFD}' | '\u{10000}'..='\u{10FFFF}'), + ), + |char| Char(char), + )(input) + } +} + +/// [3] S ::= (#x20 | #x9 | #xD | #xA)+ +impl<'s> Parser<'s, S<'s>> for S<'s> { + fn parse(input: &'s str) -> IResult<&str, S<'s>> { + map(is_a("\u{20}\u{9}\u{D}\u{A}"), |s| S(s))(input) + } +} + +/// [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] +impl Parser<'_, NameStartChar> for NameStartChar { + fn parse(input: &str) -> IResult<&str, NameStartChar> { + map( + satisfy( + |c| matches!(c, ':' | 'A'..='Z' | '_' | 'a'..='z' | '\u{C0}'..='\u{D6}' | '\u{D8}'..='\u{F6}' | '\u{F8}'..='\u{2FF}' | '\u{370}'..='\u{37D}' | '\u{37F}'..='\u{1FFF}' | '\u{200C}'..='\u{200D}' | '\u{2070}'..='\u{218F}' | '\u{2C00}'..='\u{2FEF}' | '\u{3001}'..='\u{D7FF}' | '\u{F900}'..='\u{FDCF}' | '\u{FDF0}'..='\u{FFFD}' | '\u{10000}'..='\u{EFFFF}'), + ), + |c| NameStartChar(c), + )(input) + } +} + +/// [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040] +impl Parser<'_, NameChar> for NameChar { + fn parse(input: &str) -> IResult<&str, NameChar> { + map( + alt(( + map(NameStartChar::parse, |NameStartChar(c)| c), + satisfy( + |c| matches!(c, '-' | '.' | '0'..='9' | '\u{B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}'), + ), + )), + |c| NameChar(c), + )(input) + } +} + +/// [5] Name ::= NameStartChar (NameChar)* +impl<'s> Parser<'s, Name<'s>> for Name<'s> { + fn parse(input: &'s str) -> IResult<&str, Name<'s>> { + map( + recognize(pair(NameStartChar::parse, many0(NameChar::parse))), + |name| Name(name), + )(input) + } +} + +/// [6] Names ::= Name (#x20 Name)* +impl<'s> Parser<'s, Names<'s>> for Names<'s> { + // TODO: fix + fn parse(input: &'s str) -> IResult<&str, Names<'s>> { + map( + recognize(pair(Name::parse, many0(pair(char('\u{20}'), Name::parse)))), + |names| Names(names), + )(input) + } +} + +/// [7] Nmtoken ::= (NameChar)+ +impl<'s> Parser<'s, Nmtoken<'s>> for Nmtoken<'s> { + fn parse(input: &'s str) -> IResult<&str, Nmtoken<'s>> { + map(recognize(many1(NameChar::parse)), |nmtoken| { + Nmtoken(nmtoken) + })(input) + } +} + +/// [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* +impl<'s> Parser<'s, Nmtokens<'s>> for Nmtokens<'s> { + fn parse(input: &'s str) -> IResult<&str, Nmtokens<'s>> { + map( + recognize(pair( + Nmtoken::parse, + many0(pair(char('\u{20}'), Nmtoken::parse)), + )), + |nmtokens| Nmtokens(nmtokens), + )(input) + } +} + +/// [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' +/// | "'" ([^%&'] | PEReference | Reference)* "'" +impl<'s> Parser<'s, EntityValue<'s>> for EntityValue<'s> { + fn parse(input: &'s str) -> IResult<&str, EntityValue<'s>> { + map( + alt(( + delimited( + char('"'), + many0(alt(( + map( + recognize(many_till(take(1usize), peek(one_of("%&\"")))), + |string| LiteralData::String(string), + ), + map(PEReference::parse, |pe_reference| { + LiteralData::PEReference(pe_reference) + }), + map(Reference::parse, |reference| { + LiteralData::Reference(reference) + }), + ))), + char('"'), + ), + delimited( + char('\''), + many0(alt(( + map( + recognize(many_till(take(1usize), peek(one_of("%&'")))), + |string| LiteralData::String(string), + ), + map(PEReference::parse, |pe_reference| { + LiteralData::PEReference(pe_reference) + }), + map(Reference::parse, |reference| { + LiteralData::Reference(reference) + }), + ))), + char('\''), + ), + )), + |entity_value| EntityValue(entity_value), + )(input) + } +} + +/// [10] AttValue ::= '"' ([^<&"] | Reference)* '"' +/// | "'" ([^<&'] | Reference)* "'" +impl<'s> Parser<'s, AttValue<'s>> for AttValue<'s> { + fn parse(input: &'s str) -> IResult<&str, AttValue<'s>> { + map( + alt(( + delimited( + char('"'), + many0(alt(( + map( + recognize(many_till(take(1usize), peek(one_of("%&\"")))), + |string| LiteralData::String(string), + ), + map(Reference::parse, |reference| { + LiteralData::Reference(reference) + }), + ))), + char('"'), + ), + delimited( + char('\''), + many0(alt(( + map( + recognize(many_till(take(1usize), peek(one_of("%&'")))), + |string| LiteralData::String(string), + ), + map(Reference::parse, |reference| { + LiteralData::Reference(reference) + }), + ))), + char('\''), + ), + )), + |att_value| AttValue(att_value), + )(input) + } +} + +/// [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") +impl<'s> Parser<'s, SystemLiteral<'s>> for SystemLiteral<'s> { + fn parse(input: &'s str) -> IResult<&str, SystemLiteral<'s>> { + map( + alt(( + delimited(char('"'), recognize(many0(none_of("\""))), char('"')), + delimited(char('\''), recognize(many0(none_of("'"))), char('\'')), + )), + |system_literal| SystemLiteral(system_literal), + )(input) + } +} + +/// [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" +impl<'s> Parser<'s, PubidLiteral<'s>> for PubidLiteral<'s> { + fn parse(input: &'s str) -> IResult<&str, PubidLiteral<'s>> { + map( + alt(( + delimited(char('"'), recognize(many0(PubidChar::parse)), char('"')), + delimited( + char('\''), + recognize(many0(recognize(not(char('\''))).and_then(PubidChar::parse))), + char('\''), + ), + )), + |pubid_literal| PubidLiteral(pubid_literal), + )(input) + } +} + +/// [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] +impl Parser<'_, PubidChar> for PubidChar { + fn parse(input: &'_ str) -> IResult<&str, PubidChar> { + map( + satisfy( + |c| matches!(c, '\u{20}' | '\u{D}' | '\u{A}' | 'a'..='z' | 'A'..='Z' | '0'..='9'), + ), + |pubid_char| PubidChar(pubid_char), + )(input) + } +} + +/// [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) +impl<'s> Parser<'s, CharData<'s>> for CharData<'s> { + fn parse(input: &'s str) -> IResult<&str, CharData<'s>> { + map( + recognize(many_till( + none_of("<&"), + peek(alt((recognize(one_of("<&")), tag("]]>")))), + )), + |char_data| CharData(char_data), + )(input) + } +} + +/// Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' +impl<'s> Parser<'s, Comment<'s>> for Comment<'s> { + fn parse(input: &'s str) -> IResult<&str, Comment<'s>> { + map( + delimited( + tag("<!--"), + recognize(many_till(Char::parse, peek(tag("--")))), + tag("-->"), + ), + |comment| Comment(comment), + )(input) + } +} + +/// [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' +impl<'s> Parser<'s, PI<'s>> for PI<'s> { + fn parse(input: &'s str) -> IResult<&str, PI<'s>> { + map( + delimited( + tag("<?"), + pair( + PITarget::parse, + opt(recognize(pair( + S::parse, + many_till(Char::parse, peek(tag("?>"))), + ))), + ), + tag("?>"), + ), + |(target, instruction)| PI { + target, + instruction, + }, + )(input) + } +} + +/// [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) +impl<'s> Parser<'s, PITarget<'s>> for PITarget<'s> { + fn parse(input: &'s str) -> IResult<&str, PITarget<'s>> { + let (rest, name) = Name::parse(input)?; + if name.0.to_lowercase() == "xml" { + return Err(Err::Error(Error { + input, + // TODO: check if better error to return + code: ErrorKind::Tag, + })); + } else { + return Ok((rest, PITarget(name))); + } + } +} + +/// [18] CDSect ::= CDStart CData CDEnd +impl<'s> Parser<'s, CDSect<'s>> for CDSect<'s> { + fn parse(input: &'s str) -> IResult<&str, CDSect<'s>> { + map(CData::parse, |c_data| CDSect(c_data))(input) + } +} + +/// [19] CDStart ::= '<![CDATA[' +impl Parser<'_, CDStart> for CDStart { + fn parse(input: &'_ str) -> IResult<&str, CDStart> { + value(CDStart, tag("<![CDATA["))(input) + } +} + +/// [20] CData ::= (Char* - (Char* ']]>' Char*)) +impl<'s> Parser<'s, CData<'s>> for CData<'s> { + fn parse(input: &'s str) -> IResult<&str, CData<'s>> { + map( + recognize(many_till(Char::parse, peek(tag("]]>")))), + |c_data| CData(c_data), + )(input) + } +} + +/// [21] CDEnd ::= ']]>' +impl Parser<'_, CDEnd> for CDEnd { + fn parse(input: &'_ str) -> IResult<&str, CDEnd> { + value(CDEnd, tag("]]>"))(input) + } +} + +/// [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? +impl<'s> Parser<'s, Prolog<'s>> for Prolog<'s> { + fn parse(input: &'s str) -> IResult<&str, Prolog<'s>> { + tuple(( + opt(XMLDecl::parse), + many0(Misc::parse), + opt(tuple((DoctypeDecl::parse, many0(Misc::parse)))), + ))(input) + } +} + +/// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' +impl<'s> Parser<'s, XMLDecl<'s>> for XMLDecl<'s> { + fn parse(input: &'s str) -> IResult<&str, XMLDecl<'s>> { + map( + delimited( + tag("<?xml"), + tuple(( + VersionInfo::parse, + opt(EncodingDecl::parse), + opt(SDDecl::parse), + )), + pair(opt(S::parse), tag("?>")), + ), + |(version_info, encoding_decl, sd_decl)| XMLDecl { + version_info, + encoding_decl, + sd_decl, + }, + )(input) + } +} + +/// [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') +impl Parser<'_, VersionInfo> for VersionInfo { + fn parse(input: &'_ str) -> IResult<&str, VersionInfo> { + map( + preceded( + tuple((S::parse, tag("version"), Eq::parse)), + alt(( + delimited(char('\''), VersionNum::parse, char('\'')), + delimited(char('"'), VersionNum::parse, char('"')), + )), + ), + |version_num| VersionInfo(version_num), + )(input) + } +} + +/// [25] Eq ::= S? '=' S? +impl Parser<'_, Eq> for Eq { + fn parse(input: &'_ str) -> IResult<&str, Eq> { + value( + Eq, + recognize(tuple((opt(S::parse), char('='), opt(S::parse)))), + )(input) + } +} + +/// [26] VersionNum ::= '1.' [0-9]+ +impl Parser<'_, VersionNum> for VersionNum { + fn parse(input: &'_ str) -> IResult<&str, VersionNum> { + preceded( + tag("1."), + alt(( + value(VersionNum::One, char('0')), + value(VersionNum::OneDotOne, char('1')), + )), + )(input) + } +} + +/// [27] Misc ::= Comment | PI | S +impl<'s> Parser<'s, Misc<'s>> for Misc<'s> { + fn parse(input: &'s str) -> IResult<&str, Misc<'s>> { + alt(( + map(Comment::parse, |comment| Misc::Comment(comment)), + map(PI::parse, |pi| Misc::PI(pi)), + value(Misc::S, S::parse), + ))(input) + } +} + +/// [16] doctypedecl ::= '<!DOCTYPE' S QName (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>' +/// [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' +impl<'s> Parser<'s, DoctypeDecl<'s>> for DoctypeDecl<'s> { + fn parse(input: &'s str) -> IResult<&str, DoctypeDecl<'s>> { + map( + delimited( + pair(tag("<!DOCTYPE"), S::parse), + tuple(( + QName::parse, + opt(preceded(S::parse, ExternalID::parse)), + preceded( + opt(S::parse), + opt(terminated( + delimited(tag("["), IntSubset::parse, tag("]")), + opt(S::parse), + )), + ), + )), + tag(">"), + ), + |(name, external_id, int_subset)| DoctypeDecl { + name, + external_id, + int_subset, + }, + )(input) + } +} + +/// [28a] DeclSep ::= PEReference | S +impl<'s> Parser<'s, DeclSep<'s>> for DeclSep<'s> { + fn parse(input: &'s str) -> IResult<&str, DeclSep<'s>> { + alt(( + map(PEReference::parse, |pe_reference| { + DeclSep::PEReference(pe_reference) + }), + value(DeclSep::S, S::parse), + ))(input) + } +} + +/// [28b] intSubset ::= (markupdecl | DeclSep)* +impl<'s> Parser<'s, IntSubset<'s>> for IntSubset<'s> { + fn parse(input: &'s str) -> IResult<&str, IntSubset<'s>> { + many0(alt(( + map(MarkupDecl::parse, |markup_decl| { + IntSubsetDeclaration::MarkupDecl(markup_decl) + }), + map(DeclSep::parse, |decl_sep| { + IntSubsetDeclaration::DeclSep(decl_sep) + }), + )))(input) + } +} + +/// [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment +impl<'s> Parser<'s, MarkupDecl<'s>> for MarkupDecl<'s> { + fn parse(input: &'s str) -> IResult<&str, MarkupDecl<'s>> { + alt(( + map(Elementdecl::parse, |elementdecl| { + MarkupDecl::Elementdecl(elementdecl) + }), + map(AttlistDecl::parse, |attlist_decl| { + MarkupDecl::AttlistDecl(attlist_decl) + }), + map(EntityDecl::parse, |entity_decl| { + MarkupDecl::EntityDecl(entity_decl) + }), + map(NotationDecl::parse, |notation_decl| { + MarkupDecl::NotationDecl(notation_decl) + }), + map(PI::parse, |pi| MarkupDecl::PI(pi)), + map(Comment::parse, |comment| MarkupDecl::Comment(comment)), + ))(input) + } +} + +/// [30] extSubset ::= TextDecl? extSubsetDecl +impl<'s> Parser<'s, ExtSubset<'s>> for ExtSubset<'s> { + fn parse(input: &'s str) -> IResult<&str, ExtSubset<'s>> { + map( + pair(opt(TextDecl::parse), ExtSubsetDecl::parse), + |(text_decl, ext_subset_decl)| ExtSubset { + text_decl, + ext_subset_decl, + }, + )(input) + } +} + +/// [31] extSubsetDecl ::= ( markupdecl | conditionalSect | DeclSep)* +impl<'s> Parser<'s, ExtSubsetDecl<'s>> for ExtSubsetDecl<'s> { + fn parse(input: &'s str) -> IResult<&str, ExtSubsetDecl<'s>> { + many0(alt(( + map(MarkupDecl::parse, |markup_decl| { + ExtSubsetDeclaration::MarkupDecl(markup_decl) + }), + map(ConditionalSect::parse, |conditional_sect| { + ExtSubsetDeclaration::ConditionalSect(conditional_sect) + }), + map(DeclSep::parse, |decl_sep| { + ExtSubsetDeclaration::DeclSep(decl_sep) + }), + )))(input) + } +} + +/// [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) +impl Parser<'_, SDDecl> for SDDecl { + fn parse(input: &'_ str) -> IResult<&str, SDDecl> { + preceded( + tuple((S::parse, tag("standalone"), Eq::parse)), + alt(( + delimited( + char('\''), + alt((value(true, tag("yes")), value(false, tag("no")))), + char('\''), + ), + delimited( + char('"'), + alt((value(true, tag("yes")), value(false, tag("no")))), + char('"'), + ), + )), + )(input) + } +} + +// (Productions 33 through 38 have been removed.) + +/// [39] element ::= EmptyElemTag | STag content ETag +impl<'s> Parser<'s, Element<'s>> for Element<'s> { + fn parse(input: &'s str) -> IResult<&str, Element<'s>> { + alt(( + map(EmptyElemTag::parse, |empty_elem_tag| { + Element::Empty(empty_elem_tag) + }), + map( + tuple((STag::parse, Content::parse, ETag::parse)), + |(s_tag, content, e_tag)| Element::NotEmpty(s_tag, content, e_tag), + ), + ))(input) + } +} + +/// [12] STag ::= '<' QName (S Attribute)* S? '>' +/// [40] STag ::= '<' Name (S Attribute)* S? '>' +impl<'s> Parser<'s, STag<'s>> for STag<'s> { + fn parse(input: &'s str) -> IResult<&str, STag<'s>> { + map( + delimited( + tag("<"), + pair(QName::parse, many0(preceded(S::parse, Attribute::parse))), + pair(opt(S::parse), tag(">")), + ), + |(name, attributes)| STag { name, attributes }, + )(input) + } +} + +/// [15] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue +impl<'s> Parser<'s, Attribute<'s>> for Attribute<'s> { + fn parse(input: &'s str) -> IResult<&str, Attribute<'s>> { + alt(( + map( + separated_pair(NSAttName::parse, Eq::parse, AttValue::parse), + |(ns_name, value)| Attribute::NamespaceDeclaration { ns_name, value }, + ), + map( + separated_pair(QName::parse, Eq::parse, AttValue::parse), + |(name, value)| Attribute::Attribute { name, value }, + ), + ))(input) + } +} +// pub type Attribute<'s> = (Name<'s>, AttValue<'s>); +/// [41] Attribute ::= Name Eq AttValue +// pub fn attribute(input: &str) -> IResult<&str, Attribute> { +// separated_pair(name, eq, att_value)(input) +// } + +/// [13] ETag ::= '</' QName S? '>' +/// [42] ETag ::= '</' Name S? '>' +impl<'s> Parser<'s, ETag<'s>> for ETag<'s> { + fn parse(input: &'s str) -> IResult<&str, ETag<'s>> { + map( + delimited(tag("</"), QName::parse, pair(opt(S::parse), tag(">"))), + |name| ETag { name }, + )(input) + } +} + +/// [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* +impl<'s> Parser<'s, Content<'s>> for Content<'s> { + fn parse(input: &'s str) -> IResult<&str, Content<'s>> { + map( + pair( + opt(CharData::parse), + many0(pair( + alt(( + map(Element::parse, |element| ContentItem::Element(element)), + map(Reference::parse, |reference| { + ContentItem::Reference(reference) + }), + map(CDSect::parse, |cd_sect| ContentItem::CDSect(cd_sect)), + map(PI::parse, |pi| ContentItem::PI(pi)), + map(Comment::parse, |comment| ContentItem::Comment(comment)), + )), + opt(CharData::parse), + )), + ), + |(char_data, content)| Content { char_data, content }, + )(input) + } +} + +/// [14] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' +/// [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' [WFC: Unique Att Spec] +impl<'s> Parser<'s, EmptyElemTag<'s>> for EmptyElemTag<'s> { + fn parse(input: &'s str) -> IResult<&str, EmptyElemTag<'s>> { + map( + delimited( + tag("<"), + pair(QName::parse, many0(preceded(S::parse, Attribute::parse))), + pair(opt(S::parse), tag("/>")), + ), + |(name, attributes)| EmptyElemTag { name, attributes }, + )(input) + } +} + +/// [17] elementdecl ::= '<!ELEMENT' S QName S contentspec S? '>' +/// [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>' +impl<'s> Parser<'s, Elementdecl<'s>> for Elementdecl<'s> { + fn parse(input: &'s str) -> IResult<&str, Elementdecl> { + map( + delimited( + pair(tag("<!ELEMENT"), S::parse), + separated_pair(QName::parse, S::parse, Contentspec::parse), + pair(opt(S::parse), tag(">")), + ), + |(name, contentspec)| Elementdecl { name, contentspec }, + )(input) + } +} + +/// [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children +impl<'s> Parser<'s, Contentspec<'s>> for Contentspec<'s> { + fn parse(input: &'s str) -> IResult<&str, Contentspec<'s>> { + alt(( + value(Contentspec::Empty, tag("EMPTY")), + value(Contentspec::Any, tag("ANY")), + map(Mixed::parse, |mixed| Contentspec::Mixed(mixed)), + map(Children::parse, |children| Contentspec::Children(children)), + ))(input) + } +} + +/// Occurence ::= ('?' | '*' | '+')? +impl Parser<'_, Occurence> for Occurence { + fn parse(input: &'_ str) -> IResult<&str, Occurence> { + map( + opt(alt((tag("?"), tag("*"), tag("+")))), + |occurence| match occurence { + Some("?") => Occurence::Optional, + Some("*") => Occurence::Many0, + Some("+") => Occurence::Many1, + _ => Occurence::Once, + }, + )(input) + } +} + +/// [47] children ::= (choice | seq) ('?' | '*' | '+')? +impl<'s> Parser<'s, Children<'s>> for Children<'s> { + fn parse(input: &'s str) -> IResult<&str, Children<'s>> { + map( + pair( + alt(( + map(Choice::parse, |choice| ChildrenKind::Choice(choice)), + map(Seq::parse, |seq| ChildrenKind::Seq(seq)), + )), + Occurence::parse, + ), + |(kind, occurence)| Children { kind, occurence }, + )(input) + } +} + +/// [18] cp ::= (QName | choice | seq) ('?' | '*' | '+')? +/// [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? +impl<'s> Parser<'s, Cp<'s>> for Cp<'s> { + fn parse(input: &'s str) -> IResult<&str, Cp<'s>> { + map( + pair( + alt(( + map(QName::parse, |name| CpKind::Name(name)), + map(Choice::parse, |choice| CpKind::Choice(choice)), + map(Seq::parse, |seq| CpKind::Seq(seq)), + )), + Occurence::parse, + ), + |(kind, occurence)| Cp { kind, occurence }, + )(input) + } +} + +/// [49] choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')' +impl<'s> Parser<'s, Choice<'s>> for Choice<'s> { + fn parse(input: &'s str) -> IResult<&str, Choice<'s>> { + map( + delimited( + pair(tag("("), opt(S::parse)), + pair( + Cp::parse, + many1(preceded( + tuple((opt(S::parse), tag("|"), opt(S::parse))), + Cp::parse, + )), + ), + pair(opt(S::parse), tag(")")), + ), + |(head, tail)| { + let choice = vec![vec![head], tail].concat(); + Choice(choice) + }, + )(input) + } +} + +/// [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' +impl<'s> Parser<'s, Seq<'s>> for Seq<'s> { + fn parse(input: &'s str) -> IResult<&str, Seq<'s>> { + map( + delimited( + pair(tag("("), opt(S::parse)), + pair( + Cp::parse, + many0(preceded( + tuple((opt(S::parse), tag(","), opt(S::parse))), + Cp::parse, + )), + ), + pair(opt(S::parse), tag(")")), + ), + |(head, tail)| { + let seq = vec![vec![head], tail].concat(); + Seq(seq) + }, + )(input) + } +} + +/// [19] Mixed ::= '(' S? '#PCDATA' (S? '|' S? QName)* S? ')*' | '(' S? '#PCDATA' S? ')' +/// [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')' +impl<'s> Parser<'s, Mixed<'s>> for Mixed<'s> { + fn parse(input: &'s str) -> IResult<&str, Mixed<'s>> { + alt(( + map( + delimited( + tuple((tag("("), S::parse, tag("#PCDATA"))), + many0(preceded( + tuple((opt(S::parse), tag("|"), opt(S::parse))), + QName::parse, + )), + pair(opt(S::parse), tag(")*")), + ), + |names| Mixed(names), + ), + value( + Mixed(Vec::new()), + tuple(( + tag("("), + opt(S::parse), + tag("#PCDATA"), + opt(S::parse), + tag(")"), + )), + ), + ))(input) + } +} + +/// [20] AttlistDecl ::= '<!ATTLIST' S QName AttDef* S? '>' +/// [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' +impl<'s> Parser<'s, AttlistDecl<'s>> for AttlistDecl<'s> { + fn parse(input: &'s str) -> IResult<&str, AttlistDecl<'s>> { + map( + delimited( + pair(tag("<!ATTLIST"), S::parse), + pair(QName::parse, many0(AttDef::parse)), + pair(opt(S::parse), tag(">")), + ), + |(element_type, att_defs)| AttlistDecl { + element_type, + att_defs, + }, + )(input) + } +} + +/// [21] AttDef ::= S (QName | NSAttName) S AttType S DefaultDecl +/// [53] AttDef ::= S Name S AttType S DefaultDecl +impl<'s> Parser<'s, AttDef<'s>> for AttDef<'s> { + fn parse(input: &'s str) -> IResult<&str, AttDef<'s>> { + map( + tuple(( + preceded( + S::parse, + alt(( + map(QName::parse, |q_name| AttDefName::QName(q_name)), + map(NSAttName::parse, |ns_att_name| { + AttDefName::NSAttName(ns_att_name) + }), + )), + ), + preceded(S::parse, AttType::parse), + preceded(S::parse, DefaultDecl::parse), + )), + |(name, att_type, default_decl)| AttDef { + name, + att_type, + default_decl, + }, + )(input) + } +} + +/// [54] AttType ::= StringType | TokenizedType | EnumeratedType +impl<'s> Parser<'s, AttType<'s>> for AttType<'s> { + fn parse(input: &'s str) -> IResult<&str, AttType<'s>> { + alt(( + value(AttType::StringType, StringType::parse), + map(TokenizedType::parse, |tokenized_type| { + AttType::TokenizedType(tokenized_type) + }), + map(EnumeratedType::parse, |enumerated_type| { + AttType::EnumeratedType(enumerated_type) + }), + ))(input) + } +} + +/// [55] StringType ::= 'CDATA' +impl Parser<'_, StringType> for StringType { + fn parse(input: &'_ str) -> IResult<&str, StringType> { + value(StringType, tag("CDATA"))(input) + } +} + +/// [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' +impl Parser<'_, TokenizedType> for TokenizedType { + fn parse(input: &'_ str) -> IResult<&str, TokenizedType> { + alt(( + value(TokenizedType::ID, tag("ID")), + // TODO: check if this is required + // try idrefs first to avoid losing 'S' + value(TokenizedType::IDRefs, tag("IDREFS")), + value(TokenizedType::IDRef, tag("IDREF")), + value(TokenizedType::Entity, tag("ENTITY")), + value(TokenizedType::Entities, tag("ENTITIES")), + // same here + value(TokenizedType::NMTokens, tag("NMTOKENS")), + value(TokenizedType::NMToken, tag("NMTOKEN")), + ))(input) + } +} + +/// [57] EnumeratedType ::= NotationType | Enumeration +impl<'s> Parser<'s, EnumeratedType<'s>> for EnumeratedType<'s> { + fn parse(input: &'s str) -> IResult<&str, EnumeratedType<'s>> { + alt(( + map(NotationType::parse, |notation_type| { + EnumeratedType::NotationType(notation_type) + }), + map(Enumeration::parse, |enumeration| { + EnumeratedType::Enumeration(enumeration) + }), + ))(input) + } +} + +/// [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' +impl<'s> Parser<'s, NotationType<'s>> for NotationType<'s> { + fn parse(input: &'s str) -> IResult<&str, NotationType<'s>> { + map( + delimited( + tuple((tag("NOTATION"), S::parse, tag("("), opt(S::parse))), + pair( + Name::parse, + many0(preceded( + tuple((opt(S::parse), tag("|"), opt(S::parse))), + Name::parse, + )), + ), + pair(opt(S::parse), tag(")")), + ), + |(head, tail)| { + let notation_type = vec![vec![head], tail].concat(); + NotationType(notation_type) + }, + )(input) + } +} + +/// [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' +impl<'s> Parser<'s, Enumeration<'s>> for Enumeration<'s> { + fn parse(input: &'s str) -> IResult<&str, Enumeration<'s>> { + map( + delimited( + pair(tag("("), opt(S::parse)), + pair( + Nmtoken::parse, + many0(preceded( + tuple((opt(S::parse), tag("|"), opt(S::parse))), + Nmtoken::parse, + )), + ), + pair(opt(S::parse), tag(")")), + ), + |(head, tail)| { + let enumeration = vec![vec![head], tail].concat(); + Enumeration(enumeration) + }, + )(input) + } +} + +/// [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) +impl<'s> Parser<'s, DefaultDecl<'s>> for DefaultDecl<'s> { + fn parse(input: &'s str) -> IResult<&str, DefaultDecl<'s>> { + alt(( + value(DefaultDecl::Required, tag("#REQUIRED")), + value(DefaultDecl::Implied, tag("#IMPLIED")), + map( + preceded(opt(pair(tag("#FIXED"), S::parse)), AttValue::parse), + |att_value| DefaultDecl::Fixed(att_value), + ), + ))(input) + } +} + +/// [61] conditionalSect ::= includeSect | ignoreSect +impl<'s> Parser<'s, ConditionalSect<'s>> for ConditionalSect<'s> { + fn parse(input: &'s str) -> IResult<&str, ConditionalSect<'s>> { + alt(( + map(IncludeSect::parse, |include_sect| { + ConditionalSect::IncludeSect(include_sect) + }), + map(IgnoreSect::parse, |ignore_sect| { + ConditionalSect::IgnoreSect(ignore_sect) + }), + ))(input) + } +} + +/// [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' +impl<'s> Parser<'s, IncludeSect<'s>> for IncludeSect<'s> { + fn parse(input: &'s str) -> IResult<&str, IncludeSect<'s>> { + map( + delimited( + tuple(( + tag("<!["), + opt(S::parse), + tag("INCLUDE"), + opt(S::parse), + tag("["), + )), + ExtSubsetDecl::parse, + tag("]]>"), + ), + |ext_subset_decl| IncludeSect(ext_subset_decl), + )(input) + } +} + +/// [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>' +impl<'s> Parser<'s, IgnoreSect<'s>> for IgnoreSect<'s> { + fn parse(input: &'s str) -> IResult<&str, IgnoreSect<'s>> { + map( + delimited( + tuple(( + tag("<!["), + opt(S::parse), + tag("IGNORE"), + opt(S::parse), + tag("["), + )), + many0(IgnoreSectContents::parse), + tag("]]>"), + ), + |ignore_sect_contents| IgnoreSect(ignore_sect_contents), + )(input) + } +} + +/// [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* +impl<'s> Parser<'s, IgnoreSectContents<'s>> for IgnoreSectContents<'s> { + fn parse(input: &'s str) -> IResult<&str, IgnoreSectContents<'s>> { + map( + pair( + Ignore::parse, + many0(tuple(( + delimited(tag("<!["), IgnoreSectContents::parse, tag("]]>")), + Ignore::parse, + ))), + ), + |(ignore, ignore_list)| IgnoreSectContents { + ignore, + ignore_list, + }, + )(input) + } +} + +/// [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) +impl<'s> Parser<'s, Ignore<'s>> for Ignore<'s> { + fn parse(input: &'s str) -> IResult<&str, Ignore<'s>> { + map( + recognize(many_till(Char::parse, peek(alt((tag("<!["), tag("]]>")))))), + |ignore| Ignore(ignore), + )(input) + } +} + +/// [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' +impl<'s> Parser<'s, CharRef<'s>> for CharRef<'s> { + fn parse(input: &'s str) -> IResult<&str, CharRef<'s>> { + alt(( + delimited( + tag("&#"), + map(take_while(|c| matches!(c, '0'..='9')), |decimal| { + CharRef::Decimal(decimal) + }), + tag(";"), + ), + delimited( + tag("&#x"), + map( + take_while(|c| matches!(c, '0'..='9' | 'a'..='f' | 'A'..='F' )), + |hexadecimal| CharRef::Hexadecimal(hexadecimal), + ), + tag(";"), + ), + ))(input) + } +} + +/// [67] Reference ::= EntityRef | CharRef +impl<'s> Parser<'s, Reference<'s>> for Reference<'s> { + fn parse(input: &'s str) -> IResult<&str, Reference<'s>> { + alt(( + map(EntityRef::parse, |entity_ref| { + Reference::EntityRef(entity_ref) + }), + map(CharRef::parse, |char_ref| Reference::CharRef(char_ref)), + ))(input) + } +} + +/// [68] EntityRef ::= '&' Name ';' +impl<'s> Parser<'s, EntityRef<'s>> for EntityRef<'s> { + fn parse(input: &'s str) -> IResult<&str, EntityRef<'s>> { + map(delimited(tag("&"), Name::parse, tag(";")), |entity_ref| { + EntityRef(entity_ref) + })(input) + } +} + +/// [69] PEReference ::= '%' Name ';' +impl<'s> Parser<'s, PEReference<'s>> for PEReference<'s> { + fn parse(input: &'s str) -> IResult<&str, PEReference<'s>> { + map(delimited(tag("%"), Name::parse, tag(";")), |pe_reference| { + PEReference(pe_reference) + })(input) + } +} + +/// [70] EntityDecl ::= GEDecl | PEDecl +impl<'s> Parser<'s, EntityDecl<'s>> for EntityDecl<'s> { + fn parse(input: &'s str) -> IResult<&str, EntityDecl<'s>> { + alt(( + map(GEDecl::parse, |ge_decl| EntityDecl::GEDecl(ge_decl)), + map(PEDecl::parse, |pe_decl| EntityDecl::PEDecl(pe_decl)), + ))(input) + } +} + +/// [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' +impl<'s> Parser<'s, GEDecl<'s>> for GEDecl<'s> { + fn parse(input: &'s str) -> IResult<&str, GEDecl<'s>> { + map( + delimited( + pair(tag("<!ENTITY"), S::parse), + separated_pair(Name::parse, S::parse, EntityDef::parse), + pair(opt(S::parse), tag(">")), + ), + |(name, entity_def)| GEDecl { name, entity_def }, + )(input) + } +} + +/// [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' +impl<'s> Parser<'s, PEDecl<'s>> for PEDecl<'s> { + fn parse(input: &'s str) -> IResult<&str, PEDecl<'s>> { + map( + delimited( + tuple((tag("<!ENTITY"), S::parse, tag("%"), S::parse)), + separated_pair(Name::parse, S::parse, PEDef::parse), + pair(opt(S::parse), tag(">")), + ), + |(name, pe_def)| PEDecl { name, pe_def }, + )(input) + } +} + +/// [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) +impl<'s> Parser<'s, EntityDef<'s>> for EntityDef<'s> { + fn parse(input: &'s str) -> IResult<&str, EntityDef<'s>> { + alt(( + map(EntityValue::parse, |entity_value| { + EntityDef::EntityValue(entity_value) + }), + map( + pair(ExternalID::parse, opt(NDataDecl::parse)), + |(external_id, ndata_decl)| EntityDef::ExternalID { + external_id, + ndata_decl, + }, + ), + ))(input) + } +} + +/// [74] PEDef ::= EntityValue | ExternalID +impl<'s> Parser<'s, PEDef<'s>> for PEDef<'s> { + fn parse(input: &'s str) -> IResult<&str, PEDef<'s>> { + alt(( + map(EntityValue::parse, |entity_value| { + PEDef::EntityValue(entity_value) + }), + map(ExternalID::parse, |external_id| { + PEDef::ExternalID(external_id) + }), + ))(input) + } +} + +/// [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral +// pub fn external_id(input: &str) -> IResult<&str, ExternalID> { +impl<'s> Parser<'s, ExternalID<'s>> for ExternalID<'s> { + fn parse(input: &'s str) -> IResult<&str, ExternalID<'s>> { + alt(( + map( + preceded(pair(tag("SYSTEM"), S::parse), SystemLiteral::parse), + |system_identifier| ExternalID::SYSTEM { system_identifier }, + ), + map( + preceded( + pair(tag("PUBLIC"), S::parse), + separated_pair(PubidLiteral::parse, S::parse, SystemLiteral::parse), + ), + |(public_identifier, system_identifier)| ExternalID::PUBLIC { + public_identifier, + system_identifier, + }, + ), + ))(input) + } +} + +/// [76] NDataDecl ::= S 'NDATA' S Name +impl<'s> Parser<'s, NDataDecl<'s>> for NDataDecl<'s> { + fn parse(input: &'s str) -> IResult<&str, NDataDecl<'s>> { + map( + preceded(tuple((S::parse, tag("NDATA"), S::parse)), Name::parse), + |n_data_decl| NDataDecl(n_data_decl), + )(input) + } +} + +/// [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' +impl<'s> Parser<'s, TextDecl<'s>> for TextDecl<'s> { + fn parse(input: &'s str) -> IResult<&str, TextDecl<'s>> { + map( + delimited( + tag("<?xml"), + pair( + opt(VersionInfo::parse), + terminated(EncodingDecl::parse, opt(S::parse)), + ), + tag("?>"), + ), + |(version_info, encoding_decl)| TextDecl { + version_info, + encoding_decl, + }, + )(input) + } +} + +/// [78] extParsedEnt ::= TextDecl? content +impl<'s> Parser<'s, ExtParsedEnt<'s>> for ExtParsedEnt<'s> { + fn parse(input: &'s str) -> IResult<&str, ExtParsedEnt<'s>> { + map( + pair(opt(TextDecl::parse), Content::parse), + |(text_decl, content)| ExtParsedEnt { text_decl, content }, + )(input) + } +} + +/// [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName +impl<'s> Parser<'s, EncodingDecl<'s>> for EncodingDecl<'s> { + fn parse(input: &'s str) -> IResult<&str, EncodingDecl<'s>> { + map( + preceded( + tuple((S::parse, tag("encoding"), Eq::parse)), + alt(( + delimited(char('"'), EncName::parse, char('"')), + delimited(char('\''), EncName::parse, char('\'')), + )), + ), + |encoding_decl| EncodingDecl(encoding_decl), + )(input) + } +} + +/// [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* +impl<'s> Parser<'s, EncName<'s>> for EncName<'s> { + fn parse(input: &'s str) -> IResult<&str, EncName<'s>> { + map( + recognize(pair( + satisfy(|c| matches!(c, 'A'..='Z' | 'a'..='z' )), + many0(satisfy( + |c| matches!(c, 'A'..='Z' | 'a'..='z' | '0'..='9' | '.' | '_' | '-' ), + )), + )), + |enc_name| EncName(enc_name), + )(input) + } +} + +/// [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' +impl<'s> Parser<'s, NotationDecl<'s>> for NotationDecl<'s> { + fn parse(input: &'s str) -> IResult<&str, NotationDecl<'s>> { + map( + delimited( + pair(tag("<!NOTATION"), S::parse), + separated_pair( + Name::parse, + S::parse, + alt(( + map(ExternalID::parse, |external_id| { + NotationDeclID::External(external_id) + }), + map(PublicID::parse, |public_id| { + NotationDeclID::Public(public_id) + }), + )), + ), + pair(opt(S::parse), tag(">")), + ), + |(name, id)| NotationDecl { name, id }, + )(input) + } +} + +/// [83] PublicID ::= 'PUBLIC' S PubidLiteral +impl<'s> Parser<'s, PublicID<'s>> for PublicID<'s> { + fn parse(input: &'s str) -> IResult<&str, PublicID<'s>> { + map( + preceded(pair(tag("PUBLIC"), S::parse), PubidLiteral::parse), + |public_id| PublicID(public_id), + )(input) + } +} + +#[cfg(test)] +mod tests { + use std::num::NonZero; + + use super::*; + + #[test] + fn test_char_data() { + assert_eq!( + Ok(("&def]]>ghi", CharData("abc"))), + CharData::parse("abc&def]]>ghi") + ); + assert_eq!( + Ok(("]]>ghi", CharData("abcdef"))), + CharData::parse("abcdef]]>ghi") + ); + assert_eq!( + Ok(("&defghi", CharData("abc"))), + CharData::parse("abc&defghi") + ); + assert_eq!( + Ok(("]]>def&ghi", CharData("abc"))), + CharData::parse("abc]]>def&ghi") + ); + assert_eq!( + Ok(("&ghi", CharData("abc]>def"))), + CharData::parse("abc]>def&ghi") + ); + assert_eq!( + Err(Err::Incomplete(nom::Needed::Size( + NonZero::new(3usize).unwrap() + ))), + CharData::parse("abcdefghi") + ); + } + + #[test] + fn test_comment() { + assert_eq!(Ok(("", Comment(""))), Comment::parse("<!---->")); + assert_eq!(Ok(("", Comment("asdf"))), Comment::parse("<!--asdf-->")); + assert_eq!(Ok(("", Comment("as-df"))), Comment::parse("<!--as-df-->")); + assert_eq!( + Err(Err::Incomplete(nom::Needed::Size( + NonZero::new(2usize).unwrap() + ))), + Comment::parse("<!--asdf") + ); + } + + #[test] + fn test_pi_target() { + assert_eq!(Ok((" ", PITarget(Name("asdf")))), PITarget::parse("asdf ")); + assert_eq!( + Ok((" ", PITarget(Name("xmlasdf")))), + PITarget::parse("xmlasdf ") + ); + assert_eq!( + Err(Err::Error(Error { + input: "xml ", + code: ErrorKind::Tag + })), + PITarget::parse("xml ") + ); + assert_eq!( + Err(Err::Error(Error { + input: "xMl ", + code: ErrorKind::Tag + })), + PITarget::parse("xMl ") + ); + } + + #[test] + fn test_cd_sect() { + assert_eq!( + Ok(("", CDSect(CData("<greeting>Hello, world!</greeting>")))), + CDSect::parse("<![CDATA[<greeting>Hello, world!</greeting>]]>") + ) + } + + #[test] + fn test_cd_start() { + assert_eq!(Ok(("asdf", CDStart)), CDStart::parse("<![CDATA[asdf")) + } + + #[test] + fn test_cdata() { + assert_eq!(Ok(("]]>asdf", CData("asdf"))), CData::parse("asdf]]>asdf")); + assert_eq!( + Ok(("]]>asdf", CData("<![CDATA[asdf"))), + CData::parse("<![CDATA[asdf]]>asdf") + ); + assert_eq!( + Ok(("]]>asdf", CData("<greeting>Hello, world!</greeting>"))), + CData::parse("<greeting>Hello, world!</greeting>]]>asdf") + ) + } + + #[test] + fn test_cd_end() { + assert_eq!(Ok(("asdf", CDEnd)), CDEnd::parse("]]>asdf")) + } +} |