diff options
Diffstat (limited to '')
| -rw-r--r-- | src/xml/mod.rs | 1240 | ||||
| -rw-r--r-- | src/xml/parsers.rs | 1502 | 
2 files changed, 1653 insertions, 1089 deletions
diff --git a/src/xml/mod.rs b/src/xml/mod.rs index 47c1779..b1d6ea5 100644 --- a/src/xml/mod.rs +++ b/src/xml/mod.rs @@ -1,185 +1,92 @@  use std::char; -use nom::{ -    branch::alt, -    bytes::streaming::{is_a, tag, take, take_while}, -    character::{ -        complete::one_of, -        streaming::{char, none_of, satisfy}, -    }, -    combinator::{map, not, opt, peek, recognize, value}, -    error::{Error, ErrorKind}, -    multi::{many0, many1, many_till}, -    sequence::{delimited, pair, preceded, separated_pair, terminated, tuple}, -    Err, IResult, Parser, -}; - -// parser: parses tokens from lexer into events -// no well formedness, validity, or data model, simple translation of input into rust types -// output is a rust representation of the input xml -// types could be used for xml production too? -  mod parsers; +/// [1]   	NSAttName	   ::=   	PrefixedAttName | DefaultAttName  #[derive(Clone, Debug)]  pub enum NSAttName<'s> {      PrefixedAttName(PrefixedAttName<'s>),      DefaultAttName,  } -/// [1]   	NSAttName	   ::=   	PrefixedAttName | DefaultAttName -pub fn ns_att_name(input: &str) -> IResult<&str, NSAttName> { -    alt(( -        map(prefixed_att_name, |prefixed_att_name| { -            NSAttName::PrefixedAttName(prefixed_att_name) -        }), -        value(NSAttName::DefaultAttName, default_att_name), -    ))(input) -} +/// [2]   	PrefixedAttName	   ::=   	'xmlns:' NCName  #[derive(Clone, Debug)]  pub struct PrefixedAttName<'s>(NCName<'s>); -/// [2]   	PrefixedAttName	   ::=   	'xmlns:' NCName -pub fn prefixed_att_name(input: &str) -> IResult<&str, PrefixedAttName> { -    map(preceded(tag("xmlns:"), nc_name), |nc_name| { -        PrefixedAttName(nc_name) -    })(input) -} +/// [3]   	DefaultAttName	   ::=   	'xmlns';  #[derive(Clone, Debug)]  pub struct DefaultAttName; -/// [3]   	DefaultAttName	   ::=   	'xmlns'; -pub fn default_att_name(input: &str) -> IResult<&str, DefaultAttName> { -    value(DefaultAttName, tag("xmlns"))(input) -} +/// [4] 
  	NCName	   ::=   	Name - (Char* ':' Char*)  #[derive(Clone, Debug)]  pub struct NCName<'s>(&'s str); -/// [4]   	NCName	   ::=   	Name - (Char* ':' Char*) -pub fn nc_name(input: &str) -> IResult<&str, NCName> { -    map( -        recognize(pair( -            recognize(name_start_char).and_then(satisfy(|c| c != ':')), -            many_till(name_char, peek(char(':'))), -        )), -        |nc_name| NCName(nc_name), -    )(input) -} +/// [7]   	QName	   ::=   	PrefixedName | UnprefixedName  #[derive(Clone, Debug)]  pub enum QName<'s> {      PrefixedName(PrefixedName<'s>),      UnprefixedName(UnprefixedName<'s>),  } -/// [7]   	QName	   ::=   	PrefixedName | UnprefixedName -pub fn q_name(input: &str) -> IResult<&str, QName> { -    alt(( -        map(prefixed_name, |prefixed_name| { -            QName::PrefixedName(prefixed_name) -        }), -        map(unprefixed_name, |unprefixed_name| { -            QName::UnprefixedName(unprefixed_name) -        }), -    ))(input) -} +/// [8]   	PrefixedName	   ::=   	Prefix ':' LocalPart  #[derive(Clone, Debug)]  pub struct PrefixedName<'s> {      prefix: Prefix<'s>,      local_part: LocalPart<'s>,  } -/// [8]   	PrefixedName	   ::=   	Prefix ':' LocalPart -pub fn prefixed_name(input: &str) -> IResult<&str, PrefixedName> { -    map( -        separated_pair(prefix, char(':'), local_part), -        |(prefix, local_part)| PrefixedName { prefix, local_part }, -    )(input) -} +/// [9]   	UnprefixedName	   ::=   	LocalPart  #[derive(Clone, Debug)]  pub struct UnprefixedName<'s>(LocalPart<'s>); -/// [9]   	UnprefixedName	   ::=   	LocalPart -pub fn unprefixed_name(input: &str) -> IResult<&str, UnprefixedName> { -    map(local_part, |local_part| UnprefixedName(local_part))(input) -} +/// [10]   	Prefix	   ::=   	NCName  #[derive(Clone, Debug)]  pub struct Prefix<'s>(NCName<'s>); -/// [10]   	Prefix	   ::=   	NCName -pub fn prefix(input: &str) -> IResult<&str, Prefix> { -    map(nc_name, |nc_name| Prefix(nc_name))(input) -} +/// 
[11]   	LocalPart	   ::=   	NCName  #[derive(Clone, Debug)]  pub struct LocalPart<'s>(NCName<'s>); -/// [11]   	LocalPart	   ::=   	NCName -pub fn local_part(input: &str) -> IResult<&str, LocalPart> { -    map(nc_name, |nc_name| LocalPart(nc_name))(input) -}  // xml spec -pub type Document<'s> = (Prolog<'s>, Element<'s>, Vec<Misc<'s>>);  /// [1]   	document	   ::=   	prolog element Misc* -pub fn document(input: &str) -> IResult<&str, Document> { -    tuple((prolog, element, many0(misc)))(input) -} +pub type Document<'s> = (Prolog<'s>, Element<'s>, Vec<Misc<'s>>); -pub type Char = char;  /// [2]   	Char	   ::=   	#x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]	/* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */ -pub fn xmlchar(input: &str) -> IResult<&str, Char> { -    satisfy( -        |c| matches!(c, '\u{9}' | '\u{A}' | '\u{D}' | '\u{20}'..='\u{D7FF}' | '\u{E000}'..='\u{FFFD}' | '\u{10000}'..='\u{10FFFF}'), -    )(input) -} +#[repr(transparent)] +pub struct Char(char); -pub type S<'s> = &'s str;  /// [3]   	S	   ::=   	(#x20 | #x9 | #xD | #xA)+ -pub fn s(input: &str) -> IResult<&str, S> { -    is_a("\u{20}\u{9}\u{D}\u{A}")(input) -} +#[repr(transparent)] +pub struct S<'s>(&'s str); -pub type NameStartChar = char;  /// [4]   	NameStartChar	   ::=   	":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] -pub fn name_start_char(input: &str) -> IResult<&str, NameStartChar> { -    satisfy( -        |c| matches!(c, ':' | 'A'..='Z' | '_' | 'a'..='z' | '\u{C0}'..='\u{D6}' | '\u{D8}'..='\u{F6}' | '\u{F8}'..='\u{2FF}' | '\u{370}'..='\u{37D}' | '\u{37F}'..='\u{1FFF}' | '\u{200C}'..='\u{200D}' | '\u{2070}'..='\u{218F}' | '\u{2C00}'..='\u{2FEF}' | '\u{3001}'..='\u{D7FF}' | '\u{F900}'..='\u{FDCF}' | '\u{FDF0}'..='\u{FFFD}' | '\u{10000}'..='\u{EFFFF}'), -    
)(input) -} +#[repr(transparent)] +pub struct NameStartChar(char); -pub type NameChar = char;  /// [4a]   	NameChar	   ::=   	NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040] -pub fn name_char(input: &str) -> IResult<&str, NameChar> { -    alt(( -        name_start_char, -        satisfy( -            |c| matches!(c, '-' | '.' | '0'..='9' | '\u{B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}'), -        ), -    ))(input) -} +#[repr(transparent)] +pub struct NameChar(char); -pub type Name<'s> = &'s str;  /// [5]   	Name	   ::=   	NameStartChar (NameChar)* -pub fn name(input: &str) -> IResult<&str, Name> { -    recognize(pair(name_start_char, many0(name_char)))(input) -} +#[derive(Debug, Clone, PartialEq, Eq)] +#[repr(transparent)] +pub struct Name<'s>(&'s str); -pub type Names<'s> = &'s str;  /// [6]   	Names	   ::=   	Name (#x20 Name)* -pub fn names(input: &str) -> IResult<&str, Names> { -    recognize(pair(name, many0(pair(char('\u{20}'), name))))(input) -} +#[repr(transparent)] +// TODO: turn into vec +pub struct Names<'s>(&'s str); -pub type Nmtoken<'s> = &'s str;  /// [7]   	Nmtoken	   ::=   	(NameChar)+ -pub fn nmtoken(input: &str) -> IResult<&str, Nmtoken> { -    recognize(many1(name_char))(input) -} +#[derive(Debug, Clone)] +#[repr(transparent)] +pub struct Nmtoken<'s>(&'s str); -pub type Nmtokens<'s> = &'s str;  /// [8]   	Nmtokens	   ::=   	Nmtoken (#x20 Nmtoken)* -pub fn nmtokens(input: &str) -> IResult<&str, Nmtokens> { -    recognize(pair(nmtoken, many0(pair(char('\u{20}'), nmtoken))))(input) -} +#[repr(transparent)] +// TODO: turn into vec +pub struct Nmtokens<'s>(&'s str);  #[derive(Clone, Debug)]  pub enum LiteralData<'s> { @@ -187,275 +94,103 @@ pub enum LiteralData<'s> {      PEReference(PEReference<'s>),      Reference(Reference<'s>),  } - -pub type EntityValue<'s> = Vec<LiteralData<'s>>;  /// [9]   	EntityValue	   ::=   	'"' ([^%&"] | PEReference | Reference)* '"'  ///			|  "'" ([^%&'] | PEReference | 
Reference)* "'" -pub fn entity_value(input: &str) -> IResult<&str, EntityValue> { -    alt(( -        delimited( -            char('"'), -            many0(alt(( -                map( -                    recognize(many_till(take(1usize), peek(one_of("%&\"")))), -                    |string| LiteralData::String(string), -                ), -                map(pe_reference, |pe_reference| { -                    LiteralData::PEReference(pe_reference) -                }), -                map(reference, |reference| LiteralData::Reference(reference)), -            ))), -            char('"'), -        ), -        delimited( -            char('\''), -            many0(alt(( -                map( -                    recognize(many_till(take(1usize), peek(one_of("%&'")))), -                    |string| LiteralData::String(string), -                ), -                map(pe_reference, |pe_reference| { -                    LiteralData::PEReference(pe_reference) -                }), -                map(reference, |reference| LiteralData::Reference(reference)), -            ))), -            char('\''), -        ), -    ))(input) -} +#[derive(Debug)] +#[repr(transparent)] +pub struct EntityValue<'s>(Vec<LiteralData<'s>>); -pub type AttValue<'s> = Vec<LiteralData<'s>>;  /// [10]   	AttValue	   ::=   	'"' ([^<&"] | Reference)* '"'  /// 			|  "'" ([^<&'] | Reference)* "'" -pub fn att_value(input: &str) -> IResult<&str, AttValue> { -    alt(( -        delimited( -            char('"'), -            many0(alt(( -                map( -                    recognize(many_till(take(1usize), peek(one_of("%&\"")))), -                    |string| LiteralData::String(string), -                ), -                map(reference, |reference| LiteralData::Reference(reference)), -            ))), -            char('"'), -        ), -        delimited( -            char('\''), -            many0(alt(( -                map( -                    recognize(many_till(take(1usize), 
peek(one_of("%&'")))), -                    |string| LiteralData::String(string), -                ), -                map(reference, |reference| LiteralData::Reference(reference)), -            ))), -            char('\''), -        ), -    ))(input) -} +#[derive(Clone, Debug)] +#[repr(transparent)] +pub struct AttValue<'s>(Vec<LiteralData<'s>>); -pub type SystemLiteral<'s> = &'s str;  /// [11]   	SystemLiteral	   ::=   	('"' [^"]* '"') | ("'" [^']* "'") -pub fn system_literal(input: &str) -> IResult<&str, SystemLiteral> { -    alt(( -        delimited(char('"'), recognize(many0(none_of("\""))), char('"')), -        delimited(char('\''), recognize(many0(none_of("'"))), char('\'')), -    ))(input) -} +#[derive(Debug)] +#[repr(transparent)] +pub struct SystemLiteral<'s>(&'s str); -pub type PubidLiteral<'s> = &'s str;  /// [12]   	PubidLiteral	   ::=   	'"' PubidChar* '"' | "'" (PubidChar - "'")* "'" -pub fn pubid_literal(input: &str) -> IResult<&str, PubidLiteral> { -    alt(( -        delimited(char('"'), recognize(many0(pubid_char)), char('"')), -        delimited( -            char('\''), -            recognize(many0(recognize(not(char('\''))).and_then(pubid_char))), -            char('\''), -        ), -    ))(input) -} +#[derive(Debug)] +#[repr(transparent)] +pub struct PubidLiteral<'s>(&'s str); -pub type PubidChar<'s> = char;  /// [13]   	PubidChar	   ::=   	#x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] -pub fn pubid_char(input: &str) -> IResult<&str, PubidChar> { -    satisfy(|c| matches!(c, '\u{20}' | '\u{D}' | '\u{A}' | 'a'..='z' | 'A'..='Z' | '0'..='9'))( -        input, -    ) -} +#[repr(transparent)] +pub struct PubidChar(char); -pub type CharData<'s> = &'s str;  /// [14]   	CharData	   ::=   	[^<&]* - ([^<&]* ']]>' [^<&]*) -pub fn char_data(input: &str) -> IResult<&str, CharData> { -    recognize(many_till( -        none_of("<&"), -        peek(alt((recognize(one_of("<&")), tag("]]>")))), -    ))(input) - -    // let tagg: &str; -    // if let 
Ok((_, tagg1)) = peek(take_until::<&str, &str, Error<&str>>("]]>"))(input) { -    //     if let Ok((_, tagg2)) = -    //         peek::<&str, &str, Error<&str>, _>(take_till(|c: char| c == '<' || c == '&'))(input) -    //     { -    //         if tagg1.len() < tagg2.len() { -    //             tagg = tagg1 -    //         } else { -    //             tagg = tagg2 -    //         } -    //     } else { -    //         tagg = tagg1; -    //     } -    // } else { -    //     (_, tagg) = peek(take_till(|c| c == '<' || c == '&'))(input)? -    // } -    // tag(tagg)(input) - -    // recognize(many0(permutation((none_of("<&"), not(tag("]]>"))))))(input) -    // recognize(many0(not(alt((tag("<"), tag("&"), tag("]]>"))))))(input) -    // take_till(|c| c == '<' || c == '&').and_then(take_until("]]>"))(input) -} +#[derive(Clone, Debug, PartialEq, Eq)] +#[repr(transparent)] +pub struct CharData<'s>(&'s str); -pub type Comment<'s> = &'s str; -/// Comment	   ::=   	'<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' -pub fn comment(input: &str) -> IResult<&str, Comment> { -    delimited( -        tag("<!--"), -        recognize(many_till(xmlchar, peek(tag("--")))), -        tag("-->"), -    )(input) -} +/// [15]    Comment	   ::=   	'<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' +#[derive(Clone, Debug, PartialEq, Eq)] +#[repr(transparent)] +pub struct Comment<'s>(&'s str); +/// [16]   	PI	   ::=   	'<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'  #[derive(Clone, Debug)]  pub struct PI<'s> { -    target: &'s str, +    target: PITarget<'s>,      instruction: Option<&'s str>,  } -/// [16]   	PI	   ::=   	'<?' PITarget (S (Char* - (Char* '?>' Char*)))? 
'?>' -pub fn pi(input: &str) -> IResult<&str, PI> { -    map( -        delimited( -            tag("<?"), -            pair( -                pi_target, -                opt(recognize(pair(s, many_till(xmlchar, peek(tag("?>")))))), -            ), -            tag("?>"), -        ), -        |(target, instruction)| PI { -            target, -            instruction, -        }, -    )(input) -} -pub type PITarget<'s> = &'s str;  /// [17]   	PITarget	   ::=   	Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) -pub fn pi_target(input: &str) -> IResult<&str, PITarget> { -    let (rest, name) = name(input)?; -    if name.to_lowercase() == "xml" { -        return Err(Err::Error(Error { -            input, -            // TODO: check if better error to return -            code: ErrorKind::Tag, -        })); -    } else { -        return Ok((rest, name)); -    } -} +#[derive(Clone, Debug, PartialEq, Eq)] +#[repr(transparent)] +pub struct PITarget<'s>(Name<'s>); -pub type CDSect<'s> = (CDStart<'s>, CData<'s>, CDEnd<'s>);  /// [18]   	CDSect	   ::=   	CDStart CData CDEnd -pub fn cd_sect(input: &str) -> IResult<&str, CDSect> { -    tuple((cd_start, cdata, cd_end))(input) -} +#[derive(Clone, Debug, PartialEq, Eq)] +#[repr(transparent)] +pub struct CDSect<'s>(CData<'s>); -pub type CDStart<'s> = &'s str;  /// [19]   	CDStart	   ::=   	'<![CDATA[' -pub fn cd_start(input: &str) -> IResult<&str, CDStart> { -    tag("<![CDATA[")(input) -} +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct CDStart; -pub type CData<'s> = &'s str;  /// [20]   	CData	   ::=   	(Char* - (Char* ']]>' Char*)) -pub fn cdata(input: &str) -> IResult<&str, CData> { -    recognize(many_till(xmlchar, peek(tag("]]>"))))(input) -} +#[derive(Clone, Debug, PartialEq, Eq)] +#[repr(transparent)] +pub struct CData<'s>(&'s str); -pub type CDEnd<'s> = &'s str;  /// [21]   	CDEnd	   ::=   	']]>' -pub fn cd_end(input: &str) -> IResult<&str, CDEnd> { -    tag("]]>")(input) -} +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct 
CDEnd; +/// [22]   	prolog	   ::=   	XMLDecl? Misc* (doctypedecl Misc*)?  pub type Prolog<'s> = (      Option<XMLDecl<'s>>,      Vec<Misc<'s>>,      Option<(DoctypeDecl<'s>, Vec<Misc<'s>>)>,  ); -/// [22]   	prolog	   ::=   	XMLDecl? Misc* (doctypedecl Misc*)? -pub fn prolog(input: &str) -> IResult<&str, Prolog> { -    tuple(( -        opt(xml_decl), -        many0(misc), -        opt(tuple((doctypedecl, many0(misc)))), -    ))(input) -} +/// [23]   	XMLDecl	   ::=   	'<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'  #[derive(Debug)]  pub struct XMLDecl<'s> {      version_info: VersionInfo,      encoding_decl: Option<EncodingDecl<'s>>,      sd_decl: Option<SDDecl>,  } -/// [23]   	XMLDecl	   ::=   	'<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' -pub fn xml_decl(input: &str) -> IResult<&str, XMLDecl> { -    map( -        delimited( -            tag("<?xml"), -            tuple((version_info, opt(encoding_decl), opt(sd_decl))), -            pair(opt(s), tag("?>")), -        ), -        |(version_info, encoding_decl, sd_decl)| XMLDecl { -            version_info, -            encoding_decl, -            sd_decl, -        }, -    )(input) -} -pub type VersionInfo = VersionNum;  /// [24]   	VersionInfo	   ::=   	S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') -pub fn version_info(input: &str) -> IResult<&str, VersionInfo> { -    preceded( -        tuple((s, tag("version"), eq)), -        alt(( -            delimited(char('\''), version_num, char('\'')), -            delimited(char('"'), version_num, char('"')), -        )), -    )(input) -} +#[derive(Debug)] +pub struct VersionInfo(VersionNum);  /// [25]   	Eq	   ::=   	S? '=' S? -pub fn eq(input: &str) -> IResult<&str, &str> { -    recognize(tuple((opt(s), char('='), opt(s))))(input) -} +#[derive(Clone)] +pub struct Eq; +/// [26]   	VersionNum	   ::=   	'1.' [0-9]+  #[derive(Clone, Debug)]  pub enum VersionNum {      One,      OneDotOne,  } -/// [26]   	VersionNum	   ::=   	'1.' 
[0-9]+ -pub fn version_num(input: &str) -> IResult<&str, VersionNum> { -    preceded( -        tag("1."), -        alt(( -            value(VersionNum::One, char('0')), -            value(VersionNum::OneDotOne, char('1')), -        )), -    )(input) -} +/// [27]   	Misc	   ::=   	Comment | PI | S  #[derive(Clone, Debug)]  pub enum Misc<'s> {      Comment(Comment<'s>), @@ -463,80 +198,33 @@ pub enum Misc<'s> {      // TODO: how to deal with whitespace      S,  } -/// [27]   	Misc	   ::=   	Comment | PI | S -pub fn misc(input: &str) -> IResult<&str, Misc> { -    alt(( -        map(comment, |comment| Misc::Comment(comment)), -        map(pi, |pi| Misc::PI(pi)), -        value(Misc::S, s), -    ))(input) -} +/// [16]   	doctypedecl	   ::=   	'<!DOCTYPE' S QName (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>' +/// [28]   	doctypedecl	   ::=   	'<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'  #[derive(Debug)]  pub struct DoctypeDecl<'s> {      name: QName<'s>,      external_id: Option<ExternalID<'s>>,      int_subset: Option<IntSubset<'s>>,  } -/// [16]   	doctypedecl	   ::=   	'<!DOCTYPE' S QName (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>' -/// [28]   	doctypedecl	   ::=   	'<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? 
'>' -pub fn doctypedecl(input: &str) -> IResult<&str, DoctypeDecl> { -    map( -        delimited( -            pair(tag("<!DOCTYPE"), s), -            tuple(( -                q_name, -                opt(preceded(s, external_id)), -                preceded( -                    opt(s), -                    opt(terminated( -                        delimited(tag("["), int_subset, tag("]")), -                        opt(s), -                    )), -                ), -            )), -            tag(">"), -        ), -        |(name, external_id, int_subset)| DoctypeDecl { -            name, -            external_id, -            int_subset, -        }, -    )(input) -} +/// [28a]   	DeclSep	   ::=   	PEReference | S  #[derive(Clone, Debug)]  pub enum DeclSep<'s> {      PEReference(PEReference<'s>),      // TODO: tackle whitespace      S,  } -/// [28a]   	DeclSep	   ::=   	PEReference | S -pub fn decl_sep(input: &str) -> IResult<&str, DeclSep> { -    alt(( -        map(pe_reference, |pe_reference| { -            DeclSep::PEReference(pe_reference) -        }), -        value(DeclSep::S, s), -    ))(input) -}  #[derive(Debug)]  pub enum IntSubsetDeclaration<'s> {      MarkupDecl(MarkupDecl<'s>),      DeclSep(DeclSep<'s>),  } -type IntSubset<'s> = Vec<IntSubsetDeclaration<'s>>;  /// [28b]   	intSubset	   ::=   	(markupdecl | DeclSep)* -pub fn int_subset(input: &str) -> IResult<&str, IntSubset> { -    many0(alt(( -        map(markup_decl, |markup_decl| { -            IntSubsetDeclaration::MarkupDecl(markup_decl) -        }), -        map(decl_sep, |decl_sep| IntSubsetDeclaration::DeclSep(decl_sep)), -    )))(input) -} +pub type IntSubset<'s> = Vec<IntSubsetDeclaration<'s>>; +/// [29]   	markupdecl	   ::=   	elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment  #[derive(Debug)]  pub enum MarkupDecl<'s> {      Elementdecl(Elementdecl<'s>), @@ -546,117 +234,44 @@ pub enum MarkupDecl<'s> {      PI(PI<'s>),      Comment(Comment<'s>),  } -/// [29]   	
markupdecl	   ::=   	elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment -pub fn markup_decl(input: &str) -> IResult<&str, MarkupDecl> { -    alt(( -        map(elementdecl, |elementdecl| { -            MarkupDecl::Elementdecl(elementdecl) -        }), -        map(attlist_decl, |attlist_decl| { -            MarkupDecl::AttlistDecl(attlist_decl) -        }), -        map(entity_decl, |entity_decl| { -            MarkupDecl::EntityDecl(entity_decl) -        }), -        map(notation_decl, |notation_decl| { -            MarkupDecl::NotationDecl(notation_decl) -        }), -        map(pi, |pi| MarkupDecl::PI(pi)), -        map(comment, |comment| MarkupDecl::Comment(comment)), -    ))(input) -} +/// [30]   	extSubset	   ::=   	TextDecl? extSubsetDecl  pub struct ExtSubset<'s> {      text_decl: Option<TextDecl<'s>>,      ext_subset_decl: ExtSubsetDecl<'s>,  } -/// [30]   	extSubset	   ::=   	TextDecl? extSubsetDecl -pub fn ext_subset(input: &str) -> IResult<&str, ExtSubset> { -    map( -        pair(opt(text_decl), ext_subset_decl), -        |(text_decl, ext_subset_decl)| ExtSubset { -            text_decl, -            ext_subset_decl, -        }, -    )(input) -}  pub enum ExtSubsetDeclaration<'s> {      MarkupDecl(MarkupDecl<'s>),      ConditionalSect(ConditionalSect<'s>),      DeclSep(DeclSep<'s>),  } -type ExtSubsetDecl<'s> = Vec<ExtSubsetDeclaration<'s>>;  /// [31]   	extSubsetDecl	   ::=   	( markupdecl | conditionalSect | DeclSep)* -pub fn ext_subset_decl(input: &str) -> IResult<&str, ExtSubsetDecl> { -    many0(alt(( -        map(markup_decl, |markup_decl| { -            ExtSubsetDeclaration::MarkupDecl(markup_decl) -        }), -        map(conditional_sect, |conditional_sect| { -            ExtSubsetDeclaration::ConditionalSect(conditional_sect) -        }), -        map(decl_sep, |decl_sep| ExtSubsetDeclaration::DeclSep(decl_sep)), -    )))(input) -} +type ExtSubsetDecl<'s> = Vec<ExtSubsetDeclaration<'s>>; -pub type SDDecl = bool;  /// [32] 
  	SDDecl	   ::=   	S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) -pub fn sd_decl(input: &str) -> IResult<&str, SDDecl> { -    preceded( -        tuple((s, tag("standalone"), eq)), -        alt(( -            delimited( -                char('\''), -                alt((value(true, tag("yes")), value(false, tag("no")))), -                char('\''), -            ), -            delimited( -                char('"'), -                alt((value(true, tag("yes")), value(false, tag("no")))), -                char('"'), -            ), -        )), -    )(input) -} +pub type SDDecl = bool;  // (Productions 33 through 38 have been removed.) +/// [39]   	element	   ::=   	EmptyElemTag | STag content ETag  #[derive(Debug, Clone)]  pub enum Element<'s> {      Empty(EmptyElemTag<'s>),      NotEmpty(STag<'s>, Content<'s>, ETag<'s>),  } -/// [39]   	element	   ::=   	EmptyElemTag | STag content ETag -pub fn element(input: &str) -> IResult<&str, Element> { -    alt(( -        map(empty_elem_tag, |empty_elem_tag| { -            Element::Empty(empty_elem_tag) -        }), -        map(tuple((s_tag, content, e_tag)), |(s_tag, content, e_tag)| { -            Element::NotEmpty(s_tag, content, e_tag) -        }), -    ))(input) -} +/// [12]   	STag	   ::=   	'<' QName (S Attribute)* S? '>' +/// [40]   	STag	   ::=   	'<' Name (S Attribute)* S? '>'  #[derive(Debug, Clone)]  pub struct STag<'s> {      name: QName<'s>,      attributes: Vec<Attribute<'s>>,  } -/// [12]   	STag	   ::=   	'<' QName (S Attribute)* S? '>' -/// [40]   	STag	   ::=   	'<' Name (S Attribute)* S? 
'>' -pub fn s_tag(input: &str) -> IResult<&str, STag> { -    map( -        delimited( -            tag("<"), -            pair(q_name, many0(preceded(s, attribute))), -            pair(opt(s), tag(">")), -        ), -        |(name, attributes)| STag { name, attributes }, -    )(input) -} +/// [15]   	Attribute	   ::=   	NSAttName Eq AttValue | QName Eq AttValue +// pub type Attribute<'s> = (Name<'s>, AttValue<'s>); +/// [41]   	Attribute	   ::=   	Name Eq AttValue  #[derive(Debug, Clone)]  pub enum Attribute<'s> {      NamespaceDeclaration { @@ -668,36 +283,13 @@ pub enum Attribute<'s> {          value: AttValue<'s>,      },  } -/// [15]   	Attribute	   ::=   	NSAttName Eq AttValue | QName Eq AttValue -pub fn attribute(input: &str) -> IResult<&str, Attribute> { -    alt(( -        map( -            separated_pair(ns_att_name, eq, att_value), -            |(ns_name, value)| Attribute::NamespaceDeclaration { ns_name, value }, -        ), -        map(separated_pair(q_name, eq, att_value), |(name, value)| { -            Attribute::Attribute { name, value } -        }), -    ))(input) -} -// pub type Attribute<'s> = (Name<'s>, AttValue<'s>); -/// [41]   	Attribute	   ::=   	Name Eq AttValue -// pub fn attribute(input: &str) -> IResult<&str, Attribute> { -//     separated_pair(name, eq, att_value)(input) -// } +/// [13]   	ETag	   ::=   	'</' QName S? '>' +/// [42]   	ETag	   ::=   	'</' Name S? '>'  #[derive(Debug, Clone)]  pub struct ETag<'s> {      name: QName<'s>,  } -/// [13]   	ETag	   ::=   	'</' QName S? '>' -/// [42]   	ETag	   ::=   	'</' Name S? '>' -pub fn e_tag(input: &str) -> IResult<&str, ETag> { -    map( -        delimited(tag("</"), q_name, pair(opt(s), tag(">"))), -        |name| ETag { name }, -    )(input) -}  #[derive(Debug, Clone)]  pub enum ContentItem<'s> { @@ -708,68 +300,31 @@ pub enum ContentItem<'s> {      PI(PI<'s>),      Comment(Comment<'s>),  } +/// [43]   	content	   ::=   	CharData? 
((element | Reference | CDSect | PI | Comment) CharData?)*  #[derive(Debug, Clone)]  pub struct Content<'s> {      char_data: Option<CharData<'s>>,      content: Vec<(ContentItem<'s>, Option<CharData<'s>>)>,  } -/// [43]   	content	   ::=   	CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* -pub fn content(input: &str) -> IResult<&str, Content> { -    map( -        pair( -            opt(char_data), -            many0(pair( -                alt(( -                    map(element, |element| ContentItem::Element(element)), -                    map(reference, |reference| ContentItem::Reference(reference)), -                    map(cd_sect, |cd_sect| ContentItem::CDSect(cd_sect)), -                    map(pi, |pi| ContentItem::PI(pi)), -                    map(comment, |comment| ContentItem::Comment(comment)), -                )), -                opt(char_data), -            )), -        ), -        |(char_data, content)| Content { char_data, content }, -    )(input) -} +/// [14]   	EmptyElemTag	   ::=   	'<' QName (S Attribute)* S? '/>' +/// [44]   	EmptyElemTag	   ::=   	'<' Name (S Attribute)* S? '/>'	[WFC: Unique Att Spec]  #[derive(Debug, Clone)]  pub struct EmptyElemTag<'s> {      name: QName<'s>,      attributes: Vec<Attribute<'s>>,  } -/// [14]   	EmptyElemTag	   ::=   	'<' QName (S Attribute)* S? '/>' -/// [44]   	EmptyElemTag	   ::=   	'<' Name (S Attribute)* S? '/>'	[WFC: Unique Att Spec] -pub fn empty_elem_tag(input: &str) -> IResult<&str, EmptyElemTag> { -    map( -        delimited( -            tag("<"), -            pair(q_name, many0(preceded(s, attribute))), -            pair(opt(s), tag("/>")), -        ), -        |(name, attributes)| EmptyElemTag { name, attributes }, -    )(input) -} +/// [17]   	elementdecl	   ::=   	'<!ELEMENT' S QName S contentspec S? '>' +/// [45]   	elementdecl	   ::=   	'<!ELEMENT' S Name S contentspec S? 
'>'  #[derive(Debug)]  pub struct Elementdecl<'s> {      name: QName<'s>,      contentspec: Contentspec<'s>,  } -/// [17]   	elementdecl	   ::=   	'<!ELEMENT' S QName S contentspec S? '>' -/// [45]   	elementdecl	   ::=   	'<!ELEMENT' S Name S contentspec S? '>' -pub fn elementdecl(input: &str) -> IResult<&str, Elementdecl> { -    map( -        delimited( -            pair(tag("<!ELEMENT"), s), -            separated_pair(q_name, s, contentspec), -            pair(opt(s), tag(">")), -        ), -        |(name, contentspec)| Elementdecl { name, contentspec }, -    )(input) -}  // TODO: casings??? +/// [46]   	contentspec	   ::=   	'EMPTY' | 'ANY' | Mixed | children  #[derive(Clone, Debug)]  pub enum Contentspec<'s> {      Empty, @@ -777,16 +332,8 @@ pub enum Contentspec<'s> {      Mixed(Mixed<'s>),      Children(Children<'s>),  } -/// [46]   	contentspec	   ::=   	'EMPTY' | 'ANY' | Mixed | children -pub fn contentspec(input: &str) -> IResult<&str, Contentspec> { -    alt(( -        value(Contentspec::Empty, tag("EMPTY")), -        value(Contentspec::Any, tag("ANY")), -        map(mixed, |mixed| Contentspec::Mixed(mixed)), -        map(children, |children| Contentspec::Children(children)), -    ))(input) -} +/// Occurence ::= ('?' | '*' | '+')?  #[derive(Clone, Debug)]  pub enum Occurence {      Once, @@ -794,46 +341,18 @@ pub enum Occurence {      Many0,      Many1,  } -/// Occurence ::= ('?' | '*' | '+')? -pub fn occurence(input: &str) -> IResult<&str, Occurence> { -    map( -        opt(alt((tag("?"), tag("*"), tag("+")))), -        |occurence| match occurence { -            Some("?") => Occurence::Optional, -            Some("*") => Occurence::Many0, -            Some("+") => Occurence::Many1, -            _ => Occurence::Once, -        }, -    )(input) -}  #[derive(Clone, Debug)]  pub enum ChildrenKind<'s> {      Choice(Choice<'s>),      Seq(Seq<'s>),  } +/// [47]   	children	   ::=   	(choice | seq) ('?' | '*' | '+')?  
#[derive(Clone, Debug)]  pub struct Children<'s> {      kind: ChildrenKind<'s>,      occurence: Occurence,  } -/// [47]   	children	   ::=   	(choice | seq) ('?' | '*' | '+')? -pub fn children(input: &str) -> IResult<&str, Children> { -    map( -        pair( -            alt(( -                map(choice, |choice| ChildrenKind::Choice(choice)), -                map(seq, |seq| ChildrenKind::Seq(seq)), -            )), -            occurence, -        ), -        |(kind, occurence)| Children { kind, occurence }, -    )(input) -    // alt(( -    //     map(pair(choice, occurence), |(choice, occurence)| Children::Choice(choice, occurence)), -    //     map(pair(seq, occurence), |(seq, occurence)| Children::Seq(seq, occurence)) -    // ))(input) -}  #[derive(Clone, Debug)]  pub enum CpKind<'s> { @@ -841,165 +360,63 @@ pub enum CpKind<'s> {      Choice(Choice<'s>),      Seq(Seq<'s>),  } +/// [18]   	cp	   ::=   	(QName | choice | seq) ('?' | '*' | '+')? +/// [48]   	cp	   ::=   	(Name | choice | seq) ('?' | '*' | '+')?  #[derive(Clone, Debug)]  pub struct Cp<'s> {      kind: CpKind<'s>,      occurence: Occurence,  } -/// [18]   	cp	   ::=   	(QName | choice | seq) ('?' | '*' | '+')? -/// [48]   	cp	   ::=   	(Name | choice | seq) ('?' | '*' | '+')? -pub fn cp(input: &str) -> IResult<&str, Cp> { -    map( -        pair( -            alt(( -                map(q_name, |name| CpKind::Name(name)), -                map(choice, |choice| CpKind::Choice(choice)), -                map(seq, |seq| CpKind::Seq(seq)), -            )), -            occurence, -        ), -        |(kind, occurence)| Cp { kind, occurence }, -    )(input) -} +/// [49]   	choice	   ::=   	'(' S? cp ( S? '|' S? cp )+ S? ')'  #[derive(Clone, Debug)]  pub struct Choice<'s>(Vec<Cp<'s>>); -/// [49]   	choice	   ::=   	'(' S? cp ( S? '|' S? cp )+ S? 
')' -pub fn choice(input: &str) -> IResult<&str, Choice> { -    map( -        delimited( -            pair(tag("("), opt(s)), -            pair(cp, many1(preceded(tuple((opt(s), tag("|"), opt(s))), cp))), -            pair(opt(s), tag(")")), -        ), -        |(head, tail)| { -            let choice = vec![vec![head], tail].concat(); -            Choice(choice) -        }, -    )(input) -} +/// [50]   	seq	   ::=   	'(' S? cp ( S? ',' S? cp )* S? ')'  #[derive(Clone, Debug)]  pub struct Seq<'s>(Vec<Cp<'s>>); -/// [50]   	seq	   ::=   	'(' S? cp ( S? ',' S? cp )* S? ')' -pub fn seq(input: &str) -> IResult<&str, Seq> { -    map( -        delimited( -            pair(tag("("), opt(s)), -            pair(cp, many0(preceded(tuple((opt(s), tag(","), opt(s))), cp))), -            pair(opt(s), tag(")")), -        ), -        |(head, tail)| { -            let seq = vec![vec![head], tail].concat(); -            Seq(seq) -        }, -    )(input) -}  // always contains #PCDATA -#[derive(Clone, Debug)] -pub struct Mixed<'s>(Vec<QName<'s>>);  /// [19]   	Mixed	   ::=   	'(' S? '#PCDATA' (S? '|' S? QName)* S? ')*' | '(' S? '#PCDATA' S? ')'  /// [51]   	Mixed	   ::=   	'(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')' -pub fn mixed(input: &str) -> IResult<&str, Mixed> { -    alt(( -        map( -            delimited( -                tuple((tag("("), s, tag("#PCDATA"))), -                many0(preceded(tuple((opt(s), tag("|"), opt(s))), q_name)), -                pair(opt(s), tag(")*")), -            ), -            |names| Mixed(names), -        ), -        value( -            Mixed(Vec::new()), -            tuple((tag("("), opt(s), tag("#PCDATA"), opt(s), tag(")"))), -        ), -    ))(input) -} +#[derive(Clone, Debug)] +pub struct Mixed<'s>(Vec<QName<'s>>); +/// [20]   	AttlistDecl	   ::=   	'<!ATTLIST' S QName AttDef* S? '>' +/// [52]   	AttlistDecl	   ::=   	'<!ATTLIST' S Name AttDef* S? 
'>'  #[derive(Debug)]  pub struct AttlistDecl<'s> {      element_type: QName<'s>,      att_defs: Vec<AttDef<'s>>,  } -/// [20]   	AttlistDecl	   ::=   	'<!ATTLIST' S QName AttDef* S? '>' -/// [52]   	AttlistDecl	   ::=   	'<!ATTLIST' S Name AttDef* S? '>' -pub fn attlist_decl(input: &str) -> IResult<&str, AttlistDecl> { -    map( -        delimited( -            pair(tag("<!ATTLIST"), s), -            pair(q_name, many0(att_def)), -            pair(opt(s), tag(">")), -        ), -        |(element_type, att_defs)| AttlistDecl { -            element_type, -            att_defs, -        }, -    )(input) -}  #[derive(Debug)]  pub enum AttDefName<'s> {      QName(QName<'s>),      NSAttName(NSAttName<'s>),  } +/// [21]   	AttDef	   ::=   	S (QName | NSAttName) S AttType S DefaultDecl +/// [53]   	AttDef	   ::=   	S Name S AttType S DefaultDecl  #[derive(Debug)]  pub struct AttDef<'s> {      name: AttDefName<'s>,      att_type: AttType<'s>,      default_decl: DefaultDecl<'s>,  } -/// [21]   	AttDef	   ::=   	S (QName | NSAttName) S AttType S DefaultDecl -/// [53]   	AttDef	   ::=   	S Name S AttType S DefaultDecl -pub fn att_def(input: &str) -> IResult<&str, AttDef> { -    map( -        tuple(( -            preceded( -                s, -                alt(( -                    map(q_name, |q_name| AttDefName::QName(q_name)), -                    map(ns_att_name, |ns_att_name| { -                        AttDefName::NSAttName(ns_att_name) -                    }), -                )), -            ), -            preceded(s, att_type), -            preceded(s, default_decl), -        )), -        |(name, att_type, default_decl)| AttDef { -            name, -            att_type, -            default_decl, -        }, -    )(input) -} +/// [54]   	AttType	   ::=   	StringType | TokenizedType | EnumeratedType  #[derive(Clone, Debug)]  pub enum AttType<'s> {      StringType,      TokenizedType(TokenizedType),      EnumeratedType(EnumeratedType<'s>),  } -/// [54]   	AttType	
   ::=   	StringType | TokenizedType | EnumeratedType -pub fn att_type(input: &str) -> IResult<&str, AttType> { -    alt(( -        value(AttType::StringType, string_type), -        map(tokenized_type, |tokenized_type| { -            AttType::TokenizedType(tokenized_type) -        }), -        map(enumerated_type, |enumerated_type| { -            AttType::EnumeratedType(enumerated_type) -        }), -    ))(input) -} -pub type StringType<'s> = &'s str;  /// [55]   	StringType	   ::=   	'CDATA' -pub fn string_type(input: &str) -> IResult<&str, StringType> { -    tag("CDATA")(input) -} +#[derive(Clone)] +pub struct StringType; +/// [56]   	TokenizedType	   ::=   	'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'  #[derive(Clone, Debug)]  pub enum TokenizedType {      ID, @@ -1010,265 +427,97 @@ pub enum TokenizedType {      NMToken,      NMTokens,  } -/// [56]   	TokenizedType	   ::=   	'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' -pub fn tokenized_type(input: &str) -> IResult<&str, TokenizedType> { -    alt(( -        value(TokenizedType::ID, tag("ID")), -        // TODO: check if this is required -        // try idrefs first to avoid losing 'S' -        value(TokenizedType::IDRefs, tag("IDREFS")), -        value(TokenizedType::IDRef, tag("IDREF")), -        value(TokenizedType::Entity, tag("ENTITY")), -        value(TokenizedType::Entities, tag("ENTITIES")), -        // same here -        value(TokenizedType::NMTokens, tag("NMTOKENS")), -        value(TokenizedType::NMToken, tag("NMTOKEN")), -    ))(input) -} +/// [57]   	EnumeratedType	   ::=   	NotationType | Enumeration  #[derive(Debug, Clone)]  pub enum EnumeratedType<'s> {      NotationType(NotationType<'s>),      Enumeration(Enumeration<'s>),  } -/// [57]   	EnumeratedType	   ::=   	NotationType | Enumeration -pub fn enumerated_type(input: &str) -> IResult<&str, EnumeratedType> { -    alt(( -        map(notation_type, |notation_type| { -            
EnumeratedType::NotationType(notation_type) -        }), -        map(enumeration, |enumeration| { -            EnumeratedType::Enumeration(enumeration) -        }), -    ))(input) -} +/// [58]   	NotationType	   ::=   	'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'  #[derive(Debug, Clone)]  pub struct NotationType<'s>(Vec<Name<'s>>); -/// [58]   	NotationType	   ::=   	'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' -pub fn notation_type(input: &str) -> IResult<&str, NotationType> { -    map( -        delimited( -            tuple((tag("NOTATION"), s, tag("("), opt(s))), -            pair( -                name, -                many0(preceded(tuple((opt(s), tag("|"), opt(s))), name)), -            ), -            pair(opt(s), tag(")")), -        ), -        |(head, tail)| { -            let notation_type = vec![vec![head], tail].concat(); -            NotationType(notation_type) -        }, -    )(input) -} +/// [59]   	Enumeration	   ::=   	'(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'  #[derive(Debug, Clone)]  pub struct Enumeration<'s>(Vec<Nmtoken<'s>>); -/// [59]   	Enumeration	   ::=   	'(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' -pub fn enumeration(input: &str) -> IResult<&str, Enumeration> { -    map( -        delimited( -            pair(tag("("), opt(s)), -            pair( -                nmtoken, -                many0(preceded(tuple((opt(s), tag("|"), opt(s))), nmtoken)), -            ), -            pair(opt(s), tag(")")), -        ), -        |(head, tail)| { -            let enumeration = vec![vec![head], tail].concat(); -            Enumeration(enumeration) -        }, -    )(input) -} +/// [60]   	DefaultDecl	   ::=   	'#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)  #[derive(Debug, Clone)]  pub enum DefaultDecl<'s> {      Required,      Implied,      Fixed(AttValue<'s>),  } -/// [60]   	DefaultDecl	   ::=   	'#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? 
AttValue) -pub fn default_decl(input: &str) -> IResult<&str, DefaultDecl> { -    alt(( -        value(DefaultDecl::Required, tag("#REQUIRED")), -        value(DefaultDecl::Implied, tag("#IMPLIED")), -        map( -            preceded(opt(pair(tag("#FIXED"), s)), att_value), -            |att_value| DefaultDecl::Fixed(att_value), -        ), -    ))(input) -} +/// [61]   	conditionalSect	   ::=   	includeSect | ignoreSect  pub enum ConditionalSect<'s> {      IncludeSect(IncludeSect<'s>),      IgnoreSect(IgnoreSect<'s>),  } -/// [61]   	conditionalSect	   ::=   	includeSect | ignoreSect -pub fn conditional_sect(input: &str) -> IResult<&str, ConditionalSect> { -    alt(( -        map(include_sect, |include_sect| { -            ConditionalSect::IncludeSect(include_sect) -        }), -        map(ignore_sect, |ignore_sect| { -            ConditionalSect::IgnoreSect(ignore_sect) -        }), -    ))(input) -} -pub struct IncludeSect<'s>(ExtSubsetDecl<'s>);  /// [62]   	includeSect	   ::=   	'<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' -pub fn include_sect(input: &str) -> IResult<&str, IncludeSect> { -    map( -        delimited( -            tuple((tag("<!["), opt(s), tag("INCLUDE"), opt(s), tag("["))), -            ext_subset_decl, -            tag("]]>"), -        ), -        |ext_subset_decl| IncludeSect(ext_subset_decl), -    )(input) -} +pub struct IncludeSect<'s>(ExtSubsetDecl<'s>); -pub struct IgnoreSect<'s>(Vec<IgnoreSectContents<'s>>);  /// [63]   	ignoreSect	   ::=   	'<![' S? 'IGNORE' S? 
'[' ignoreSectContents* ']]>' -pub fn ignore_sect(input: &str) -> IResult<&str, IgnoreSect> { -    map( -        delimited( -            tuple((tag("<!["), opt(s), tag("IGNORE"), opt(s), tag("["))), -            many0(ignore_sect_contents), -            tag("]]>"), -        ), -        |ignore_sect_contents| IgnoreSect(ignore_sect_contents), -    )(input) -} +pub struct IgnoreSect<'s>(Vec<IgnoreSectContents<'s>>); +/// [64]   	ignoreSectContents	   ::=   	Ignore ('<![' ignoreSectContents ']]>' Ignore)*  pub struct IgnoreSectContents<'s> {      // TODO: what the fuck does this mean      ignore: Ignore<'s>,      ignore_list: Vec<(IgnoreSectContents<'s>, Ignore<'s>)>,  } -/// [64]   	ignoreSectContents	   ::=   	Ignore ('<![' ignoreSectContents ']]>' Ignore)* -pub fn ignore_sect_contents(input: &str) -> IResult<&str, IgnoreSectContents> { -    map( -        pair( -            ignore, -            many0(tuple(( -                delimited(tag("<!["), ignore_sect_contents, tag("]]>")), -                ignore, -            ))), -        ), -        |(ignore, ignore_list)| IgnoreSectContents { -            ignore, -            ignore_list, -        }, -    )(input) -} -pub type Ignore<'s> = &'s str;  /// [65]   	Ignore	   ::=   	Char* - (Char* ('<![' | ']]>') Char*) -pub fn ignore(input: &str) -> IResult<&str, Ignore> { -    recognize(many_till(xmlchar, peek(alt((tag("<!["), tag("]]>"))))))(input) -} +pub struct Ignore<'s>(&'s str); +/// [66]   	CharRef	   ::=   	'&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'  #[derive(Clone, Debug)]  pub enum CharRef<'s> {      Decimal(&'s str),      Hexadecimal(&'s str),  } -/// [66]   	CharRef	   ::=   	'&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' -pub fn char_ref(input: &str) -> IResult<&str, CharRef> { -    alt(( -        delimited( -            tag("&#"), -            map(take_while(|c| matches!(c, '0'..='9')), |decimal| { -                CharRef::Decimal(decimal) -            }), -            tag(";"), -        ), -        delimited( -      
      tag("&#x"), -            map( -                take_while(|c| matches!(c, '0'..='9' | 'a'..='f' | 'A'..='F' )), -                |hexadecimal| CharRef::Hexadecimal(hexadecimal), -            ), -            tag(";"), -        ), -    ))(input) -} +/// [67]   	Reference	   ::=   	EntityRef | CharRef  #[derive(Clone, Debug)]  pub enum Reference<'s> {      EntityRef(EntityRef<'s>),      CharRef(CharRef<'s>),  } -/// [67]   	Reference	   ::=   	EntityRef | CharRef -pub fn reference(input: &str) -> IResult<&str, Reference> { -    alt(( -        map(entity_ref, |entity_ref| Reference::EntityRef(entity_ref)), -        map(char_ref, |char_ref| Reference::CharRef(char_ref)), -    ))(input) -} -pub type EntityRef<'s> = &'s str;  /// [68]   	EntityRef	   ::=   	'&' Name ';' -pub fn entity_ref(input: &str) -> IResult<&str, EntityRef> { -    delimited(tag("&"), name, tag(";"))(input) -} +#[derive(Clone, Debug)] +pub struct EntityRef<'s>(Name<'s>); -pub type PEReference<'s> = &'s str;  /// [69]   	PEReference	   ::=   	'%' Name ';' -pub fn pe_reference(input: &str) -> IResult<&str, PEReference> { -    delimited(tag("%"), name, tag(";"))(input) -} +#[derive(Clone, Debug)] +#[repr(transparent)] +pub struct PEReference<'s>(Name<'s>); +/// [70]   	EntityDecl	   ::=   	GEDecl | PEDecl  #[derive(Debug)]  pub enum EntityDecl<'s> {      GEDecl(GEDecl<'s>),      PEDecl(PEDecl<'s>),  } -/// [70]   	EntityDecl	   ::=   	GEDecl | PEDecl -pub fn entity_decl(input: &str) -> IResult<&str, EntityDecl> { -    alt(( -        map(ge_decl, |ge_decl| EntityDecl::GEDecl(ge_decl)), -        map(pe_decl, |pe_decl| EntityDecl::PEDecl(pe_decl)), -    ))(input) -} +/// [71]   	GEDecl	   ::=   	'<!ENTITY' S Name S EntityDef S? '>'  #[derive(Debug)]  pub struct GEDecl<'s> {      name: Name<'s>,      entity_def: EntityDef<'s>,  } -/// [71]   	GEDecl	   ::=   	'<!ENTITY' S Name S EntityDef S? 
'>' -pub fn ge_decl(input: &str) -> IResult<&str, GEDecl> { -    map( -        delimited( -            pair(tag("<!ENTITY"), s), -            separated_pair(name, s, entity_def), -            pair(opt(s), tag(">")), -        ), -        |(name, entity_def)| GEDecl { name, entity_def }, -    )(input) -} +/// [72]   	PEDecl	   ::=   	'<!ENTITY' S '%' S Name S PEDef S? '>'  #[derive(Debug)]  pub struct PEDecl<'s> {      name: Name<'s>,      pe_def: PEDef<'s>,  } -/// [72]   	PEDecl	   ::=   	'<!ENTITY' S '%' S Name S PEDef S? '>' -pub fn pe_decl(input: &str) -> IResult<&str, PEDecl> { -    map( -        delimited( -            tuple((tag("<!ENTITY"), s, tag("%"), s)), -            separated_pair(name, s, pe_def), -            pair(opt(s), tag(">")), -        ), -        |(name, pe_def)| PEDecl { name, pe_def }, -    )(input) -} +/// [73]   	EntityDef	   ::=   	EntityValue | (ExternalID NDataDecl?)  #[derive(Debug)]  pub enum EntityDef<'s> {      EntityValue(EntityValue<'s>), @@ -1277,249 +526,62 @@ pub enum EntityDef<'s> {          ndata_decl: Option<NDataDecl<'s>>,      },  } -/// [73]   	EntityDef	   ::=   	EntityValue | (ExternalID NDataDecl?) 
-pub fn entity_def(input: &str) -> IResult<&str, EntityDef> { -    alt(( -        map(entity_value, |entity_value| { -            EntityDef::EntityValue(entity_value) -        }), -        map( -            pair(external_id, opt(ndata_decl)), -            |(external_id, ndata_decl)| EntityDef::ExternalID { -                external_id, -                ndata_decl, -            }, -        ), -    ))(input) -} +/// [74]   	PEDef	   ::=   	EntityValue | ExternalID  #[derive(Debug)]  pub enum PEDef<'s> {      EntityValue(EntityValue<'s>),      ExternalID(ExternalID<'s>),  } -/// [74]   	PEDef	   ::=   	EntityValue | ExternalID -pub fn pe_def(input: &str) -> IResult<&str, PEDef> { -    alt(( -        map(entity_value, |entity_value| { -            PEDef::EntityValue(entity_value) -        }), -        map(external_id, |external_id| PEDef::ExternalID(external_id)), -    ))(input) -} +/// [75]   	ExternalID	   ::=   	'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral  #[derive(Debug)]  pub enum ExternalID<'s> {      SYSTEM { -        system_identifier: &'s str, +        system_identifier: SystemLiteral<'s>,      },      PUBLIC { -        public_identifier: &'s str, -        system_identifier: &'s str, +        public_identifier: PubidLiteral<'s>, +        system_identifier: SystemLiteral<'s>,      },  } -/// [75]   	ExternalID	   ::=   	'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral -// pub fn external_id(input: &str) -> IResult<&str, ExternalID> { -pub fn external_id(input: &str) -> IResult<&str, ExternalID> { -    alt(( -        map( -            preceded(pair(tag("SYSTEM"), s), system_literal), -            |system_identifier| ExternalID::SYSTEM { system_identifier }, -        ), -        map( -            preceded( -                pair(tag("PUBLIC"), s), -                separated_pair(pubid_literal, s, system_literal), -            ), -            |(public_identifier, system_identifier)| ExternalID::PUBLIC { -                
public_identifier, -                system_identifier, -            }, -        ), -    ))(input) -} -pub type NDataDecl<'s> = &'s str;  /// [76]   	NDataDecl	   ::=   	S 'NDATA' S Name -pub fn ndata_decl(input: &str) -> IResult<&str, NDataDecl> { -    preceded(tuple((s, tag("NDATA"), s)), name)(input) -} +#[derive(Debug)] +pub struct NDataDecl<'s>(Name<'s>); +/// [77]   	TextDecl	   ::=   	'<?xml' VersionInfo? EncodingDecl S? '?>'  pub struct TextDecl<'s> {      version_info: Option<VersionInfo>,      encoding_decl: EncodingDecl<'s>,  } -/// [77]   	TextDecl	   ::=   	'<?xml' VersionInfo? EncodingDecl S? '?>' -pub fn text_decl(input: &str) -> IResult<&str, TextDecl> { -    map( -        delimited( -            tag("<?xml"), -            pair(opt(version_info), terminated(encoding_decl, opt(s))), -            tag("?>"), -        ), -        |(version_info, encoding_decl)| TextDecl { -            version_info, -            encoding_decl, -        }, -    )(input) -} +/// [78]   	extParsedEnt	   ::=   	TextDecl? content  pub struct ExtParsedEnt<'s> {      text_decl: Option<TextDecl<'s>>,      content: Content<'s>,  } -/// [78]   	extParsedEnt	   ::=   	TextDecl? 
content -pub fn ext_parsed_ent(input: &str) -> IResult<&str, ExtParsedEnt> { -    map(pair(opt(text_decl), content), |(text_decl, content)| { -        ExtParsedEnt { text_decl, content } -    })(input) -} -pub type EncodingDecl<'s> = EncName<'s>;  /// [80]   	EncodingDecl	   ::=   	S 'encoding' Eq ('"' EncName '"' | "'" EncName -pub fn encoding_decl(input: &str) -> IResult<&str, EncodingDecl> { -    preceded( -        tuple((s, tag("encoding"), eq)), -        alt(( -            delimited(char('"'), enc_name, char('"')), -            delimited(char('\''), enc_name, char('\'')), -        )), -    )(input) -} +#[derive(Debug)] +pub struct EncodingDecl<'s>(EncName<'s>); -pub type EncName<'s> = &'s str;  /// [81]   	EncName	   ::=   	[A-Za-z] ([A-Za-z0-9._] | '-')* -pub fn enc_name(input: &str) -> IResult<&str, EncName> { -    recognize(pair( -        satisfy(|c| matches!(c, 'A'..='Z' | 'a'..='z' )), -        many0(satisfy( -            |c| matches!(c, 'A'..='Z' | 'a'..='z' | '0'..='9' | '.' | '_' | '-' ), -        )), -    ))(input) -} -  #[derive(Debug)] -pub struct NotationDecl<'s> { -    name: &'s str, -    id: NotationDeclID<'s>, -} +pub struct EncName<'s>(&'s str); +  #[derive(Debug)]  pub enum NotationDeclID<'s> {      External(ExternalID<'s>),      Public(PublicID<'s>),  }  /// [82]   	NotationDecl	   ::=   	'<!NOTATION' S Name S (ExternalID | PublicID) S? 
'>' -pub fn notation_decl(input: &str) -> IResult<&str, NotationDecl> { -    map( -        delimited( -            pair(tag("<!NOTATION"), s), -            separated_pair( -                name, -                s, -                alt(( -                    map(external_id, |external_id| { -                        NotationDeclID::External(external_id) -                    }), -                    map(public_id, |public_id| NotationDeclID::Public(public_id)), -                )), -            ), -            pair(opt(s), tag(">")), -        ), -        |(name, id)| NotationDecl { name, id }, -    )(input) +#[derive(Debug)] +pub struct NotationDecl<'s> { +    name: Name<'s>, +    id: NotationDeclID<'s>,  } -pub type PublicID<'s> = &'s str;  /// [83]   	PublicID	   ::=   	'PUBLIC' S PubidLiteral -pub fn public_id(input: &str) -> IResult<&str, PublicID> { -    preceded(pair(tag("PUBLIC"), s), pubid_literal)(input) -} - -#[cfg(test)] -mod tests { -    use std::num::NonZero; - -    use super::*; - -    #[test] -    fn test_char_data() { -        assert_eq!(Ok(("&def]]>ghi", "abc")), char_data("abc&def]]>ghi")); -        assert_eq!(Ok(("]]>ghi", "abcdef")), char_data("abcdef]]>ghi")); -        assert_eq!(Ok(("&defghi", "abc")), char_data("abc&defghi")); -        assert_eq!(Ok(("]]>def&ghi", "abc")), char_data("abc]]>def&ghi")); -        assert_eq!(Ok(("&ghi", "abc]>def")), char_data("abc]>def&ghi")); -        assert_eq!( -            Err(Err::Incomplete(nom::Needed::Size( -                NonZero::new(3usize).unwrap() -            ))), -            char_data("abcdefghi") -        ); -    } - -    #[test] -    fn test_comment() { -        assert_eq!(Ok(("", "")), comment("<!---->")); -        assert_eq!(Ok(("", "asdf")), comment("<!--asdf-->")); -        assert_eq!(Ok(("", "as-df")), comment("<!--as-df-->")); -        assert_eq!( -            Err(Err::Incomplete(nom::Needed::Size( -                NonZero::new(2usize).unwrap() -            ))), -            
comment("<!--asdf") -        ); -    } - -    #[test] -    fn test_pi_target() { -        assert_eq!(Ok((" ", "asdf")), pi_target("asdf ")); -        assert_eq!(Ok((" ", "xmlasdf")), pi_target("xmlasdf ")); -        assert_eq!( -            Err(Err::Error(Error { -                input: "xml ", -                code: ErrorKind::Tag -            })), -            pi_target("xml ") -        ); -        assert_eq!( -            Err(Err::Error(Error { -                input: "xMl ", -                code: ErrorKind::Tag -            })), -            pi_target("xMl ") -        ); -    } - -    #[test] -    fn test_cd_sect() { -        assert_eq!( -            Ok(( -                "", -                ("<![CDATA[", "<greeting>Hello, world!</greeting>", "]]>") -            )), -            cd_sect("<![CDATA[<greeting>Hello, world!</greeting>]]>") -        ) -    } - -    #[test] -    fn test_cd_start() { -        assert_eq!(Ok(("asdf", "<![CDATA[")), cd_start("<![CDATA[asdf")) -    } - -    #[test] -    fn test_cdata() { -        assert_eq!(Ok(("]]>asdf", "asdf")), cdata("asdf]]>asdf")); -        assert_eq!( -            Ok(("]]>asdf", "<![CDATA[asdf")), -            cdata("<![CDATA[asdf]]>asdf") -        ); -        assert_eq!( -            Ok(("]]>asdf", "<greeting>Hello, world!</greeting>")), -            cdata("<greeting>Hello, world!</greeting>]]>asdf") -        ) -    } - -    #[test] -    fn test_cd_end() { -        assert_eq!(Ok(("asdf", "]]>")), cd_end("]]>asdf")) -    } -} +#[derive(Debug)] +pub struct PublicID<'s>(PubidLiteral<'s>); diff --git a/src/xml/parsers.rs b/src/xml/parsers.rs index 8b13789..135f5c7 100644 --- a/src/xml/parsers.rs +++ b/src/xml/parsers.rs @@ -1 +1,1503 @@ +use nom::{ +    branch::alt, +    bytes::streaming::{is_a, tag, take, take_while}, +    character::{ +        complete::one_of, +        streaming::{char, none_of, satisfy}, +    }, +    combinator::{map, not, opt, peek, recognize, value}, +    error::{Error, ErrorKind}, +    
multi::{many0, many1, many_till}, +    sequence::{delimited, pair, preceded, separated_pair, terminated, tuple}, +    Err, IResult, Parser as NomParser, +}; +use crate::xml::NSAttName; + +use super::{ +    AttDef, AttDefName, AttType, AttValue, AttlistDecl, Attribute, CDEnd, CDSect, CDStart, CData, +    Char, CharData, CharRef, Children, ChildrenKind, Choice, Comment, ConditionalSect, Content, +    ContentItem, Contentspec, Cp, CpKind, DeclSep, DefaultAttName, DefaultDecl, DoctypeDecl, +    Document, ETag, Element, Elementdecl, EmptyElemTag, EncName, EncodingDecl, EntityDecl, +    EntityDef, EntityRef, EntityValue, EnumeratedType, Enumeration, Eq, ExtParsedEnt, ExtSubset, +    ExtSubsetDecl, ExtSubsetDeclaration, ExternalID, GEDecl, Ignore, IgnoreSect, +    IgnoreSectContents, IncludeSect, IntSubset, IntSubsetDeclaration, LiteralData, LocalPart, +    MarkupDecl, Misc, Mixed, NCName, NDataDecl, Name, NameChar, NameStartChar, Names, Nmtoken, +    Nmtokens, NotationDecl, NotationDeclID, NotationType, Occurence, PEDecl, PEDef, PEReference, +    PITarget, Prefix, PrefixedAttName, PrefixedName, Prolog, PubidChar, PubidLiteral, PublicID, +    QName, Reference, SDDecl, STag, Seq, StringType, SystemLiteral, TextDecl, TokenizedType, +    UnprefixedName, VersionInfo, VersionNum, XMLDecl, PI, S, +}; + +pub trait Parser<'s, T> { +    fn parse(input: &'s str) -> IResult<&str, T>; +} + +/// [1]   	NSAttName	   ::=   	PrefixedAttName | DefaultAttName +impl<'s> Parser<'s, NSAttName<'s>> for NSAttName<'s> { +    fn parse(input: &'s str) -> IResult<&str, NSAttName<'s>> { +        alt(( +            map(PrefixedAttName::parse, |prefixed_att_name| { +                NSAttName::PrefixedAttName(prefixed_att_name) +            }), +            value(NSAttName::DefaultAttName, DefaultAttName::parse), +        ))(input) +    } +} + +/// [2]   	PrefixedAttName	   ::=   	'xmlns:' NCName +impl<'s> Parser<'s, PrefixedAttName<'s>> for PrefixedAttName<'s> { +    fn parse(input: &'s str) -> 
IResult<&str, PrefixedAttName<'s>> { +        map(preceded(tag("xmlns:"), NCName::parse), |nc_name| { +            PrefixedAttName(nc_name) +        })(input) +    } +} + +/// [3]   	DefaultAttName	   ::=   	'xmlns'; +impl Parser<'_, DefaultAttName> for DefaultAttName { +    fn parse(input: &str) -> IResult<&str, DefaultAttName> { +        value(DefaultAttName, tag("xmlns"))(input) +    } +} + +/// [4]   	NCName	   ::=   	Name - (Char* ':' Char*) +impl<'s> Parser<'s, NCName<'s>> for NCName<'s> { +    fn parse(input: &'s str) -> IResult<&str, NCName<'s>> { +        map( +            recognize(pair( +                recognize(NameStartChar::parse).and_then(satisfy(|c| c != ':')), +                many_till(NameChar::parse, peek(char(':'))), +            )), +            |nc_name| NCName(nc_name), +        )(input) +    } +} + +/// [7]   	QName	   ::=   	PrefixedName | UnprefixedName +impl<'s> Parser<'s, QName<'s>> for QName<'s> { +    fn parse(input: &'s str) -> IResult<&str, QName<'s>> { +        alt(( +            map(PrefixedName::parse, |prefixed_name| { +                QName::PrefixedName(prefixed_name) +            }), +            map(UnprefixedName::parse, |unprefixed_name| { +                QName::UnprefixedName(unprefixed_name) +            }), +        ))(input) +    } +} + +/// [8]   	PrefixedName	   ::=   	Prefix ':' LocalPart +impl<'s> Parser<'s, PrefixedName<'s>> for PrefixedName<'s> { +    fn parse(input: &'s str) -> IResult<&str, PrefixedName<'s>> { +        map( +            separated_pair(Prefix::parse, char(':'), LocalPart::parse), +            |(prefix, local_part)| PrefixedName { prefix, local_part }, +        )(input) +    } +} + +/// [9]   	UnprefixedName	   ::=   	LocalPart +impl<'s> Parser<'s, UnprefixedName<'s>> for UnprefixedName<'s> { +    fn parse(input: &'s str) -> IResult<&str, UnprefixedName<'s>> { +        map(LocalPart::parse, |local_part| UnprefixedName(local_part))(input) +    } +} + +/// [10]   	Prefix	   ::=   	NCName 
+impl<'s> Parser<'s, Prefix<'s>> for Prefix<'s> { +    fn parse(input: &'s str) -> IResult<&str, Prefix<'s>> { +        map(NCName::parse, |nc_name| Prefix(nc_name))(input) +    } +} + +/// [11]   	LocalPart	   ::=   	NCName +impl<'s> Parser<'s, LocalPart<'s>> for LocalPart<'s> { +    fn parse(input: &'s str) -> IResult<&str, LocalPart<'s>> { +        map(NCName::parse, |nc_name| LocalPart(nc_name))(input) +    } +} + +// xml spec + +/// [1]   	document	   ::=   	prolog element Misc* +impl<'s> Parser<'s, Document<'s>> for Document<'s> { +    fn parse(input: &'s str) -> IResult<&str, Document<'s>> { +        tuple((Prolog::parse, Element::parse, many0(Misc::parse)))(input) +    } +} + +/// [2]   	Char	   ::=   	#x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]	/* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */ +impl Parser<'_, Char> for Char { +    fn parse(input: &str) -> IResult<&str, Char> { +        map( +            satisfy( +                |c| matches!(c, '\u{9}' | '\u{A}' | '\u{D}' | '\u{20}'..='\u{D7FF}' | '\u{E000}'..='\u{FFFD}' | '\u{10000}'..='\u{10FFFF}'), +            ), +            |char| Char(char), +        )(input) +    } +} + +/// [3]   	S	   ::=   	(#x20 | #x9 | #xD | #xA)+ +impl<'s> Parser<'s, S<'s>> for S<'s> { +    fn parse(input: &'s str) -> IResult<&str, S<'s>> { +        map(is_a("\u{20}\u{9}\u{D}\u{A}"), |s| S(s))(input) +    } +} + +/// [4]   	NameStartChar	   ::=   	":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] +impl Parser<'_, NameStartChar> for NameStartChar { +    fn parse(input: &str) -> IResult<&str, NameStartChar> { +        map( +            satisfy( +                |c| matches!(c, ':' | 'A'..='Z' | '_' | 'a'..='z' | '\u{C0}'..='\u{D6}' | '\u{D8}'..='\u{F6}' | '\u{F8}'..='\u{2FF}' | 
'\u{370}'..='\u{37D}' | '\u{37F}'..='\u{1FFF}' | '\u{200C}'..='\u{200D}' | '\u{2070}'..='\u{218F}' | '\u{2C00}'..='\u{2FEF}' | '\u{3001}'..='\u{D7FF}' | '\u{F900}'..='\u{FDCF}' | '\u{FDF0}'..='\u{FFFD}' | '\u{10000}'..='\u{EFFFF}'), +            ), +            |c| NameStartChar(c), +        )(input) +    } +} + +/// [4a]   	NameChar	   ::=   	NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040] +impl Parser<'_, NameChar> for NameChar { +    fn parse(input: &str) -> IResult<&str, NameChar> { +        map( +            alt(( +                map(NameStartChar::parse, |NameStartChar(c)| c), +                satisfy( +                    |c| matches!(c, '-' | '.' | '0'..='9' | '\u{B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}'), +                ), +            )), +            |c| NameChar(c), +        )(input) +    } +} + +/// [5]   	Name	   ::=   	NameStartChar (NameChar)* +impl<'s> Parser<'s, Name<'s>> for Name<'s> { +    fn parse(input: &'s str) -> IResult<&str, Name<'s>> { +        map( +            recognize(pair(NameStartChar::parse, many0(NameChar::parse))), +            |name| Name(name), +        )(input) +    } +} + +/// [6]   	Names	   ::=   	Name (#x20 Name)* +impl<'s> Parser<'s, Names<'s>> for Names<'s> { +    // TODO: fix +    fn parse(input: &'s str) -> IResult<&str, Names<'s>> { +        map( +            recognize(pair(Name::parse, many0(pair(char('\u{20}'), Name::parse)))), +            |names| Names(names), +        )(input) +    } +} + +/// [7]   	Nmtoken	   ::=   	(NameChar)+ +impl<'s> Parser<'s, Nmtoken<'s>> for Nmtoken<'s> { +    fn parse(input: &'s str) -> IResult<&str, Nmtoken<'s>> { +        map(recognize(many1(NameChar::parse)), |nmtoken| { +            Nmtoken(nmtoken) +        })(input) +    } +} + +/// [8]   	Nmtokens	   ::=   	Nmtoken (#x20 Nmtoken)* +impl<'s> Parser<'s, Nmtokens<'s>> for Nmtokens<'s> { +    fn parse(input: &'s str) -> IResult<&str, Nmtokens<'s>> { +        map( +            
recognize(pair( +                Nmtoken::parse, +                many0(pair(char('\u{20}'), Nmtoken::parse)), +            )), +            |nmtokens| Nmtokens(nmtokens), +        )(input) +    } +} + +/// [9]   	EntityValue	   ::=   	'"' ([^%&"] | PEReference | Reference)* '"' +///			|  "'" ([^%&'] | PEReference | Reference)* "'" +impl<'s> Parser<'s, EntityValue<'s>> for EntityValue<'s> { +    fn parse(input: &'s str) -> IResult<&str, EntityValue<'s>> { +        map( +            alt(( +                delimited( +                    char('"'), +                    many0(alt(( +                        map( +                            recognize(many_till(take(1usize), peek(one_of("%&\"")))), +                            |string| LiteralData::String(string), +                        ), +                        map(PEReference::parse, |pe_reference| { +                            LiteralData::PEReference(pe_reference) +                        }), +                        map(Reference::parse, |reference| { +                            LiteralData::Reference(reference) +                        }), +                    ))), +                    char('"'), +                ), +                delimited( +                    char('\''), +                    many0(alt(( +                        map( +                            recognize(many_till(take(1usize), peek(one_of("%&'")))), +                            |string| LiteralData::String(string), +                        ), +                        map(PEReference::parse, |pe_reference| { +                            LiteralData::PEReference(pe_reference) +                        }), +                        map(Reference::parse, |reference| { +                            LiteralData::Reference(reference) +                        }), +                    ))), +                    char('\''), +                ), +            )), +            |entity_value| EntityValue(entity_value), +        )(input) +    } +} + +/// 
[10]   	AttValue	   ::=   	'"' ([^<&"] | Reference)* '"'
/// 			|  "'" ([^<&'] | Reference)* "'"
///
/// Parses a single- or double-quoted attribute value into a sequence of
/// `LiteralData` items (literal text runs and references). The surrounding
/// quotes are consumed but not returned.
impl<'s> Parser<'s, AttValue<'s>> for AttValue<'s> {
    fn parse(input: &'s str) -> IResult<&str, AttValue<'s>> {
        map(
            alt((
                delimited(
                    char('"'),
                    many0(alt((
                        // The grammar excludes `<`, `&` and the quote; `%` is
                        // ordinary text inside an attribute value. The run
                        // must be non-empty because `many0` errors on an
                        // inner parser that succeeds without consuming input.
                        map(recognize(many1(none_of("<&\""))), LiteralData::String),
                        map(Reference::parse, LiteralData::Reference),
                    ))),
                    char('"'),
                ),
                delimited(
                    char('\''),
                    many0(alt((
                        // Same rule as above, with `'` as the terminator.
                        map(recognize(many1(none_of("<&'"))), LiteralData::String),
                        map(Reference::parse, LiteralData::Reference),
                    ))),
                    char('\''),
                ),
            )),
            AttValue,
        )(input)
    }
}

/// [11]   	SystemLiteral	   ::=   	('"' [^"]* '"') | ("'" [^']* "'")
///
/// Parses a quoted system literal and returns the text between the quotes.
impl<'s> Parser<'s, SystemLiteral<'s>> for SystemLiteral<'s> {
    fn parse(input: &'s str) -> IResult<&str, SystemLiteral<'s>> {
        map(
            alt((
                delimited(char('"'), recognize(many0(none_of("\""))), char('"')),
                delimited(char('\''), recognize(many0(none_of("'"))), char('\'')),
            )),
            SystemLiteral,
        )(input)
    }
}

/// [12]   	PubidLiteral	   ::=   	'"' PubidChar* '"' | "'" (PubidChar - 
"'")* "'"
///
/// Parses a quoted public identifier literal and returns the text between the
/// quotes. Inside a single-quoted literal the apostrophe is excluded from the
/// allowed characters so that it terminates the literal.
impl<'s> Parser<'s, PubidLiteral<'s>> for PubidLiteral<'s> {
    fn parse(input: &'s str) -> IResult<&str, PubidLiteral<'s>> {
        map(
            alt((
                delimited(char('"'), recognize(many0(PubidChar::parse)), char('"')),
                delimited(
                    char('\''),
                    // `not` is a zero-width look-ahead: refuse an apostrophe,
                    // then parse the character itself from the same position.
                    recognize(many0(preceded(not(char('\'')), PubidChar::parse))),
                    char('\''),
                ),
            )),
            PubidLiteral,
        )(input)
    }
}

/// [13]   	PubidChar	   ::=   	#x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
///
/// Parses exactly one character permitted in a public identifier.
impl Parser<'_, PubidChar> for PubidChar {
    fn parse(input: &'_ str) -> IResult<&str, PubidChar> {
        map(
            satisfy(|c| {
                matches!(
                    c,
                    '\u{20}' | '\u{D}' | '\u{A}'
                        | 'a'..='z' | 'A'..='Z' | '0'..='9'
                        // Punctuation class from the production.
                        | '-' | '\'' | '(' | ')' | '+' | ',' | '.' | '/' | ':' | '='
                        | '?' | ';' | '!' | '*' | '#' | '@' | '$' | '_' | '%'
                )
            }),
            PubidChar,
        )(input)
    }
}

/// [14]   	CharData	   ::=   	[^<&]* - ([^<&]* ']]>' [^<&]*)
///
/// Parses character data up to (not including) the next `<`, `&` or `]]>`.
/// The match may be empty.
impl<'s> Parser<'s, CharData<'s>> for CharData<'s> {
    fn parse(input: &'s str) -> IResult<&str, CharData<'s>> {
        map(
            recognize(many_till(
                none_of("<&"),
                peek(alt((recognize(one_of("<&")), tag("]]>")))),
            )),
            CharData,
        )(input)
    }
}

/// [15]   	Comment	   ::=   	'<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
///
/// Parses a comment and returns its text without the `<!--` / `-->` markers.
/// The body ends at the first `--`, which must be followed by `>`.
impl<'s> Parser<'s, Comment<'s>> for Comment<'s> {
    fn parse(input: &'s str) -> IResult<&str, Comment<'s>> {
        map(
            delimited(
                tag("<!--"),
                recognize(many_till(Char::parse, peek(tag("--")))),
                tag("-->"),
            ),
            Comment,
        )(input)
    }
}

/// [16]   	PI	   ::=   	'<?' PITarget (S (Char* - (Char* '?>' Char*)))? 
'?>' +impl<'s> Parser<'s, PI<'s>> for PI<'s> { +    fn parse(input: &'s str) -> IResult<&str, PI<'s>> { +        map( +            delimited( +                tag("<?"), +                pair( +                    PITarget::parse, +                    opt(recognize(pair( +                        S::parse, +                        many_till(Char::parse, peek(tag("?>"))), +                    ))), +                ), +                tag("?>"), +            ), +            |(target, instruction)| PI { +                target, +                instruction, +            }, +        )(input) +    } +} + +/// [17]   	PITarget	   ::=   	Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) +impl<'s> Parser<'s, PITarget<'s>> for PITarget<'s> { +    fn parse(input: &'s str) -> IResult<&str, PITarget<'s>> { +        let (rest, name) = Name::parse(input)?; +        if name.0.to_lowercase() == "xml" { +            return Err(Err::Error(Error { +                input, +                // TODO: check if better error to return +                code: ErrorKind::Tag, +            })); +        } else { +            return Ok((rest, PITarget(name))); +        } +    } +} + +/// [18]   	CDSect	   ::=   	CDStart CData CDEnd +impl<'s> Parser<'s, CDSect<'s>> for CDSect<'s> { +    fn parse(input: &'s str) -> IResult<&str, CDSect<'s>> { +        map(CData::parse, |c_data| CDSect(c_data))(input) +    } +} + +/// [19]   	CDStart	   ::=   	'<![CDATA[' +impl Parser<'_, CDStart> for CDStart { +    fn parse(input: &'_ str) -> IResult<&str, CDStart> { +        value(CDStart, tag("<![CDATA["))(input) +    } +} + +/// [20]   	CData	   ::=   	(Char* - (Char* ']]>' Char*)) +impl<'s> Parser<'s, CData<'s>> for CData<'s> { +    fn parse(input: &'s str) -> IResult<&str, CData<'s>> { +        map( +            recognize(many_till(Char::parse, peek(tag("]]>")))), +            |c_data| CData(c_data), +        )(input) +    } +} + +/// [21]   	CDEnd	   ::=   	']]>' +impl Parser<'_, CDEnd> for CDEnd { +    fn 
parse(input: &'_ str) -> IResult<&str, CDEnd> { +        value(CDEnd, tag("]]>"))(input) +    } +} + +/// [22]   	prolog	   ::=   	XMLDecl? Misc* (doctypedecl Misc*)? +impl<'s> Parser<'s, Prolog<'s>> for Prolog<'s> { +    fn parse(input: &'s str) -> IResult<&str, Prolog<'s>> { +        tuple(( +            opt(XMLDecl::parse), +            many0(Misc::parse), +            opt(tuple((DoctypeDecl::parse, many0(Misc::parse)))), +        ))(input) +    } +} + +/// [23]   	XMLDecl	   ::=   	'<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' +impl<'s> Parser<'s, XMLDecl<'s>> for XMLDecl<'s> { +    fn parse(input: &'s str) -> IResult<&str, XMLDecl<'s>> { +        map( +            delimited( +                tag("<?xml"), +                tuple(( +                    VersionInfo::parse, +                    opt(EncodingDecl::parse), +                    opt(SDDecl::parse), +                )), +                pair(opt(S::parse), tag("?>")), +            ), +            |(version_info, encoding_decl, sd_decl)| XMLDecl { +                version_info, +                encoding_decl, +                sd_decl, +            }, +        )(input) +    } +} + +/// [24]   	VersionInfo	   ::=   	S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') +impl Parser<'_, VersionInfo> for VersionInfo { +    fn parse(input: &'_ str) -> IResult<&str, VersionInfo> { +        map( +            preceded( +                tuple((S::parse, tag("version"), Eq::parse)), +                alt(( +                    delimited(char('\''), VersionNum::parse, char('\'')), +                    delimited(char('"'), VersionNum::parse, char('"')), +                )), +            ), +            |version_num| VersionInfo(version_num), +        )(input) +    } +} + +/// [25]   	Eq	   ::=   	S? '=' S? 
+impl Parser<'_, Eq> for Eq { +    fn parse(input: &'_ str) -> IResult<&str, Eq> { +        value( +            Eq, +            recognize(tuple((opt(S::parse), char('='), opt(S::parse)))), +        )(input) +    } +} + +/// [26]   	VersionNum	   ::=   	'1.' [0-9]+ +impl Parser<'_, VersionNum> for VersionNum { +    fn parse(input: &'_ str) -> IResult<&str, VersionNum> { +        preceded( +            tag("1."), +            alt(( +                value(VersionNum::One, char('0')), +                value(VersionNum::OneDotOne, char('1')), +            )), +        )(input) +    } +} + +/// [27]   	Misc	   ::=   	Comment | PI | S +impl<'s> Parser<'s, Misc<'s>> for Misc<'s> { +    fn parse(input: &'s str) -> IResult<&str, Misc<'s>> { +        alt(( +            map(Comment::parse, |comment| Misc::Comment(comment)), +            map(PI::parse, |pi| Misc::PI(pi)), +            value(Misc::S, S::parse), +        ))(input) +    } +} + +/// [16]   	doctypedecl	   ::=   	'<!DOCTYPE' S QName (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>' +/// [28]   	doctypedecl	   ::=   	'<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? 
'>' +impl<'s> Parser<'s, DoctypeDecl<'s>> for DoctypeDecl<'s> { +    fn parse(input: &'s str) -> IResult<&str, DoctypeDecl<'s>> { +        map( +            delimited( +                pair(tag("<!DOCTYPE"), S::parse), +                tuple(( +                    QName::parse, +                    opt(preceded(S::parse, ExternalID::parse)), +                    preceded( +                        opt(S::parse), +                        opt(terminated( +                            delimited(tag("["), IntSubset::parse, tag("]")), +                            opt(S::parse), +                        )), +                    ), +                )), +                tag(">"), +            ), +            |(name, external_id, int_subset)| DoctypeDecl { +                name, +                external_id, +                int_subset, +            }, +        )(input) +    } +} + +/// [28a]   	DeclSep	   ::=   	PEReference | S +impl<'s> Parser<'s, DeclSep<'s>> for DeclSep<'s> { +    fn parse(input: &'s str) -> IResult<&str, DeclSep<'s>> { +        alt(( +            map(PEReference::parse, |pe_reference| { +                DeclSep::PEReference(pe_reference) +            }), +            value(DeclSep::S, S::parse), +        ))(input) +    } +} + +/// [28b]   	intSubset	   ::=   	(markupdecl | DeclSep)* +impl<'s> Parser<'s, IntSubset<'s>> for IntSubset<'s> { +    fn parse(input: &'s str) -> IResult<&str, IntSubset<'s>> { +        many0(alt(( +            map(MarkupDecl::parse, |markup_decl| { +                IntSubsetDeclaration::MarkupDecl(markup_decl) +            }), +            map(DeclSep::parse, |decl_sep| { +                IntSubsetDeclaration::DeclSep(decl_sep) +            }), +        )))(input) +    } +} + +/// [29]   	markupdecl	   ::=   	elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment +impl<'s> Parser<'s, MarkupDecl<'s>> for MarkupDecl<'s> { +    fn parse(input: &'s str) -> IResult<&str, MarkupDecl<'s>> { +        alt(( +            
map(Elementdecl::parse, |elementdecl| { +                MarkupDecl::Elementdecl(elementdecl) +            }), +            map(AttlistDecl::parse, |attlist_decl| { +                MarkupDecl::AttlistDecl(attlist_decl) +            }), +            map(EntityDecl::parse, |entity_decl| { +                MarkupDecl::EntityDecl(entity_decl) +            }), +            map(NotationDecl::parse, |notation_decl| { +                MarkupDecl::NotationDecl(notation_decl) +            }), +            map(PI::parse, |pi| MarkupDecl::PI(pi)), +            map(Comment::parse, |comment| MarkupDecl::Comment(comment)), +        ))(input) +    } +} + +/// [30]   	extSubset	   ::=   	TextDecl? extSubsetDecl +impl<'s> Parser<'s, ExtSubset<'s>> for ExtSubset<'s> { +    fn parse(input: &'s str) -> IResult<&str, ExtSubset<'s>> { +        map( +            pair(opt(TextDecl::parse), ExtSubsetDecl::parse), +            |(text_decl, ext_subset_decl)| ExtSubset { +                text_decl, +                ext_subset_decl, +            }, +        )(input) +    } +} + +/// [31]   	extSubsetDecl	   ::=   	( markupdecl | conditionalSect | DeclSep)* +impl<'s> Parser<'s, ExtSubsetDecl<'s>> for ExtSubsetDecl<'s> { +    fn parse(input: &'s str) -> IResult<&str, ExtSubsetDecl<'s>> { +        many0(alt(( +            map(MarkupDecl::parse, |markup_decl| { +                ExtSubsetDeclaration::MarkupDecl(markup_decl) +            }), +            map(ConditionalSect::parse, |conditional_sect| { +                ExtSubsetDeclaration::ConditionalSect(conditional_sect) +            }), +            map(DeclSep::parse, |decl_sep| { +                ExtSubsetDeclaration::DeclSep(decl_sep) +            }), +        )))(input) +    } +} + +/// [32]   	SDDecl	   ::=   	S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) +impl Parser<'_, SDDecl> for SDDecl { +    fn parse(input: &'_ str) -> IResult<&str, SDDecl> { +        preceded( +            tuple((S::parse, tag("standalone"), 
Eq::parse)), +            alt(( +                delimited( +                    char('\''), +                    alt((value(true, tag("yes")), value(false, tag("no")))), +                    char('\''), +                ), +                delimited( +                    char('"'), +                    alt((value(true, tag("yes")), value(false, tag("no")))), +                    char('"'), +                ), +            )), +        )(input) +    } +} + +// (Productions 33 through 38 have been removed.) + +/// [39]   	element	   ::=   	EmptyElemTag | STag content ETag +impl<'s> Parser<'s, Element<'s>> for Element<'s> { +    fn parse(input: &'s str) -> IResult<&str, Element<'s>> { +        alt(( +            map(EmptyElemTag::parse, |empty_elem_tag| { +                Element::Empty(empty_elem_tag) +            }), +            map( +                tuple((STag::parse, Content::parse, ETag::parse)), +                |(s_tag, content, e_tag)| Element::NotEmpty(s_tag, content, e_tag), +            ), +        ))(input) +    } +} + +/// [12]   	STag	   ::=   	'<' QName (S Attribute)* S? '>' +/// [40]   	STag	   ::=   	'<' Name (S Attribute)* S? 
'>'
///
/// Parses a start tag into its qualified name and attribute list.
impl<'s> Parser<'s, STag<'s>> for STag<'s> {
    fn parse(input: &'s str) -> IResult<&str, STag<'s>> {
        let name_and_attributes =
            pair(QName::parse, many0(preceded(S::parse, Attribute::parse)));
        let closing = pair(opt(S::parse), tag(">"));
        map(
            delimited(tag("<"), name_and_attributes, closing),
            |(name, attributes)| STag { name, attributes },
        )(input)
    }
}

/// [15]   	Attribute	   ::=   	NSAttName Eq AttValue | QName Eq AttValue
///
/// A namespace declaration (`xmlns` / `xmlns:prefix`) is tried first; if the
/// whole `name Eq value` sequence does not match in that form, the input is
/// re-parsed as an ordinary qualified attribute.
impl<'s> Parser<'s, Attribute<'s>> for Attribute<'s> {
    fn parse(input: &'s str) -> IResult<&str, Attribute<'s>> {
        let namespace_declaration = map(
            separated_pair(NSAttName::parse, Eq::parse, AttValue::parse),
            |(ns_name, value)| Attribute::NamespaceDeclaration { ns_name, value },
        );
        let plain_attribute = map(
            separated_pair(QName::parse, Eq::parse, AttValue::parse),
            |(name, value)| Attribute::Attribute { name, value },
        );
        alt((namespace_declaration, plain_attribute))(input)
    }
}
// pub type Attribute<'s> = (Name<'s>, AttValue<'s>);
// [41]   	Attribute	   ::=   	Name Eq AttValue
// pub fn attribute(input: &str) -> IResult<&str, Attribute> {
//     separated_pair(name, eq, att_value)(input)
// }

/// [13]   	ETag	   ::=   	'</' QName S? '>'
/// [42]   	ETag	   ::=   	'</' Name S? '>'
///
/// Parses an end tag and returns its qualified name.
impl<'s> Parser<'s, ETag<'s>> for ETag<'s> {
    fn parse(input: &'s str) -> IResult<&str, ETag<'s>> {
        let closing = pair(opt(S::parse), tag(">"));
        map(delimited(tag("</"), QName::parse, closing), |name| ETag {
            name,
        })(input)
    }
}

/// [43]   	content	   ::=   	CharData? 
((element | Reference | CDSect | PI | Comment) CharData?)*
///
/// Parses element content as optional leading character data followed by any
/// number of (item, optional character data) pairs.
impl<'s> Parser<'s, Content<'s>> for Content<'s> {
    fn parse(input: &'s str) -> IResult<&str, Content<'s>> {
        let item = alt((
            map(Element::parse, ContentItem::Element),
            map(Reference::parse, ContentItem::Reference),
            map(CDSect::parse, ContentItem::CDSect),
            map(PI::parse, ContentItem::PI),
            map(Comment::parse, ContentItem::Comment),
        ));
        map(
            pair(
                opt(CharData::parse),
                many0(pair(item, opt(CharData::parse))),
            ),
            |(char_data, content)| Content { char_data, content },
        )(input)
    }
}

/// [14]   	EmptyElemTag	   ::=   	'<' QName (S Attribute)* S? '/>'
/// [44]   	EmptyElemTag	   ::=   	'<' Name (S Attribute)* S? '/>'	[WFC: Unique Att Spec]
///
/// Parses a self-closing tag into its qualified name and attribute list.
impl<'s> Parser<'s, EmptyElemTag<'s>> for EmptyElemTag<'s> {
    fn parse(input: &'s str) -> IResult<&str, EmptyElemTag<'s>> {
        let name_and_attributes =
            pair(QName::parse, many0(preceded(S::parse, Attribute::parse)));
        let closing = pair(opt(S::parse), tag("/>"));
        map(
            delimited(tag("<"), name_and_attributes, closing),
            |(name, attributes)| EmptyElemTag { name, attributes },
        )(input)
    }
}

/// [17]   	elementdecl	   ::=   	'<!ELEMENT' S QName S contentspec S? '>'
/// [45]   	elementdecl	   ::=   	'<!ELEMENT' S Name S contentspec S? 
'>'
///
/// Parses an element type declaration into the element's qualified name and
/// its content specification.
impl<'s> Parser<'s, Elementdecl<'s>> for Elementdecl<'s> {
    // The return type spells out `<'s>` like every other impl in this file
    // instead of relying on an elided lifetime in the type path.
    fn parse(input: &'s str) -> IResult<&str, Elementdecl<'s>> {
        map(
            delimited(
                pair(tag("<!ELEMENT"), S::parse),
                separated_pair(QName::parse, S::parse, Contentspec::parse),
                pair(opt(S::parse), tag(">")),
            ),
            |(name, contentspec)| Elementdecl { name, contentspec },
        )(input)
    }
}

/// [46]   	contentspec	   ::=   	'EMPTY' | 'ANY' | Mixed | children
impl<'s> Parser<'s, Contentspec<'s>> for Contentspec<'s> {
    fn parse(input: &'s str) -> IResult<&str, Contentspec<'s>> {
        alt((
            value(Contentspec::Empty, tag("EMPTY")),
            value(Contentspec::Any, tag("ANY")),
            map(Mixed::parse, Contentspec::Mixed),
            map(Children::parse, Contentspec::Children),
        ))(input)
    }
}

/// Occurence ::= ('?' | '*' | '+')?
///
/// Parses an optional occurrence indicator. This parser never fails on a
/// missing indicator: absence yields `Occurence::Once` and consumes nothing.
impl Parser<'_, Occurence> for Occurence {
    fn parse(input: &'_ str) -> IResult<&str, Occurence> {
        map(
            opt(alt((tag("?"), tag("*"), tag("+")))),
            |occurence| match occurence {
                Some("?") => Occurence::Optional,
                Some("*") => Occurence::Many0,
                Some("+") => Occurence::Many1,
                // `None`; the three tags above are the only possible `Some`s.
                _ => Occurence::Once,
            },
        )(input)
    }
}

/// [47]   	children	   ::=   	(choice | seq) ('?' | '*' | '+')? 
+impl<'s> Parser<'s, Children<'s>> for Children<'s> { +    fn parse(input: &'s str) -> IResult<&str, Children<'s>> { +        map( +            pair( +                alt(( +                    map(Choice::parse, |choice| ChildrenKind::Choice(choice)), +                    map(Seq::parse, |seq| ChildrenKind::Seq(seq)), +                )), +                Occurence::parse, +            ), +            |(kind, occurence)| Children { kind, occurence }, +        )(input) +    } +} + +/// [18]   	cp	   ::=   	(QName | choice | seq) ('?' | '*' | '+')? +/// [48]   	cp	   ::=   	(Name | choice | seq) ('?' | '*' | '+')? +impl<'s> Parser<'s, Cp<'s>> for Cp<'s> { +    fn parse(input: &'s str) -> IResult<&str, Cp<'s>> { +        map( +            pair( +                alt(( +                    map(QName::parse, |name| CpKind::Name(name)), +                    map(Choice::parse, |choice| CpKind::Choice(choice)), +                    map(Seq::parse, |seq| CpKind::Seq(seq)), +                )), +                Occurence::parse, +            ), +            |(kind, occurence)| Cp { kind, occurence }, +        )(input) +    } +} + +/// [49]   	choice	   ::=   	'(' S? cp ( S? '|' S? cp )+ S? ')' +impl<'s> Parser<'s, Choice<'s>> for Choice<'s> { +    fn parse(input: &'s str) -> IResult<&str, Choice<'s>> { +        map( +            delimited( +                pair(tag("("), opt(S::parse)), +                pair( +                    Cp::parse, +                    many1(preceded( +                        tuple((opt(S::parse), tag("|"), opt(S::parse))), +                        Cp::parse, +                    )), +                ), +                pair(opt(S::parse), tag(")")), +            ), +            |(head, tail)| { +                let choice = vec![vec![head], tail].concat(); +                Choice(choice) +            }, +        )(input) +    } +} + +/// [50]   	seq	   ::=   	'(' S? cp ( S? ',' S? cp )* S? 
')'
///
/// Parses a parenthesised list of one or more `,`-separated content
/// particles.
impl<'s> Parser<'s, Seq<'s>> for Seq<'s> {
    fn parse(input: &'s str) -> IResult<&str, Seq<'s>> {
        map(
            delimited(
                pair(tag("("), opt(S::parse)),
                pair(
                    Cp::parse,
                    many0(preceded(
                        tuple((opt(S::parse), tag(","), opt(S::parse))),
                        Cp::parse,
                    )),
                ),
                pair(opt(S::parse), tag(")")),
            ),
            // Move the parsed particles into one vector rather than cloning
            // them through `concat()`.
            |(head, tail)| Seq(std::iter::once(head).chain(tail).collect()),
        )(input)
    }
}

/// [19]   	Mixed	   ::=   	'(' S? '#PCDATA' (S? '|' S? QName)* S? ')*' | '(' S? '#PCDATA' S? ')'
/// [51]   	Mixed	   ::=   	'(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'
///
/// Parses a mixed-content model and returns the element names listed after
/// `#PCDATA`; the plain `(#PCDATA)` form yields an empty list.
impl<'s> Parser<'s, Mixed<'s>> for Mixed<'s> {
    fn parse(input: &'s str) -> IResult<&str, Mixed<'s>> {
        alt((
            map(
                delimited(
                    // Whitespace after `(` is optional (`S?`) in both forms.
                    tuple((tag("("), opt(S::parse), tag("#PCDATA"))),
                    many0(preceded(
                        tuple((opt(S::parse), tag("|"), opt(S::parse))),
                        QName::parse,
                    )),
                    pair(opt(S::parse), tag(")*")),
                ),
                Mixed,
            ),
            value(
                Mixed(Vec::new()),
                tuple((
                    tag("("),
                    opt(S::parse),
                    tag("#PCDATA"),
                    opt(S::parse),
                    tag(")"),
                )),
            ),
        ))(input)
    }
}

/// [20]   	AttlistDecl	   ::=   	'<!ATTLIST' S QName AttDef* S? '>'
/// [52]   	AttlistDecl	   ::=   	'<!ATTLIST' S Name AttDef* S? 
'>' +impl<'s> Parser<'s, AttlistDecl<'s>> for AttlistDecl<'s> { +    fn parse(input: &'s str) -> IResult<&str, AttlistDecl<'s>> { +        map( +            delimited( +                pair(tag("<!ATTLIST"), S::parse), +                pair(QName::parse, many0(AttDef::parse)), +                pair(opt(S::parse), tag(">")), +            ), +            |(element_type, att_defs)| AttlistDecl { +                element_type, +                att_defs, +            }, +        )(input) +    } +} + +/// [21]   	AttDef	   ::=   	S (QName | NSAttName) S AttType S DefaultDecl +/// [53]   	AttDef	   ::=   	S Name S AttType S DefaultDecl +impl<'s> Parser<'s, AttDef<'s>> for AttDef<'s> { +    fn parse(input: &'s str) -> IResult<&str, AttDef<'s>> { +        map( +            tuple(( +                preceded( +                    S::parse, +                    alt(( +                        map(QName::parse, |q_name| AttDefName::QName(q_name)), +                        map(NSAttName::parse, |ns_att_name| { +                            AttDefName::NSAttName(ns_att_name) +                        }), +                    )), +                ), +                preceded(S::parse, AttType::parse), +                preceded(S::parse, DefaultDecl::parse), +            )), +            |(name, att_type, default_decl)| AttDef { +                name, +                att_type, +                default_decl, +            }, +        )(input) +    } +} + +/// [54]   	AttType	   ::=   	StringType | TokenizedType | EnumeratedType +impl<'s> Parser<'s, AttType<'s>> for AttType<'s> { +    fn parse(input: &'s str) -> IResult<&str, AttType<'s>> { +        alt(( +            value(AttType::StringType, StringType::parse), +            map(TokenizedType::parse, |tokenized_type| { +                AttType::TokenizedType(tokenized_type) +            }), +            map(EnumeratedType::parse, |enumerated_type| { +                AttType::EnumeratedType(enumerated_type) +            }), +        
))(input) +    } +} + +/// [55]   	StringType	   ::=   	'CDATA' +impl Parser<'_, StringType> for StringType { +    fn parse(input: &'_ str) -> IResult<&str, StringType> { +        value(StringType, tag("CDATA"))(input) +    } +} + +/// [56]   	TokenizedType	   ::=   	'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' +impl Parser<'_, TokenizedType> for TokenizedType { +    fn parse(input: &'_ str) -> IResult<&str, TokenizedType> { +        alt(( +            value(TokenizedType::ID, tag("ID")), +            // TODO: check if this is required +            // try idrefs first to avoid losing 'S' +            value(TokenizedType::IDRefs, tag("IDREFS")), +            value(TokenizedType::IDRef, tag("IDREF")), +            value(TokenizedType::Entity, tag("ENTITY")), +            value(TokenizedType::Entities, tag("ENTITIES")), +            // same here +            value(TokenizedType::NMTokens, tag("NMTOKENS")), +            value(TokenizedType::NMToken, tag("NMTOKEN")), +        ))(input) +    } +} + +/// [57]   	EnumeratedType	   ::=   	NotationType | Enumeration +impl<'s> Parser<'s, EnumeratedType<'s>> for EnumeratedType<'s> { +    fn parse(input: &'s str) -> IResult<&str, EnumeratedType<'s>> { +        alt(( +            map(NotationType::parse, |notation_type| { +                EnumeratedType::NotationType(notation_type) +            }), +            map(Enumeration::parse, |enumeration| { +                EnumeratedType::Enumeration(enumeration) +            }), +        ))(input) +    } +} + +/// [58]   	NotationType	   ::=   	'NOTATION' S '(' S? Name (S? '|' S? Name)* S? 
')'
///
/// Parses a `NOTATION` attribute type and returns the listed notation names
/// in source order.
impl<'s> Parser<'s, NotationType<'s>> for NotationType<'s> {
    fn parse(input: &'s str) -> IResult<&str, NotationType<'s>> {
        map(
            delimited(
                tuple((tag("NOTATION"), S::parse, tag("("), opt(S::parse))),
                pair(
                    Name::parse,
                    many0(preceded(
                        tuple((opt(S::parse), tag("|"), opt(S::parse))),
                        Name::parse,
                    )),
                ),
                pair(opt(S::parse), tag(")")),
            ),
            // Move the parsed names into one vector rather than cloning them
            // through `concat()`.
            |(head, tail)| NotationType(std::iter::once(head).chain(tail).collect()),
        )(input)
    }
}

/// [59]   	Enumeration	   ::=   	'(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
///
/// Parses an enumerated attribute type and returns the listed name tokens in
/// source order.
impl<'s> Parser<'s, Enumeration<'s>> for Enumeration<'s> {
    fn parse(input: &'s str) -> IResult<&str, Enumeration<'s>> {
        map(
            delimited(
                pair(tag("("), opt(S::parse)),
                pair(
                    Nmtoken::parse,
                    many0(preceded(
                        tuple((opt(S::parse), tag("|"), opt(S::parse))),
                        Nmtoken::parse,
                    )),
                ),
                pair(opt(S::parse), tag(")")),
            ),
            // Move the parsed tokens into one vector rather than cloning them
            // through `concat()`.
            |(head, tail)| Enumeration(std::iter::once(head).chain(tail).collect()),
        )(input)
    }
}

/// [60]   	DefaultDecl	   ::=   	'#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? 
AttValue)
///
/// Parses an attribute default declaration.
// NOTE(review): a value with and without the `#FIXED` keyword both produce
// `DefaultDecl::Fixed`, so the keyword's presence is not recorded. Confirm
// whether callers need to distinguish the two.
impl<'s> Parser<'s, DefaultDecl<'s>> for DefaultDecl<'s> {
    fn parse(input: &'s str) -> IResult<&str, DefaultDecl<'s>> {
        let optional_fixed_keyword = opt(pair(tag("#FIXED"), S::parse));
        alt((
            value(DefaultDecl::Required, tag("#REQUIRED")),
            value(DefaultDecl::Implied, tag("#IMPLIED")),
            map(
                preceded(optional_fixed_keyword, AttValue::parse),
                DefaultDecl::Fixed,
            ),
        ))(input)
    }
}

/// [61]   	conditionalSect	   ::=   	includeSect | ignoreSect
impl<'s> Parser<'s, ConditionalSect<'s>> for ConditionalSect<'s> {
    fn parse(input: &'s str) -> IResult<&str, ConditionalSect<'s>> {
        alt((
            map(IncludeSect::parse, ConditionalSect::IncludeSect),
            map(IgnoreSect::parse, ConditionalSect::IgnoreSect),
        ))(input)
    }
}

/// [62]   	includeSect	   ::=   	'<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
///
/// Parses an `INCLUDE` conditional section and returns the declarations it
/// contains.
impl<'s> Parser<'s, IncludeSect<'s>> for IncludeSect<'s> {
    fn parse(input: &'s str) -> IResult<&str, IncludeSect<'s>> {
        let header = tuple((
            tag("<!["),
            opt(S::parse),
            tag("INCLUDE"),
            opt(S::parse),
            tag("["),
        ));
        map(
            delimited(header, ExtSubsetDecl::parse, tag("]]>")),
            IncludeSect,
        )(input)
    }
}

/// [63]   	ignoreSect	   ::=   	'<![' S? 'IGNORE' S? 
'[' ignoreSectContents* ']]>' +impl<'s> Parser<'s, IgnoreSect<'s>> for IgnoreSect<'s> { +    fn parse(input: &'s str) -> IResult<&str, IgnoreSect<'s>> { +        map( +            delimited( +                tuple(( +                    tag("<!["), +                    opt(S::parse), +                    tag("IGNORE"), +                    opt(S::parse), +                    tag("["), +                )), +                many0(IgnoreSectContents::parse), +                tag("]]>"), +            ), +            |ignore_sect_contents| IgnoreSect(ignore_sect_contents), +        )(input) +    } +} + +/// [64]   	ignoreSectContents	   ::=   	Ignore ('<![' ignoreSectContents ']]>' Ignore)* +impl<'s> Parser<'s, IgnoreSectContents<'s>> for IgnoreSectContents<'s> { +    fn parse(input: &'s str) -> IResult<&str, IgnoreSectContents<'s>> { +        map( +            pair( +                Ignore::parse, +                many0(tuple(( +                    delimited(tag("<!["), IgnoreSectContents::parse, tag("]]>")), +                    Ignore::parse, +                ))), +            ), +            |(ignore, ignore_list)| IgnoreSectContents { +                ignore, +                ignore_list, +            }, +        )(input) +    } +} + +/// [65]   	Ignore	   ::=   	Char* - (Char* ('<![' | ']]>') Char*) +impl<'s> Parser<'s, Ignore<'s>> for Ignore<'s> { +    fn parse(input: &'s str) -> IResult<&str, Ignore<'s>> { +        map( +            recognize(many_till(Char::parse, peek(alt((tag("<!["), tag("]]>")))))), +            |ignore| Ignore(ignore), +        )(input) +    } +} + +/// [66]   	CharRef	   ::=   	'&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' +impl<'s> Parser<'s, CharRef<'s>> for CharRef<'s> { +    fn parse(input: &'s str) -> IResult<&str, CharRef<'s>> { +        alt(( +            delimited( +                tag("&#"), +                map(take_while(|c| matches!(c, '0'..='9')), |decimal| { +                    CharRef::Decimal(decimal) +                }), +     
           tag(";"), +            ), +            delimited( +                tag("&#x"), +                map( +                    take_while(|c| matches!(c, '0'..='9' | 'a'..='f' | 'A'..='F' )), +                    |hexadecimal| CharRef::Hexadecimal(hexadecimal), +                ), +                tag(";"), +            ), +        ))(input) +    } +} + +/// [67]   	Reference	   ::=   	EntityRef | CharRef +impl<'s> Parser<'s, Reference<'s>> for Reference<'s> { +    fn parse(input: &'s str) -> IResult<&str, Reference<'s>> { +        alt(( +            map(EntityRef::parse, |entity_ref| { +                Reference::EntityRef(entity_ref) +            }), +            map(CharRef::parse, |char_ref| Reference::CharRef(char_ref)), +        ))(input) +    } +} + +/// [68]   	EntityRef	   ::=   	'&' Name ';' +impl<'s> Parser<'s, EntityRef<'s>> for EntityRef<'s> { +    fn parse(input: &'s str) -> IResult<&str, EntityRef<'s>> { +        map(delimited(tag("&"), Name::parse, tag(";")), |entity_ref| { +            EntityRef(entity_ref) +        })(input) +    } +} + +/// [69]   	PEReference	   ::=   	'%' Name ';' +impl<'s> Parser<'s, PEReference<'s>> for PEReference<'s> { +    fn parse(input: &'s str) -> IResult<&str, PEReference<'s>> { +        map(delimited(tag("%"), Name::parse, tag(";")), |pe_reference| { +            PEReference(pe_reference) +        })(input) +    } +} + +/// [70]   	EntityDecl	   ::=   	GEDecl | PEDecl +impl<'s> Parser<'s, EntityDecl<'s>> for EntityDecl<'s> { +    fn parse(input: &'s str) -> IResult<&str, EntityDecl<'s>> { +        alt(( +            map(GEDecl::parse, |ge_decl| EntityDecl::GEDecl(ge_decl)), +            map(PEDecl::parse, |pe_decl| EntityDecl::PEDecl(pe_decl)), +        ))(input) +    } +} + +/// [71]   	GEDecl	   ::=   	'<!ENTITY' S Name S EntityDef S? 
'>' +impl<'s> Parser<'s, GEDecl<'s>> for GEDecl<'s> { +    fn parse(input: &'s str) -> IResult<&str, GEDecl<'s>> { +        map( +            delimited( +                pair(tag("<!ENTITY"), S::parse), +                separated_pair(Name::parse, S::parse, EntityDef::parse), +                pair(opt(S::parse), tag(">")), +            ), +            |(name, entity_def)| GEDecl { name, entity_def }, +        )(input) +    } +} + +/// [72]   	PEDecl	   ::=   	'<!ENTITY' S '%' S Name S PEDef S? '>' +impl<'s> Parser<'s, PEDecl<'s>> for PEDecl<'s> { +    fn parse(input: &'s str) -> IResult<&str, PEDecl<'s>> { +        map( +            delimited( +                tuple((tag("<!ENTITY"), S::parse, tag("%"), S::parse)), +                separated_pair(Name::parse, S::parse, PEDef::parse), +                pair(opt(S::parse), tag(">")), +            ), +            |(name, pe_def)| PEDecl { name, pe_def }, +        )(input) +    } +} + +/// [73]   	EntityDef	   ::=   	EntityValue | (ExternalID NDataDecl?) 
+impl<'s> Parser<'s, EntityDef<'s>> for EntityDef<'s> { +    fn parse(input: &'s str) -> IResult<&str, EntityDef<'s>> { +        alt(( +            map(EntityValue::parse, |entity_value| { +                EntityDef::EntityValue(entity_value) +            }), +            map( +                pair(ExternalID::parse, opt(NDataDecl::parse)), +                |(external_id, ndata_decl)| EntityDef::ExternalID { +                    external_id, +                    ndata_decl, +                }, +            ), +        ))(input) +    } +} + +/// [74]   	PEDef	   ::=   	EntityValue | ExternalID +impl<'s> Parser<'s, PEDef<'s>> for PEDef<'s> { +    fn parse(input: &'s str) -> IResult<&str, PEDef<'s>> { +        alt(( +            map(EntityValue::parse, |entity_value| { +                PEDef::EntityValue(entity_value) +            }), +            map(ExternalID::parse, |external_id| { +                PEDef::ExternalID(external_id) +            }), +        ))(input) +    } +} + +/// [75]   	ExternalID	   ::=   	'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral +// pub fn external_id(input: &str) -> IResult<&str, ExternalID> { +impl<'s> Parser<'s, ExternalID<'s>> for ExternalID<'s> { +    fn parse(input: &'s str) -> IResult<&str, ExternalID<'s>> { +        alt(( +            map( +                preceded(pair(tag("SYSTEM"), S::parse), SystemLiteral::parse), +                |system_identifier| ExternalID::SYSTEM { system_identifier }, +            ), +            map( +                preceded( +                    pair(tag("PUBLIC"), S::parse), +                    separated_pair(PubidLiteral::parse, S::parse, SystemLiteral::parse), +                ), +                |(public_identifier, system_identifier)| ExternalID::PUBLIC { +                    public_identifier, +                    system_identifier, +                }, +            ), +        ))(input) +    } +} + +/// [76]   	NDataDecl	   ::=   	S 'NDATA' S Name +impl<'s> Parser<'s, 
NDataDecl<'s>> for NDataDecl<'s> { +    fn parse(input: &'s str) -> IResult<&str, NDataDecl<'s>> { +        map( +            preceded(tuple((S::parse, tag("NDATA"), S::parse)), Name::parse), +            |n_data_decl| NDataDecl(n_data_decl), +        )(input) +    } +} + +/// [77]   	TextDecl	   ::=   	'<?xml' VersionInfo? EncodingDecl S? '?>' +impl<'s> Parser<'s, TextDecl<'s>> for TextDecl<'s> { +    fn parse(input: &'s str) -> IResult<&str, TextDecl<'s>> { +        map( +            delimited( +                tag("<?xml"), +                pair( +                    opt(VersionInfo::parse), +                    terminated(EncodingDecl::parse, opt(S::parse)), +                ), +                tag("?>"), +            ), +            |(version_info, encoding_decl)| TextDecl { +                version_info, +                encoding_decl, +            }, +        )(input) +    } +} + +/// [78]   	extParsedEnt	   ::=   	TextDecl? content +impl<'s> Parser<'s, ExtParsedEnt<'s>> for ExtParsedEnt<'s> { +    fn parse(input: &'s str) -> IResult<&str, ExtParsedEnt<'s>> { +        map( +            pair(opt(TextDecl::parse), Content::parse), +            |(text_decl, content)| ExtParsedEnt { text_decl, content }, +        )(input) +    } +} + +/// [80]   	EncodingDecl	   ::=   	S 'encoding' Eq ('"' EncName '"' | "'" EncName +impl<'s> Parser<'s, EncodingDecl<'s>> for EncodingDecl<'s> { +    fn parse(input: &'s str) -> IResult<&str, EncodingDecl<'s>> { +        map( +            preceded( +                tuple((S::parse, tag("encoding"), Eq::parse)), +                alt(( +                    delimited(char('"'), EncName::parse, char('"')), +                    delimited(char('\''), EncName::parse, char('\'')), +                )), +            ), +            |encoding_decl| EncodingDecl(encoding_decl), +        )(input) +    } +} + +/// [81]   	EncName	   ::=   	[A-Za-z] ([A-Za-z0-9._] | '-')* +impl<'s> Parser<'s, EncName<'s>> for EncName<'s> { +    fn parse(input: &'s 
str) -> IResult<&str, EncName<'s>> { +        map( +            recognize(pair( +                satisfy(|c| matches!(c, 'A'..='Z' | 'a'..='z' )), +                many0(satisfy( +                    |c| matches!(c, 'A'..='Z' | 'a'..='z' | '0'..='9' | '.' | '_' | '-' ), +                )), +            )), +            |enc_name| EncName(enc_name), +        )(input) +    } +} + +/// [82]   	NotationDecl	   ::=   	'<!NOTATION' S Name S (ExternalID | PublicID) S? '>' +impl<'s> Parser<'s, NotationDecl<'s>> for NotationDecl<'s> { +    fn parse(input: &'s str) -> IResult<&str, NotationDecl<'s>> { +        map( +            delimited( +                pair(tag("<!NOTATION"), S::parse), +                separated_pair( +                    Name::parse, +                    S::parse, +                    alt(( +                        map(ExternalID::parse, |external_id| { +                            NotationDeclID::External(external_id) +                        }), +                        map(PublicID::parse, |public_id| { +                            NotationDeclID::Public(public_id) +                        }), +                    )), +                ), +                pair(opt(S::parse), tag(">")), +            ), +            |(name, id)| NotationDecl { name, id }, +        )(input) +    } +} + +/// [83]   	PublicID	   ::=   	'PUBLIC' S PubidLiteral +impl<'s> Parser<'s, PublicID<'s>> for PublicID<'s> { +    fn parse(input: &'s str) -> IResult<&str, PublicID<'s>> { +        map( +            preceded(pair(tag("PUBLIC"), S::parse), PubidLiteral::parse), +            |public_id| PublicID(public_id), +        )(input) +    } +} + +#[cfg(test)] +mod tests { +    use std::num::NonZero; + +    use super::*; + +    #[test] +    fn test_char_data() { +        assert_eq!( +            Ok(("&def]]>ghi", CharData("abc"))), +            CharData::parse("abc&def]]>ghi") +        ); +        assert_eq!( +            Ok(("]]>ghi", CharData("abcdef"))), +            
CharData::parse("abcdef]]>ghi") +        ); +        assert_eq!( +            Ok(("&defghi", CharData("abc"))), +            CharData::parse("abc&defghi") +        ); +        assert_eq!( +            Ok(("]]>def&ghi", CharData("abc"))), +            CharData::parse("abc]]>def&ghi") +        ); +        assert_eq!( +            Ok(("&ghi", CharData("abc]>def"))), +            CharData::parse("abc]>def&ghi") +        ); +        assert_eq!( +            Err(Err::Incomplete(nom::Needed::Size( +                NonZero::new(3usize).unwrap() +            ))), +            CharData::parse("abcdefghi") +        ); +    } + +    #[test] +    fn test_comment() { +        assert_eq!(Ok(("", Comment(""))), Comment::parse("<!---->")); +        assert_eq!(Ok(("", Comment("asdf"))), Comment::parse("<!--asdf-->")); +        assert_eq!(Ok(("", Comment("as-df"))), Comment::parse("<!--as-df-->")); +        assert_eq!( +            Err(Err::Incomplete(nom::Needed::Size( +                NonZero::new(2usize).unwrap() +            ))), +            Comment::parse("<!--asdf") +        ); +    } + +    #[test] +    fn test_pi_target() { +        assert_eq!(Ok((" ", PITarget(Name("asdf")))), PITarget::parse("asdf ")); +        assert_eq!( +            Ok((" ", PITarget(Name("xmlasdf")))), +            PITarget::parse("xmlasdf ") +        ); +        assert_eq!( +            Err(Err::Error(Error { +                input: "xml ", +                code: ErrorKind::Tag +            })), +            PITarget::parse("xml ") +        ); +        assert_eq!( +            Err(Err::Error(Error { +                input: "xMl ", +                code: ErrorKind::Tag +            })), +            PITarget::parse("xMl ") +        ); +    } + +    #[test] +    fn test_cd_sect() { +        assert_eq!( +            Ok(("", CDSect(CData("<greeting>Hello, world!</greeting>")))), +            CDSect::parse("<![CDATA[<greeting>Hello, world!</greeting>]]>") +        ) +    } + +    #[test] +    fn 
test_cd_start() { +        assert_eq!(Ok(("asdf", CDStart)), CDStart::parse("<![CDATA[asdf")) +    } + +    #[test] +    fn test_cdata() { +        assert_eq!(Ok(("]]>asdf", CData("asdf"))), CData::parse("asdf]]>asdf")); +        assert_eq!( +            Ok(("]]>asdf", CData("<![CDATA[asdf"))), +            CData::parse("<![CDATA[asdf]]>asdf") +        ); +        assert_eq!( +            Ok(("]]>asdf", CData("<greeting>Hello, world!</greeting>"))), +            CData::parse("<greeting>Hello, world!</greeting>]]>asdf") +        ) +    } + +    #[test] +    fn test_cd_end() { +        assert_eq!(Ok(("asdf", CDEnd)), CDEnd::parse("]]>asdf")) +    } +}  | 
