aboutsummaryrefslogblamecommitdiffstats
path: root/src/xml/mod.rs
blob: 47c17795ae8142014596482a3945a141cbafa97a (plain) (tree)
1
2
3
4
5
6
7
8
9


              

                                                    

                         
                                            
      
                                                        

                                     
                                                                             


                         
                                               
                                                                                           

                                                   
 


































































































                                                                                
                                                                 




                                                                    
                     






                                                                                                                                                                                            
                         




                                                                 
                              






                                                                                                                                                                                                                                                                                                                                                                       
                         









                                                                                                                            
                            




                                                                 
                             




                                                                   
                               




                                                       
                                




                                                                         

                          




                                 
                                                





                                                                                           









                                                                              



                       









                                                                              




                       
                                             





                                                                             






                                                                              



                       






                                                                              




                       
                                     







                                                                                 
                                    











                                                                                               
                              






                                                                                                      
                                

                                                                              













                                                                                                   
                

                            
            

                                                                        
        

                       


                                                                             

 
                               








                                                                                 

                       




                                                                                            







                                                                             
          
                                    


                        
            

 
                                













                                                                                            
                                                          




                                                           
                               




                                                        
                             




                                                                     
                             




                                                    
                       
                        











                                                                           

                        
                              

                                            


                                                                                         






                                                                    
                         

                          
          
            

 
                                  











                                                                                                        

                                                        

 

                       












                                                              
 

                       

                         
                                        










                                                        

                            
                    


                                        
                                                                                                                                        





                                                                                                                 
                       


















                                                                  

                       













                                                        

                                   


                               
                                                   



                                                                       
                                                         
           
                                                                          


              

                         
                                 








                                                                                                                    

                                                














                                                             
                          


                                       
                                                                       








                                                            
 
                                   




                                                       
                                                                                          










                                                                          
 
                       


















                                                                                                               

                                                 
                       
                      

                                              



                                                                        





                                                                       


             
                       
                     
                    

                                   
                                                                       
                                                                      



                                                  
                                                        




                                                       
 











                                                                                         
                                                           








                                                                                  
 




                                                                
 
                       
                     
                    
 
                                                         

                                                        



                                                             

 
                       
                          






                             
                       
                        






















                                                                                                            
                       
                             
                    

                                   
                                                                                




                                                                                                      
                                                        





                                                               

                            
                    

                                 
                                                                                        




                                                                                       
                                                   






                                                                

                          














                                                                                  

                       

















                                                           

                           


                       

                         




















                                                                                                   

                       
                    


                       

                       


                         
                                                                                 




                                                                                
                                                       








                                                             

                                   







                                                                             

                                                         




                          

                                







                                                                             

                                                      





                          
                       

                                                                                                             





                                                                                                            
                                                                           










                                                                        

                            
                            

                              
                                                                                  




                                                                                 
                                         








                                                
                




                             
                       
                         


                                  
                                                                                     



                                                                      








                                                                    










                                                 

                       
















                                                                                   
                                  




                                                              

                        























                                                                                                                          

                             














                                                                          

                                           

















                                                                                                 

                                             

















                                                                                          

                          















                                                                                                   
                              














                                                                        
                                              











                                                                                             
                                                       











                                                                                                  
                                   




















                                                                                                       
                              




                                                                             

                       


                         

                                                                                
















                                                                                

 

                        










                                                                       
                                 




                                                            
                                   




                                                                

                         










                                                               

                       














                                                                            

                       














                                                                              

                        





















                                                                                     

                    











                                                                       
 

                         




























                                                                                                                  
                                 




                                                                
                         

















                                                                                         
                             


                                    
                                                                 
                                                                   


                                                               

 
                                        










                                                                                              
                               









                                                                                  

                             


                           

                             























                                                                                                    
                                




                                                                       

            

                          






                                                                          



                                                                          
                                             


                                  
     
































                                                                

































                                                                          
 
use std::char;

use nom::{
    branch::alt,
    bytes::streaming::{is_a, tag, take, take_while},
    character::{
        complete::one_of,
        streaming::{char, none_of, satisfy},
    },
    combinator::{map, not, opt, peek, recognize, value},
    error::{Error, ErrorKind},
    multi::{many0, many1, many_till},
    sequence::{delimited, pair, preceded, separated_pair, terminated, tuple},
    Err, IResult, Parser,
};

// parser: parses tokens from the lexer into events
// no well-formedness, validity, or data-model checks; just a simple translation of the input into Rust types
// the output is a Rust representation of the input XML
// the types could be used for XML production too?

mod parsers;

/// Name of a namespace-declaring attribute: either `xmlns:<prefix>` or the
/// bare `xmlns`.
#[derive(Clone, Debug)]
pub enum NSAttName<'s> {
    PrefixedAttName(PrefixedAttName<'s>),
    DefaultAttName,
}
/// [1]   NSAttName   ::=   PrefixedAttName | DefaultAttName
///
/// The prefixed form is attempted first; the bare `xmlns` form is the fallback.
pub fn ns_att_name(input: &str) -> IResult<&str, NSAttName> {
    let prefixed = map(prefixed_att_name, NSAttName::PrefixedAttName);
    let default = value(NSAttName::DefaultAttName, default_att_name);
    alt((prefixed, default))(input)
}

#[derive(Clone, Debug)]
pub struct PrefixedAttName<'s>(NCName<'s>);
/// [2]   	PrefixedAttName	   ::=   	'xmlns:' NCName
pub fn prefixed_att_name(input: &str) -> IResult<&str, PrefixedAttName> {
    map(preceded(tag("xmlns:"), nc_name), |nc_name| {
        PrefixedAttName(nc_name)
    })(input)
}

/// Marker for the default namespace declaration attribute name (`xmlns`).
#[derive(Clone, Debug)]
pub struct DefaultAttName;
/// [3]   DefaultAttName   ::=   'xmlns'
pub fn default_att_name(input: &str) -> IResult<&str, DefaultAttName> {
    let (rest, _) = tag("xmlns")(input)?;
    Ok((rest, DefaultAttName))
}

#[derive(Clone, Debug)]
pub struct NCName<'s>(&'s str);
/// [4]   	NCName	   ::=   	Name - (Char* ':' Char*)
pub fn nc_name(input: &str) -> IResult<&str, NCName> {
    map(
        recognize(pair(
            recognize(name_start_char).and_then(satisfy(|c| c != ':')),
            many_till(name_char, peek(char(':'))),
        )),
        |nc_name| NCName(nc_name),
    )(input)
}

/// A qualified name: either `prefix:local` or just a local part.
#[derive(Clone, Debug)]
pub enum QName<'s> {
    PrefixedName(PrefixedName<'s>),
    UnprefixedName(UnprefixedName<'s>),
}
/// [7]   QName   ::=   PrefixedName | UnprefixedName
///
/// The prefixed form is attempted first; the unprefixed form is the fallback.
pub fn q_name(input: &str) -> IResult<&str, QName> {
    let prefixed = map(prefixed_name, QName::PrefixedName);
    let unprefixed = map(unprefixed_name, QName::UnprefixedName);
    alt((prefixed, unprefixed))(input)
}

#[derive(Clone, Debug)]
pub struct PrefixedName<'s> {
    prefix: Prefix<'s>,
    local_part: LocalPart<'s>,
}
/// [8]   	PrefixedName	   ::=   	Prefix ':' LocalPart
pub fn prefixed_name(input: &str) -> IResult<&str, PrefixedName> {
    map(
        separated_pair(prefix, char(':'), local_part),
        |(prefix, local_part)| PrefixedName { prefix, local_part },
    )(input)
}

#[derive(Clone, Debug)]
pub struct UnprefixedName<'s>(LocalPart<'s>);
/// [9]   	UnprefixedName	   ::=   	LocalPart
pub fn unprefixed_name(input: &str) -> IResult<&str, UnprefixedName> {
    map(local_part, |local_part| UnprefixedName(local_part))(input)
}

#[derive(Clone, Debug)]
pub struct Prefix<'s>(NCName<'s>);
/// [10]   	Prefix	   ::=   	NCName
pub fn prefix(input: &str) -> IResult<&str, Prefix> {
    map(nc_name, |nc_name| Prefix(nc_name))(input)
}

#[derive(Clone, Debug)]
pub struct LocalPart<'s>(NCName<'s>);
/// [11]   	LocalPart	   ::=   	NCName
pub fn local_part(input: &str) -> IResult<&str, LocalPart> {
    map(nc_name, |nc_name| LocalPart(nc_name))(input)
}

// xml spec

pub type Document<'s> = (Prolog<'s>, Element<'s>, Vec<Misc<'s>>);
/// [1]   	document	   ::=   	prolog element Misc*
pub fn document(input: &str) -> IResult<&str, Document> {
    tuple((prolog, element, many0(misc)))(input)
}

pub type Char = char;
/// [2]   Char   ::=   #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
/// /* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */
///
/// Consumes exactly one character belonging to the `Char` production.
pub fn xmlchar(input: &str) -> IResult<&str, Char> {
    /// Membership test for the `Char` production.
    fn is_xml_char(c: Char) -> bool {
        matches!(
            c,
            '\u{9}'
                | '\u{A}'
                | '\u{D}'
                | '\u{20}'..='\u{D7FF}'
                | '\u{E000}'..='\u{FFFD}'
                | '\u{10000}'..='\u{10FFFF}'
        )
    }

    satisfy(is_xml_char)(input)
}

pub type S<'s> = &'s str;
/// [3]   S   ::=   (#x20 | #x9 | #xD | #xA)+
///
/// Consumes one or more white space characters and returns them as a slice.
pub fn s(input: &str) -> IResult<&str, S> {
    // space, tab, carriage return, line feed
    const WHITE_SPACE: &str = "\u{20}\u{9}\u{D}\u{A}";
    is_a(WHITE_SPACE)(input)
}

pub type NameStartChar = char;
/// [4]   NameStartChar   ::=   ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
///
/// Consumes exactly one character that may begin a `Name`.
pub fn name_start_char(input: &str) -> IResult<&str, NameStartChar> {
    /// Membership test for the `NameStartChar` production.
    fn starts_name(c: NameStartChar) -> bool {
        matches!(
            c,
            ':' | 'A'..='Z'
                | '_'
                | 'a'..='z'
                | '\u{C0}'..='\u{D6}'
                | '\u{D8}'..='\u{F6}'
                | '\u{F8}'..='\u{2FF}'
                | '\u{370}'..='\u{37D}'
                | '\u{37F}'..='\u{1FFF}'
                | '\u{200C}'..='\u{200D}'
                | '\u{2070}'..='\u{218F}'
                | '\u{2C00}'..='\u{2FEF}'
                | '\u{3001}'..='\u{D7FF}'
                | '\u{F900}'..='\u{FDCF}'
                | '\u{FDF0}'..='\u{FFFD}'
                | '\u{10000}'..='\u{EFFFF}'
        )
    }

    satisfy(starts_name)(input)
}

pub type NameChar = char;
/// [4a]   NameChar   ::=   NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
///
/// Consumes exactly one character that may appear inside a `Name`.
pub fn name_char(input: &str) -> IResult<&str, NameChar> {
    /// The characters `NameChar` allows in addition to `NameStartChar`.
    fn continues_name(c: NameChar) -> bool {
        matches!(
            c,
            '-' | '.'
                | '0'..='9'
                | '\u{B7}'
                | '\u{0300}'..='\u{036F}'
                | '\u{203F}'..='\u{2040}'
        )
    }

    alt((name_start_char, satisfy(continues_name)))(input)
}

pub type Name<'s> = &'s str;
/// [5]   Name   ::=   NameStartChar (NameChar)*
///
/// Returns the matched name as a slice of the input.
pub fn name(input: &str) -> IResult<&str, Name> {
    let tail = many0(name_char);
    recognize(pair(name_start_char, tail))(input)
}

pub type Names<'s> = &'s str;
/// [6]   Names   ::=   Name (#x20 Name)*
///
/// Returns the whole space-separated list as one slice of the input.
pub fn names(input: &str) -> IResult<&str, Names> {
    let following = many0(pair(char('\u{20}'), name));
    recognize(pair(name, following))(input)
}

pub type Nmtoken<'s> = &'s str;
/// [7]   Nmtoken   ::=   (NameChar)+
///
/// Returns the matched token as a slice of the input.
pub fn nmtoken(input: &str) -> IResult<&str, Nmtoken> {
    let token_chars = many1(name_char);
    recognize(token_chars)(input)
}

pub type Nmtokens<'s> = &'s str;
/// [8]   Nmtokens   ::=   Nmtoken (#x20 Nmtoken)*
///
/// Returns the whole space-separated list as one slice of the input.
pub fn nmtokens(input: &str) -> IResult<&str, Nmtokens> {
    let following = many0(pair(char('\u{20}'), nmtoken));
    recognize(pair(nmtoken, following))(input)
}

/// One piece of a quoted literal, as produced by the `entity_value` and
/// `att_value` parsers.
#[derive(Clone, Debug)]
pub enum LiteralData<'s> {
    /// A run of literal text, borrowed from the input.
    String(&'s str),
    /// A parameter-entity reference found inside the literal.
    PEReference(PEReference<'s>),
    /// A reference found inside the literal.
    Reference(Reference<'s>),
}

pub type EntityValue<'s> = Vec<LiteralData<'s>>;
/// [9]   	EntityValue	   ::=   	'"' ([^%&"] | PEReference | Reference)* '"'
///			|  "'" ([^%&'] | PEReference | Reference)* "'"
pub fn entity_value(input: &str) -> IResult<&str, EntityValue> {
    alt((
        delimited(
            char('"'),
            many0(alt((
                map(
                    recognize(many_till(take(1usize), peek(one_of("%&\"")))),
                    |string| LiteralData::String(string),
                ),
                map(pe_reference, |pe_reference| {
                    LiteralData::PEReference(pe_reference)
                }),
                map(reference, |reference| LiteralData::Reference(reference)),
            ))),
            char('"'),
        ),
        delimited(
            char('\''),
            many0(alt((
                map(
                    recognize(many_till(take(1usize), peek(one_of("%&'")))),
                    |string| LiteralData::String(string),
                ),
                map(pe_reference, |pe_reference| {
                    LiteralData::PEReference(pe_reference)
                }),
                map(reference, |reference| LiteralData::Reference(reference)),
            ))),
            char('\''),
        ),
    ))(input)
}

pub type AttValue<'s> = Vec<LiteralData<'s>>;
/// [10]   	AttValue	   ::=   	'"' ([^<&"] | Reference)* '"'
/// 			|  "'" ([^<&'] | Reference)* "'"
pub fn att_value(input: &str) -> IResult<&str, AttValue> {
    alt((
        delimited(
            char('"'),
            many0(alt((
                map(
                    recognize(many_till(take(1usize), peek(one_of("%&\"")))),
                    |string| LiteralData::String(string),
                ),
                map(reference, |reference| LiteralData::Reference(reference)),
            ))),
            char('"'),
        ),
        delimited(
            char('\''),
            many0(alt((
                map(
                    recognize(many_till(take(1usize), peek(one_of("%&'")))),
                    |string| LiteralData::String(string),
                ),
                map(reference, |reference| LiteralData::Reference(reference)),
            ))),
            char('\''),
        ),
    ))(input)
}

pub type SystemLiteral<'s> = &'s str;
/// [11]   SystemLiteral   ::=   ('"' [^"]* '"') | ("'" [^']* "'")
///
/// Returns the text between the quotes; the quotes themselves are dropped.
pub fn system_literal(input: &str) -> IResult<&str, SystemLiteral> {
    let double_quoted = delimited(char('"'), recognize(many0(none_of("\""))), char('"'));
    let single_quoted = delimited(char('\''), recognize(many0(none_of("'"))), char('\''));
    alt((double_quoted, single_quoted))(input)
}

pub type PubidLiteral<'s> = &'s str;
/// [12]   	PubidLiteral	   ::=   	'"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
pub fn pubid_literal(input: &str) -> IResult<&str, PubidLiteral> {
    alt((
        delimited(char('"'), recognize(many0(pubid_char)), char('"')),
        delimited(
            char('\''),
            recognize(many0(recognize(not(char('\''))).and_then(pubid_char))),
            char('\''),
        ),
    ))(input)
}

pub type PubidChar<'s> = char;
/// [13]   PubidChar   ::=   #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
///
/// Consumes exactly one character allowed in a public identifier, including
/// the punctuation class listed in the production.
pub fn pubid_char(input: &str) -> IResult<&str, PubidChar> {
    satisfy(|c| {
        matches!(
            c,
            '\u{20}'
                | '\u{D}'
                | '\u{A}'
                | 'a'..='z'
                | 'A'..='Z'
                | '0'..='9'
                | '-'
                | '\''
                | '('
                | ')'
                | '+'
                | ','
                | '.'
                | '/'
                | ':'
                | '='
                | '?'
                | ';'
                | '!'
                | '*'
                | '#'
                | '@'
                | '$'
                | '_'
                | '%'
        )
    })(input)
}

pub type CharData<'s> = &'s str;
/// [14]   CharData   ::=   [^<&]* - ([^<&]* ']]>' [^<&]*)
///
/// Returns the (possibly empty) run of text up to, but not including, the
/// next '<', '&' or "]]>".
pub fn char_data(input: &str) -> IResult<&str, CharData> {
    // The terminator is only peeked at, so it is left in the input.
    let terminator = peek(alt((recognize(one_of("<&")), tag("]]>"))));
    recognize(many_till(none_of("<&"), terminator))(input)
}

pub type Comment<'s> = &'s str;
/// [15]   Comment   ::=   '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
///
/// Returns the text between `<!--` and `-->`. The body ends at the first
/// "--", which must then be the start of the closing `-->`.
pub fn comment(input: &str) -> IResult<&str, Comment> {
    let body = recognize(many_till(xmlchar, peek(tag("--"))));
    delimited(tag("<!--"), body, tag("-->"))(input)
}

/// A processing instruction: its target and, when present, the text that
/// follows the target (including the separating white space).
#[derive(Clone, Debug)]
pub struct PI<'s> {
    target: &'s str,
    instruction: Option<&'s str>,
}
/// [16]   PI   ::=   '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
pub fn pi(input: &str) -> IResult<&str, PI> {
    // White space followed by everything up to (not including) the next "?>".
    let body = opt(recognize(pair(s, many_till(xmlchar, peek(tag("?>"))))));
    let (rest, (target, instruction)) =
        delimited(tag("<?"), pair(pi_target, body), tag("?>"))(input)?;
    Ok((
        rest,
        PI {
            target,
            instruction,
        },
    ))
}

pub type PITarget<'s> = &'s str;
/// [17]   	PITarget	   ::=   	Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
pub fn pi_target(input: &str) -> IResult<&str, PITarget> {
    let (rest, name) = name(input)?;
    if name.to_lowercase() == "xml" {
        return Err(Err::Error(Error {
            input,
            // TODO: check if better error to return
            code: ErrorKind::Tag,
        }));
    } else {
        return Ok((rest, name));
    }
}

pub type CDSect<'s> = (CDStart<'s>, CData<'s>, CDEnd<'s>);
/// [18]   	CDSect	   ::=   	CDStart CData CDEnd
pub fn cd_sect(input: &str) -> IResult<&str, CDSect> {
    tuple((cd_start, cdata, cd_end))(input)
}

pub type CDStart<'s> = &'s str;
/// [19]   CDStart   ::=   '<![CDATA['
pub fn cd_start(input: &str) -> IResult<&str, CDStart> {
    const OPENING_MARKER: &str = "<![CDATA[";
    tag(OPENING_MARKER)(input)
}

pub type CData<'s> = &'s str;
/// [20]   CData   ::=   (Char* - (Char* ']]>' Char*))
///
/// Returns the text up to, but not including, the next "]]>".
pub fn cdata(input: &str) -> IResult<&str, CData> {
    // The closing marker is only peeked at, so it is left in the input.
    let closing_marker = peek(tag("]]>"));
    recognize(many_till(xmlchar, closing_marker))(input)
}

pub type CDEnd<'s> = &'s str;
/// [21]   CDEnd   ::=   ']]>'
pub fn cd_end(input: &str) -> IResult<&str, CDEnd> {
    const CLOSING_MARKER: &str = "]]>";
    tag(CLOSING_MARKER)(input)
}

pub type Prolog<'s> = (
    Option<XMLDecl<'s>>,
    Vec<Misc<'s>>,
    Option<(DoctypeDecl<'s>, Vec<Misc<'s>>)>,
);
/// [22]   	prolog	   ::=   	XMLDecl? Misc* (doctypedecl Misc*)?
pub fn prolog(input: &str) -> IResult<&str, Prolog> {
    tuple((
        opt(xml_decl),
        many0(misc),
        opt(tuple((doctypedecl, many0(misc)))),
    ))(input)
}

#[derive(Debug)]
pub struct XMLDecl<'s> {
    version_info: VersionInfo,
    encoding_decl: Option<EncodingDecl<'s>>,
    sd_decl: Option<SDDecl>,
}
/// [23]   	XMLDecl	   ::=   	'<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
pub fn xml_decl(input: &str) -> IResult<&str, XMLDecl> {
    map(
        delimited(
            tag("<?xml"),
            tuple((version_info, opt(encoding_decl), opt(sd_decl))),
            pair(opt(s), tag("?>")),
        ),
        |(version_info, encoding_decl, sd_decl)| XMLDecl {
            version_info,
            encoding_decl,
            sd_decl,
        },
    )(input)
}

pub type VersionInfo = VersionNum;
/// [24]   VersionInfo   ::=   S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
///
/// Returns only the version number; white space, keyword and quotes are dropped.
pub fn version_info(input: &str) -> IResult<&str, VersionInfo> {
    let single_quoted = delimited(char('\''), version_num, char('\''));
    let double_quoted = delimited(char('"'), version_num, char('"'));
    preceded(
        tuple((s, tag("version"), eq)),
        alt((single_quoted, double_quoted)),
    )(input)
}

/// [25]   Eq   ::=   S? '=' S?
///
/// Returns the matched '=' together with any white space around it.
pub fn eq(input: &str) -> IResult<&str, &str> {
    let padded_equals = tuple((opt(s), char('='), opt(s)));
    recognize(padded_equals)(input)
}

/// The XML versions this parser distinguishes: `1.0` and `1.1`.
#[derive(Clone, Debug)]
pub enum VersionNum {
    One,
    OneDotOne,
}
/// [26]   VersionNum   ::=   '1.' [0-9]+
///
/// Only a single `0` or `1` is accepted after `1.`; no other digits are consumed.
pub fn version_num(input: &str) -> IResult<&str, VersionNum> {
    let (rest, _) = tag("1.")(input)?;
    alt((
        value(VersionNum::One, char('0')),
        value(VersionNum::OneDotOne, char('1')),
    ))(rest)
}

/// Miscellaneous content: a comment, a processing instruction, or white space.
#[derive(Clone, Debug)]
pub enum Misc<'s> {
    Comment(Comment<'s>),
    PI(PI<'s>),
    // TODO: how to deal with whitespace
    S,
}
/// [27]   Misc   ::=   Comment | PI | S
///
/// White space is recognised but its text is not kept.
pub fn misc(input: &str) -> IResult<&str, Misc> {
    let as_comment = map(comment, Misc::Comment);
    let as_pi = map(pi, Misc::PI);
    let as_space = value(Misc::S, s);
    alt((as_comment, as_pi, as_space))(input)
}

#[derive(Debug)]
pub struct DoctypeDecl<'s> {
    name: QName<'s>,
    external_id: Option<ExternalID<'s>>,
    int_subset: Option<IntSubset<'s>>,
}
/// [16]   	doctypedecl	   ::=   	'<!DOCTYPE' S QName (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
/// [28]   	doctypedecl	   ::=   	'<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
pub fn doctypedecl(input: &str) -> IResult<&str, DoctypeDecl> {
    map(
        delimited(
            pair(tag("<!DOCTYPE"), s),
            tuple((
                q_name,
                opt(preceded(s, external_id)),
                preceded(
                    opt(s),
                    opt(terminated(
                        delimited(tag("["), int_subset, tag("]")),
                        opt(s),
                    )),
                ),
            )),
            tag(">"),
        ),
        |(name, external_id, int_subset)| DoctypeDecl {
            name,
            external_id,
            int_subset,
        },
    )(input)
}

/// Separator between declarations inside a DTD subset.
#[derive(Clone, Debug)]
pub enum DeclSep<'s> {
    PEReference(PEReference<'s>),
    // TODO: tackle whitespace
    /// Whitespace; the matched text itself is not retained.
    S,
}
/// [28a]   DeclSep   ::=   PEReference | S
pub fn decl_sep(input: &str) -> IResult<&str, DeclSep> {
    let whitespace = value(DeclSep::S, s);
    alt((map(pe_reference, DeclSep::PEReference), whitespace))(input)
}

/// One entry of the internal DTD subset.
#[derive(Debug)]
pub enum IntSubsetDeclaration<'s> {
    MarkupDecl(MarkupDecl<'s>),
    DeclSep(DeclSep<'s>),
}
/// The internal subset as an ordered list of its entries.
type IntSubset<'s> = Vec<IntSubsetDeclaration<'s>>;
/// [28b]   intSubset   ::=   (markupdecl | DeclSep)*
pub fn int_subset(input: &str) -> IResult<&str, IntSubset> {
    let declaration = alt((
        map(markup_decl, IntSubsetDeclaration::MarkupDecl),
        map(decl_sep, IntSubsetDeclaration::DeclSep),
    ));
    many0(declaration)(input)
}

/// A single markup declaration inside a DTD.
#[derive(Debug)]
pub enum MarkupDecl<'s> {
    Elementdecl(Elementdecl<'s>),
    AttlistDecl(AttlistDecl<'s>),
    EntityDecl(EntityDecl<'s>),
    NotationDecl(NotationDecl<'s>),
    PI(PI<'s>),
    Comment(Comment<'s>),
}
/// [29]   markupdecl   ::=   elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
///
/// Alternatives are tried in the order listed in the production.
pub fn markup_decl(input: &str) -> IResult<&str, MarkupDecl> {
    alt((
        map(elementdecl, MarkupDecl::Elementdecl),
        map(attlist_decl, MarkupDecl::AttlistDecl),
        map(entity_decl, MarkupDecl::EntityDecl),
        map(notation_decl, MarkupDecl::NotationDecl),
        map(pi, MarkupDecl::PI),
        map(comment, MarkupDecl::Comment),
    ))(input)
}

pub struct ExtSubset<'s> {
    text_decl: Option<TextDecl<'s>>,
    ext_subset_decl: ExtSubsetDecl<'s>,
}
/// [30]   	extSubset	   ::=   	TextDecl? extSubsetDecl
pub fn ext_subset(input: &str) -> IResult<&str, ExtSubset> {
    map(
        pair(opt(text_decl), ext_subset_decl),
        |(text_decl, ext_subset_decl)| ExtSubset {
            text_decl,
            ext_subset_decl,
        },
    )(input)
}

/// One entry of the external DTD subset.
pub enum ExtSubsetDeclaration<'s> {
    MarkupDecl(MarkupDecl<'s>),
    ConditionalSect(ConditionalSect<'s>),
    DeclSep(DeclSep<'s>),
}
/// The external subset declarations as an ordered list.
type ExtSubsetDecl<'s> = Vec<ExtSubsetDeclaration<'s>>;
/// [31]   extSubsetDecl   ::=   ( markupdecl | conditionalSect | DeclSep)*
pub fn ext_subset_decl(input: &str) -> IResult<&str, ExtSubsetDecl> {
    let declaration = alt((
        map(markup_decl, ExtSubsetDeclaration::MarkupDecl),
        map(conditional_sect, ExtSubsetDeclaration::ConditionalSect),
        map(decl_sep, ExtSubsetDeclaration::DeclSep),
    ));
    many0(declaration)(input)
}

/// Standalone document declaration: `true` for `yes`, `false` for `no`.
pub type SDDecl = bool;
/// [32]   SDDecl   ::=   S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
pub fn sd_decl(input: &str) -> IResult<&str, SDDecl> {
    /// The unquoted `'yes' | 'no'` value shared by both quoting styles.
    fn yes_or_no(input: &str) -> IResult<&str, SDDecl> {
        alt((value(true, tag("yes")), value(false, tag("no"))))(input)
    }

    let (input, _) = tuple((s, tag("standalone"), eq))(input)?;
    alt((
        delimited(char('\''), yes_or_no, char('\'')),
        delimited(char('"'), yes_or_no, char('"')),
    ))(input)
}

// (Productions 33 through 38 have been removed.)

/// An element: either a self-closing tag, or a start tag, content and end tag.
#[derive(Debug, Clone)]
pub enum Element<'s> {
    Empty(EmptyElemTag<'s>),
    NotEmpty(STag<'s>, Content<'s>, ETag<'s>),
}
/// [39]   element   ::=   EmptyElemTag | STag content ETag
///
/// The empty-element form is attempted first.
pub fn element(input: &str) -> IResult<&str, Element> {
    alt((
        map(empty_elem_tag, Element::Empty),
        map(tuple((s_tag, content, e_tag)), |(open, body, close)| {
            Element::NotEmpty(open, body, close)
        }),
    ))(input)
}

#[derive(Debug, Clone)]
pub struct STag<'s> {
    name: QName<'s>,
    attributes: Vec<Attribute<'s>>,
}
/// [12]   	STag	   ::=   	'<' QName (S Attribute)* S? '>'
/// [40]   	STag	   ::=   	'<' Name (S Attribute)* S? '>'
pub fn s_tag(input: &str) -> IResult<&str, STag> {
    map(
        delimited(
            tag("<"),
            pair(q_name, many0(preceded(s, attribute))),
            pair(opt(s), tag(">")),
        ),
        |(name, attributes)| STag { name, attributes },
    )(input)
}

/// An attribute in a start or empty-element tag.
#[derive(Debug, Clone)]
pub enum Attribute<'s> {
    /// A namespace declaration (`NSAttName Eq AttValue`).
    NamespaceDeclaration {
        ns_name: NSAttName<'s>,
        value: AttValue<'s>,
    },
    /// An ordinary attribute (`QName Eq AttValue`).
    Attribute {
        name: QName<'s>,
        value: AttValue<'s>,
    },
}
/// [15]   Attribute   ::=   NSAttName Eq AttValue | QName Eq AttValue
///
/// The namespace-declaration form is tried before the plain form.
pub fn attribute(input: &str) -> IResult<&str, Attribute> {
    let namespace_declaration = map(
        separated_pair(ns_att_name, eq, att_value),
        |(ns_name, value)| Attribute::NamespaceDeclaration { ns_name, value },
    );
    let plain_attribute = map(
        separated_pair(q_name, eq, att_value),
        |(name, value)| Attribute::Attribute { name, value },
    );
    alt((namespace_declaration, plain_attribute))(input)
}
// pub type Attribute<'s> = (Name<'s>, AttValue<'s>);
// [41]   Attribute   ::=   Name Eq AttValue   (superseded by [15]; plain comment so it is not attached as rustdoc to the next item)
// pub fn attribute(input: &str) -> IResult<&str, Attribute> {
//     separated_pair(name, eq, att_value)(input)
// }

#[derive(Debug, Clone)]
pub struct ETag<'s> {
    name: QName<'s>,
}
/// [13]   	ETag	   ::=   	'</' QName S? '>'
/// [42]   	ETag	   ::=   	'</' Name S? '>'
pub fn e_tag(input: &str) -> IResult<&str, ETag> {
    map(
        delimited(tag("</"), q_name, pair(opt(s), tag(">"))),
        |name| ETag { name },
    )(input)
}

/// A non-character-data item inside element content.
#[derive(Debug, Clone)]
pub enum ContentItem<'s> {
    // CharData(&'s str),
    Element(Element<'s>),
    Reference(Reference<'s>),
    CDSect(CDSect<'s>),
    PI(PI<'s>),
    Comment(Comment<'s>),
}
/// Element content: optional leading character data followed by items, each
/// with the optional character data that comes after it.
#[derive(Debug, Clone)]
pub struct Content<'s> {
    /// Character data before the first item.
    char_data: Option<CharData<'s>>,
    /// Each item paired with the character data following it.
    content: Vec<(ContentItem<'s>, Option<CharData<'s>>)>,
}
/// [43]   content   ::=   CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
pub fn content(input: &str) -> IResult<&str, Content> {
    let item = alt((
        map(element, ContentItem::Element),
        map(reference, ContentItem::Reference),
        map(cd_sect, ContentItem::CDSect),
        map(pi, ContentItem::PI),
        map(comment, ContentItem::Comment),
    ));
    let (input, leading) = opt(char_data)(input)?;
    let (input, items) = many0(pair(item, opt(char_data)))(input)?;
    Ok((
        input,
        Content {
            char_data: leading,
            content: items,
        },
    ))
}

#[derive(Debug, Clone)]
pub struct EmptyElemTag<'s> {
    name: QName<'s>,
    attributes: Vec<Attribute<'s>>,
}
/// [14]   	EmptyElemTag	   ::=   	'<' QName (S Attribute)* S? '/>'
/// [44]   	EmptyElemTag	   ::=   	'<' Name (S Attribute)* S? '/>'	[WFC: Unique Att Spec]
pub fn empty_elem_tag(input: &str) -> IResult<&str, EmptyElemTag> {
    map(
        delimited(
            tag("<"),
            pair(q_name, many0(preceded(s, attribute))),
            pair(opt(s), tag("/>")),
        ),
        |(name, attributes)| EmptyElemTag { name, attributes },
    )(input)
}

/// An `<!ELEMENT ...>` declaration.
#[derive(Debug)]
pub struct Elementdecl<'s> {
    /// Name of the declared element type.
    name: QName<'s>,
    /// Declared content model.
    contentspec: Contentspec<'s>,
}
/// [17]   elementdecl   ::=   '<!ELEMENT' S QName S contentspec S? '>'
/// [45]   elementdecl   ::=   '<!ELEMENT' S Name S contentspec S? '>'
pub fn elementdecl(input: &str) -> IResult<&str, Elementdecl> {
    let (input, _) = pair(tag("<!ELEMENT"), s)(input)?;
    let (input, (name, contentspec)) = separated_pair(q_name, s, contentspec)(input)?;
    let (input, _) = pair(opt(s), tag(">"))(input)?;
    Ok((input, Elementdecl { name, contentspec }))
}

// TODO: casings???
/// Content model of an element type declaration.
#[derive(Clone, Debug)]
pub enum Contentspec<'s> {
    Empty,
    Any,
    Mixed(Mixed<'s>),
    Children(Children<'s>),
}
/// [46]   contentspec   ::=   'EMPTY' | 'ANY' | Mixed | children
///
/// The keywords are matched exactly as written (upper case).
pub fn contentspec(input: &str) -> IResult<&str, Contentspec> {
    alt((
        value(Contentspec::Empty, tag("EMPTY")),
        value(Contentspec::Any, tag("ANY")),
        map(mixed, Contentspec::Mixed),
        map(children, Contentspec::Children),
    ))(input)
}

/// Occurrence indicator attached to a content particle.
#[derive(Clone, Debug)]
pub enum Occurence {
    /// No indicator.
    Once,
    /// `?`
    Optional,
    /// `*`
    Many0,
    /// `+`
    Many1,
}
/// Occurence ::= ('?' | '*' | '+')?
///
/// Never fails on a missing indicator: absence yields [`Occurence::Once`].
pub fn occurence(input: &str) -> IResult<&str, Occurence> {
    let (input, marker) = opt(alt((tag("?"), tag("*"), tag("+"))))(input)?;
    let occurence = match marker {
        None => Occurence::Once,
        Some("?") => Occurence::Optional,
        Some("*") => Occurence::Many0,
        Some("+") => Occurence::Many1,
        Some(_) => Occurence::Once,
    };
    Ok((input, occurence))
}

#[derive(Clone, Debug)]
pub enum ChildrenKind<'s> {
    Choice(Choice<'s>),
    Seq(Seq<'s>),
}
#[derive(Clone, Debug)]
pub struct Children<'s> {
    kind: ChildrenKind<'s>,
    occurence: Occurence,
}
/// [47]   	children	   ::=   	(choice | seq) ('?' | '*' | '+')?
pub fn children(input: &str) -> IResult<&str, Children> {
    map(
        pair(
            alt((
                map(choice, |choice| ChildrenKind::Choice(choice)),
                map(seq, |seq| ChildrenKind::Seq(seq)),
            )),
            occurence,
        ),
        |(kind, occurence)| Children { kind, occurence },
    )(input)
    // alt((
    //     map(pair(choice, occurence), |(choice, occurence)| Children::Choice(choice, occurence)),
    //     map(pair(seq, occurence), |(seq, occurence)| Children::Seq(seq, occurence))
    // ))(input)
}

#[derive(Clone, Debug)]
pub enum CpKind<'s> {
    Name(QName<'s>),
    Choice(Choice<'s>),
    Seq(Seq<'s>),
}
#[derive(Clone, Debug)]
pub struct Cp<'s> {
    kind: CpKind<'s>,
    occurence: Occurence,
}
/// [18]   	cp	   ::=   	(QName | choice | seq) ('?' | '*' | '+')?
/// [48]   	cp	   ::=   	(Name | choice | seq) ('?' | '*' | '+')?
pub fn cp(input: &str) -> IResult<&str, Cp> {
    map(
        pair(
            alt((
                map(q_name, |name| CpKind::Name(name)),
                map(choice, |choice| CpKind::Choice(choice)),
                map(seq, |seq| CpKind::Seq(seq)),
            )),
            occurence,
        ),
        |(kind, occurence)| Cp { kind, occurence },
    )(input)
}

#[derive(Clone, Debug)]
pub struct Choice<'s>(Vec<Cp<'s>>);
/// [49]   	choice	   ::=   	'(' S? cp ( S? '|' S? cp )+ S? ')'
pub fn choice(input: &str) -> IResult<&str, Choice> {
    map(
        delimited(
            pair(tag("("), opt(s)),
            pair(cp, many1(preceded(tuple((opt(s), tag("|"), opt(s))), cp))),
            pair(opt(s), tag(")")),
        ),
        |(head, tail)| {
            let choice = vec![vec![head], tail].concat();
            Choice(choice)
        },
    )(input)
}

#[derive(Clone, Debug)]
pub struct Seq<'s>(Vec<Cp<'s>>);
/// [50]   	seq	   ::=   	'(' S? cp ( S? ',' S? cp )* S? ')'
pub fn seq(input: &str) -> IResult<&str, Seq> {
    map(
        delimited(
            pair(tag("("), opt(s)),
            pair(cp, many0(preceded(tuple((opt(s), tag(","), opt(s))), cp))),
            pair(opt(s), tag(")")),
        ),
        |(head, tail)| {
            let seq = vec![vec![head], tail].concat();
            Seq(seq)
        },
    )(input)
}

/// Mixed-content model. `#PCDATA` is always implied; the vector holds only
/// the element names listed after it (empty when none are listed).
#[derive(Clone, Debug)]
pub struct Mixed<'s>(Vec<QName<'s>>);
/// [19]   Mixed   ::=   '(' S? '#PCDATA' (S? '|' S? QName)* S? ')*' | '(' S? '#PCDATA' S? ')'
/// [51]   Mixed   ::=   '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'
///
/// The `)*` form is tried first so that `(#PCDATA)*` is consumed whole; the
/// plain `(#PCDATA)` form is the fallback.
pub fn mixed(input: &str) -> IResult<&str, Mixed> {
    alt((
        map(
            delimited(
                // Whitespace after `(` is optional (`S?`), exactly as in the
                // second alternative below.
                tuple((tag("("), opt(s), tag("#PCDATA"))),
                many0(preceded(tuple((opt(s), tag("|"), opt(s))), q_name)),
                pair(opt(s), tag(")*")),
            ),
            Mixed,
        ),
        value(
            Mixed(Vec::new()),
            tuple((tag("("), opt(s), tag("#PCDATA"), opt(s), tag(")"))),
        ),
    ))(input)
}

#[derive(Debug)]
pub struct AttlistDecl<'s> {
    element_type: QName<'s>,
    att_defs: Vec<AttDef<'s>>,
}
/// [20]   	AttlistDecl	   ::=   	'<!ATTLIST' S QName AttDef* S? '>'
/// [52]   	AttlistDecl	   ::=   	'<!ATTLIST' S Name AttDef* S? '>'
pub fn attlist_decl(input: &str) -> IResult<&str, AttlistDecl> {
    map(
        delimited(
            pair(tag("<!ATTLIST"), s),
            pair(q_name, many0(att_def)),
            pair(opt(s), tag(">")),
        ),
        |(element_type, att_defs)| AttlistDecl {
            element_type,
            att_defs,
        },
    )(input)
}

#[derive(Debug)]
pub enum AttDefName<'s> {
    QName(QName<'s>),
    NSAttName(NSAttName<'s>),
}
#[derive(Debug)]
pub struct AttDef<'s> {
    name: AttDefName<'s>,
    att_type: AttType<'s>,
    default_decl: DefaultDecl<'s>,
}
/// [21]   	AttDef	   ::=   	S (QName | NSAttName) S AttType S DefaultDecl
/// [53]   	AttDef	   ::=   	S Name S AttType S DefaultDecl
pub fn att_def(input: &str) -> IResult<&str, AttDef> {
    map(
        tuple((
            preceded(
                s,
                alt((
                    map(q_name, |q_name| AttDefName::QName(q_name)),
                    map(ns_att_name, |ns_att_name| {
                        AttDefName::NSAttName(ns_att_name)
                    }),
                )),
            ),
            preceded(s, att_type),
            preceded(s, default_decl),
        )),
        |(name, att_type, default_decl)| AttDef {
            name,
            att_type,
            default_decl,
        },
    )(input)
}

/// Declared type of an attribute.
#[derive(Clone, Debug)]
pub enum AttType<'s> {
    StringType,
    TokenizedType(TokenizedType),
    EnumeratedType(EnumeratedType<'s>),
}
/// [54]   AttType   ::=   StringType | TokenizedType | EnumeratedType
pub fn att_type(input: &str) -> IResult<&str, AttType> {
    alt((
        value(AttType::StringType, string_type),
        map(tokenized_type, AttType::TokenizedType),
        map(enumerated_type, AttType::EnumeratedType),
    ))(input)
}

/// The matched `CDATA` keyword.
pub type StringType<'s> = &'s str;
/// [55]   StringType   ::=   'CDATA'
pub fn string_type(input: &str) -> IResult<&str, StringType> {
    let (rest, keyword) = tag("CDATA")(input)?;
    Ok((rest, keyword))
}

/// Tokenized attribute types.
#[derive(Clone, Debug)]
pub enum TokenizedType {
    ID,
    IDRef,
    IDRefs,
    Entity,
    Entities,
    NMToken,
    NMTokens,
}
/// [56]   TokenizedType   ::=   'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
///
/// `tag` succeeds as soon as the input starts with the keyword, and `alt`
/// takes the first success. Every keyword that is a prefix of another must
/// therefore come after the longer one: `IDREFS` before `IDREF` before `ID`,
/// and `NMTOKENS` before `NMTOKEN`. (`ENTITY` is not a prefix of `ENTITIES`,
/// so their relative order does not matter.)
pub fn tokenized_type(input: &str) -> IResult<&str, TokenizedType> {
    alt((
        // Longest first; `ID` in front would swallow the start of `IDREF(S)`.
        value(TokenizedType::IDRefs, tag("IDREFS")),
        value(TokenizedType::IDRef, tag("IDREF")),
        value(TokenizedType::ID, tag("ID")),
        value(TokenizedType::Entity, tag("ENTITY")),
        value(TokenizedType::Entities, tag("ENTITIES")),
        // Longest first, for the same reason as above.
        value(TokenizedType::NMTokens, tag("NMTOKENS")),
        value(TokenizedType::NMToken, tag("NMTOKEN")),
    ))(input)
}

/// Enumerated attribute types.
#[derive(Debug, Clone)]
pub enum EnumeratedType<'s> {
    NotationType(NotationType<'s>),
    Enumeration(Enumeration<'s>),
}
/// [57]   EnumeratedType   ::=   NotationType | Enumeration
pub fn enumerated_type(input: &str) -> IResult<&str, EnumeratedType> {
    alt((
        map(notation_type, EnumeratedType::NotationType),
        map(enumeration, EnumeratedType::Enumeration),
    ))(input)
}

#[derive(Debug, Clone)]
pub struct NotationType<'s>(Vec<Name<'s>>);
/// [58]   	NotationType	   ::=   	'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
pub fn notation_type(input: &str) -> IResult<&str, NotationType> {
    map(
        delimited(
            tuple((tag("NOTATION"), s, tag("("), opt(s))),
            pair(
                name,
                many0(preceded(tuple((opt(s), tag("|"), opt(s))), name)),
            ),
            pair(opt(s), tag(")")),
        ),
        |(head, tail)| {
            let notation_type = vec![vec![head], tail].concat();
            NotationType(notation_type)
        },
    )(input)
}

#[derive(Debug, Clone)]
pub struct Enumeration<'s>(Vec<Nmtoken<'s>>);
/// [59]   	Enumeration	   ::=   	'(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
pub fn enumeration(input: &str) -> IResult<&str, Enumeration> {
    map(
        delimited(
            pair(tag("("), opt(s)),
            pair(
                nmtoken,
                many0(preceded(tuple((opt(s), tag("|"), opt(s))), nmtoken)),
            ),
            pair(opt(s), tag(")")),
        ),
        |(head, tail)| {
            let enumeration = vec![vec![head], tail].concat();
            Enumeration(enumeration)
        },
    )(input)
}

/// Default declaration of an attribute.
#[derive(Debug, Clone)]
pub enum DefaultDecl<'s> {
    Required,
    Implied,
    /// An attribute value, produced whether or not `#FIXED` preceded it.
    Fixed(AttValue<'s>),
}
/// [60]   DefaultDecl   ::=   '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
///
/// Note that a plain default value and a `#FIXED` value both map to
/// [`DefaultDecl::Fixed`]; the presence of the keyword is not recorded.
pub fn default_decl(input: &str) -> IResult<&str, DefaultDecl> {
    let attribute_value = map(
        preceded(opt(pair(tag("#FIXED"), s)), att_value),
        DefaultDecl::Fixed,
    );
    alt((
        value(DefaultDecl::Required, tag("#REQUIRED")),
        value(DefaultDecl::Implied, tag("#IMPLIED")),
        attribute_value,
    ))(input)
}

/// A conditional section of the external DTD subset.
pub enum ConditionalSect<'s> {
    IncludeSect(IncludeSect<'s>),
    IgnoreSect(IgnoreSect<'s>),
}
/// [61]   conditionalSect   ::=   includeSect | ignoreSect
pub fn conditional_sect(input: &str) -> IResult<&str, ConditionalSect> {
    alt((
        map(include_sect, ConditionalSect::IncludeSect),
        map(ignore_sect, ConditionalSect::IgnoreSect),
    ))(input)
}

pub struct IncludeSect<'s>(ExtSubsetDecl<'s>);
/// [62]   	includeSect	   ::=   	'<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
pub fn include_sect(input: &str) -> IResult<&str, IncludeSect> {
    map(
        delimited(
            tuple((tag("<!["), opt(s), tag("INCLUDE"), opt(s), tag("["))),
            ext_subset_decl,
            tag("]]>"),
        ),
        |ext_subset_decl| IncludeSect(ext_subset_decl),
    )(input)
}

/// The contents of an `IGNORE` conditional section.
pub struct IgnoreSect<'s>(Vec<IgnoreSectContents<'s>>);
/// [63]   ignoreSect   ::=   '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
///
/// `ignore_sect_contents` succeeds without consuming anything when the input
/// is already at `]]>`. nom's `many0` treats a non-consuming success as an
/// error (its infinite-loop guard), so wrapping it in `many0` made every
/// ignore section fail once the closing `]]>` was reached. A single
/// `ignore_sect_contents` already consumes everything up to the closing
/// delimiter, so it is run exactly once and stored as a one-element vector.
pub fn ignore_sect(input: &str) -> IResult<&str, IgnoreSect> {
    map(
        delimited(
            tuple((tag("<!["), opt(s), tag("IGNORE"), opt(s), tag("["))),
            ignore_sect_contents,
            tag("]]>"),
        ),
        |contents| IgnoreSect(vec![contents]),
    )(input)
}

pub struct IgnoreSectContents<'s> {
    // TODO: what the fuck does this mean
    ignore: Ignore<'s>,
    ignore_list: Vec<(IgnoreSectContents<'s>, Ignore<'s>)>,
}
/// [64]   	ignoreSectContents	   ::=   	Ignore ('<![' ignoreSectContents ']]>' Ignore)*
pub fn ignore_sect_contents(input: &str) -> IResult<&str, IgnoreSectContents> {
    map(
        pair(
            ignore,
            many0(tuple((
                delimited(tag("<!["), ignore_sect_contents, tag("]]>")),
                ignore,
            ))),
        ),
        |(ignore, ignore_list)| IgnoreSectContents {
            ignore,
            ignore_list,
        },
    )(input)
}

/// A run of ignored text containing neither `<![` nor `]]>`.
pub type Ignore<'s> = &'s str;
/// [65]   Ignore   ::=   Char* - (Char* ('<![' | ']]>') Char*)
///
/// Consumes characters up to, but not including, the next `<![` or `]]>`.
pub fn ignore(input: &str) -> IResult<&str, Ignore> {
    let section_delimiter = peek(alt((tag("<!["), tag("]]>"))));
    recognize(many_till(xmlchar, section_delimiter))(input)
}

#[derive(Clone, Debug)]
pub enum CharRef<'s> {
    Decimal(&'s str),
    Hexadecimal(&'s str),
}
/// [66]   	CharRef	   ::=   	'&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
pub fn char_ref(input: &str) -> IResult<&str, CharRef> {
    alt((
        delimited(
            tag("&#"),
            map(take_while(|c| matches!(c, '0'..='9')), |decimal| {
                CharRef::Decimal(decimal)
            }),
            tag(";"),
        ),
        delimited(
            tag("&#x"),
            map(
                take_while(|c| matches!(c, '0'..='9' | 'a'..='f' | 'A'..='F' )),
                |hexadecimal| CharRef::Hexadecimal(hexadecimal),
            ),
            tag(";"),
        ),
    ))(input)
}

/// An entity reference or a character reference.
#[derive(Clone, Debug)]
pub enum Reference<'s> {
    EntityRef(EntityRef<'s>),
    CharRef(CharRef<'s>),
}
/// [67]   Reference   ::=   EntityRef | CharRef
pub fn reference(input: &str) -> IResult<&str, Reference> {
    alt((
        map(entity_ref, Reference::EntityRef),
        map(char_ref, Reference::CharRef),
    ))(input)
}

pub type EntityRef<'s> = &'s str;
/// [68]   	EntityRef	   ::=   	'&' Name ';'
pub fn entity_ref(input: &str) -> IResult<&str, EntityRef> {
    delimited(tag("&"), name, tag(";"))(input)
}

pub type PEReference<'s> = &'s str;
/// [69]   	PEReference	   ::=   	'%' Name ';'
pub fn pe_reference(input: &str) -> IResult<&str, PEReference> {
    delimited(tag("%"), name, tag(";"))(input)
}

/// An `<!ENTITY ...>` declaration, general or parameter.
#[derive(Debug)]
pub enum EntityDecl<'s> {
    GEDecl(GEDecl<'s>),
    PEDecl(PEDecl<'s>),
}
/// [70]   EntityDecl   ::=   GEDecl | PEDecl
pub fn entity_decl(input: &str) -> IResult<&str, EntityDecl> {
    alt((
        map(ge_decl, EntityDecl::GEDecl),
        map(pe_decl, EntityDecl::PEDecl),
    ))(input)
}

/// A general entity declaration.
#[derive(Debug)]
pub struct GEDecl<'s> {
    /// Entity name.
    name: Name<'s>,
    /// Entity definition.
    entity_def: EntityDef<'s>,
}
/// [71]   GEDecl   ::=   '<!ENTITY' S Name S EntityDef S? '>'
pub fn ge_decl(input: &str) -> IResult<&str, GEDecl> {
    let (input, _) = pair(tag("<!ENTITY"), s)(input)?;
    let (input, (name, entity_def)) = separated_pair(name, s, entity_def)(input)?;
    let (input, _) = pair(opt(s), tag(">"))(input)?;
    Ok((input, GEDecl { name, entity_def }))
}

/// A parameter entity declaration.
#[derive(Debug)]
pub struct PEDecl<'s> {
    /// Entity name.
    name: Name<'s>,
    /// Entity definition.
    pe_def: PEDef<'s>,
}
/// [72]   PEDecl   ::=   '<!ENTITY' S '%' S Name S PEDef S? '>'
pub fn pe_decl(input: &str) -> IResult<&str, PEDecl> {
    let (input, _) = tuple((tag("<!ENTITY"), s, tag("%"), s))(input)?;
    let (input, (name, pe_def)) = separated_pair(name, s, pe_def)(input)?;
    let (input, _) = pair(opt(s), tag(">"))(input)?;
    Ok((input, PEDecl { name, pe_def }))
}

/// Definition of a general entity.
#[derive(Debug)]
pub enum EntityDef<'s> {
    /// A literal entity value.
    EntityValue(EntityValue<'s>),
    /// An external identifier with an optional `NDATA` notation name.
    ExternalID {
        external_id: ExternalID<'s>,
        ndata_decl: Option<NDataDecl<'s>>,
    },
}
/// [73]   EntityDef   ::=   EntityValue | (ExternalID NDataDecl?)
pub fn entity_def(input: &str) -> IResult<&str, EntityDef> {
    let external = map(
        pair(external_id, opt(ndata_decl)),
        |(external_id, ndata_decl)| EntityDef::ExternalID {
            external_id,
            ndata_decl,
        },
    );
    alt((map(entity_value, EntityDef::EntityValue), external))(input)
}

/// Definition of a parameter entity.
#[derive(Debug)]
pub enum PEDef<'s> {
    EntityValue(EntityValue<'s>),
    ExternalID(ExternalID<'s>),
}
/// [74]   PEDef   ::=   EntityValue | ExternalID
pub fn pe_def(input: &str) -> IResult<&str, PEDef> {
    alt((
        map(entity_value, PEDef::EntityValue),
        map(external_id, PEDef::ExternalID),
    ))(input)
}

/// An external identifier.
#[derive(Debug)]
pub enum ExternalID<'s> {
    /// `SYSTEM` followed by a system literal.
    SYSTEM {
        system_identifier: &'s str,
    },
    /// `PUBLIC` followed by a public identifier literal and a system literal.
    PUBLIC {
        public_identifier: &'s str,
        system_identifier: &'s str,
    },
}
/// [75]   ExternalID   ::=   'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
pub fn external_id(input: &str) -> IResult<&str, ExternalID> {
    let system = map(
        preceded(pair(tag("SYSTEM"), s), system_literal),
        |system_identifier| ExternalID::SYSTEM { system_identifier },
    );
    let public = map(
        preceded(
            pair(tag("PUBLIC"), s),
            separated_pair(pubid_literal, s, system_literal),
        ),
        |(public_identifier, system_identifier)| ExternalID::PUBLIC {
            public_identifier,
            system_identifier,
        },
    );
    alt((system, public))(input)
}

pub type NDataDecl<'s> = &'s str;
/// [76]   	NDataDecl	   ::=   	S 'NDATA' S Name
pub fn ndata_decl(input: &str) -> IResult<&str, NDataDecl> {
    preceded(tuple((s, tag("NDATA"), s)), name)(input)
}

/// The text declaration that may open an external parsed entity.
pub struct TextDecl<'s> {
    /// Optional version pseudo-attribute.
    version_info: Option<VersionInfo>,
    /// Mandatory encoding pseudo-attribute.
    encoding_decl: EncodingDecl<'s>,
}
/// [77]   TextDecl   ::=   '<?xml' VersionInfo? EncodingDecl S? '?>'
pub fn text_decl(input: &str) -> IResult<&str, TextDecl> {
    let (input, _) = tag("<?xml")(input)?;
    let (input, version_info) = opt(version_info)(input)?;
    let (input, encoding_decl) = terminated(encoding_decl, opt(s))(input)?;
    let (input, _) = tag("?>")(input)?;
    Ok((
        input,
        TextDecl {
            version_info,
            encoding_decl,
        },
    ))
}

pub struct ExtParsedEnt<'s> {
    text_decl: Option<TextDecl<'s>>,
    content: Content<'s>,
}
/// [78]   	extParsedEnt	   ::=   	TextDecl? content
pub fn ext_parsed_ent(input: &str) -> IResult<&str, ExtParsedEnt> {
    map(pair(opt(text_decl), content), |(text_decl, content)| {
        ExtParsedEnt { text_decl, content }
    })(input)
}

/// The encoding name given in an `encoding` pseudo-attribute.
pub type EncodingDecl<'s> = EncName<'s>;
/// [80]   EncodingDecl   ::=   S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
///
/// Returns the encoding name found between matching quotes.
pub fn encoding_decl(input: &str) -> IResult<&str, EncodingDecl> {
    let (input, _) = tuple((s, tag("encoding"), eq))(input)?;
    let double_quoted = delimited(char('"'), enc_name, char('"'));
    let single_quoted = delimited(char('\''), enc_name, char('\''));
    alt((double_quoted, single_quoted))(input)
}

/// An encoding name as written in the source.
pub type EncName<'s> = &'s str;
/// [81]   EncName   ::=   [A-Za-z] ([A-Za-z0-9._] | '-')*
///
/// Returns the whole matched name as one slice.
pub fn enc_name(input: &str) -> IResult<&str, EncName> {
    let first = satisfy(|c: char| c.is_ascii_alphabetic());
    let following = many0(satisfy(|c: char| {
        c.is_ascii_alphanumeric() || matches!(c, '.' | '_' | '-')
    }));
    recognize(pair(first, following))(input)
}

#[derive(Debug)]
pub struct NotationDecl<'s> {
    name: &'s str,
    id: NotationDeclID<'s>,
}
#[derive(Debug)]
pub enum NotationDeclID<'s> {
    External(ExternalID<'s>),
    Public(PublicID<'s>),
}
/// [82]   	NotationDecl	   ::=   	'<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
pub fn notation_decl(input: &str) -> IResult<&str, NotationDecl> {
    map(
        delimited(
            pair(tag("<!NOTATION"), s),
            separated_pair(
                name,
                s,
                alt((
                    map(external_id, |external_id| {
                        NotationDeclID::External(external_id)
                    }),
                    map(public_id, |public_id| NotationDeclID::Public(public_id)),
                )),
            ),
            pair(opt(s), tag(">")),
        ),
        |(name, id)| NotationDecl { name, id },
    )(input)
}

pub type PublicID<'s> = &'s str;
/// [83]   	PublicID	   ::=   	'PUBLIC' S PubidLiteral
pub fn public_id(input: &str) -> IResult<&str, PublicID> {
    preceded(pair(tag("PUBLIC"), s), pubid_literal)(input)
}

#[cfg(test)]
mod tests {
    use std::num::NonZero;

    use super::*;

    /// `char_data` stops before `&` and before `]]>`, and reports
    /// `Incomplete` when neither appears.
    #[test]
    fn test_char_data() {
        // (input, (remaining, parsed))
        let cases = [
            ("abc&def]]>ghi", ("&def]]>ghi", "abc")),
            ("abcdef]]>ghi", ("]]>ghi", "abcdef")),
            ("abc&defghi", ("&defghi", "abc")),
            ("abc]]>def&ghi", ("]]>def&ghi", "abc")),
            ("abc]>def&ghi", ("&ghi", "abc]>def")),
        ];
        for (input, expected) in cases {
            assert_eq!(Ok(expected), char_data(input));
        }

        let needed = nom::Needed::Size(NonZero::new(3usize).unwrap());
        assert_eq!(Err(Err::Incomplete(needed)), char_data("abcdefghi"));
    }

    /// `comment` returns the text between `<!--` and `-->`.
    #[test]
    fn test_comment() {
        // (input, (remaining, parsed))
        let cases = [
            ("<!---->", ("", "")),
            ("<!--asdf-->", ("", "asdf")),
            ("<!--as-df-->", ("", "as-df")),
        ];
        for (input, expected) in cases {
            assert_eq!(Ok(expected), comment(input));
        }

        let needed = nom::Needed::Size(NonZero::new(2usize).unwrap());
        assert_eq!(Err(Err::Incomplete(needed)), comment("<!--asdf"));
    }

    /// `pi_target` accepts names but rejects `xml` in any casing.
    #[test]
    fn test_pi_target() {
        for (input, expected) in [("asdf ", (" ", "asdf")), ("xmlasdf ", (" ", "xmlasdf"))] {
            assert_eq!(Ok(expected), pi_target(input));
        }

        for rejected in ["xml ", "xMl "] {
            let expected = Err(Err::Error(Error {
                input: rejected,
                code: ErrorKind::Tag,
            }));
            assert_eq!(expected, pi_target(rejected));
        }
    }

    /// `cd_sect` yields the start marker, the data and the end marker.
    #[test]
    fn test_cd_sect() {
        let expected = (
            "",
            ("<![CDATA[", "<greeting>Hello, world!</greeting>", "]]>"),
        );
        assert_eq!(
            Ok(expected),
            cd_sect("<![CDATA[<greeting>Hello, world!</greeting>]]>")
        );
    }

    #[test]
    fn test_cd_start() {
        let expected = ("asdf", "<![CDATA[");
        assert_eq!(Ok(expected), cd_start("<![CDATA[asdf"));
    }

    /// `cdata` consumes everything up to the first `]]>`.
    #[test]
    fn test_cdata() {
        // (input, (remaining, parsed))
        let cases = [
            ("asdf]]>asdf", ("]]>asdf", "asdf")),
            ("<![CDATA[asdf]]>asdf", ("]]>asdf", "<![CDATA[asdf")),
            (
                "<greeting>Hello, world!</greeting>]]>asdf",
                ("]]>asdf", "<greeting>Hello, world!</greeting>"),
            ),
        ];
        for (input, expected) in cases {
            assert_eq!(Ok(expected), cdata(input));
        }
    }

    #[test]
    fn test_cd_end() {
        let expected = ("asdf", "]]>");
        assert_eq!(Ok(expected), cd_end("]]>asdf"));
    }
}