aboutsummaryrefslogblamecommitdiffstats
path: root/src/xml/mod.rs
blob: 005a12232c705cc18fbdac053511d6f56f466fd4 (plain) (tree)
1
2
3
4
5
6
7
8
9
                            

                             
 
                                        
 
                  
                
                         
 
                                                                                




                                         
 
                                                               
                       
                                               
 







                                        
                                                        

                          
 
                                                                
                                      
                               
 







                                      
                                                                     
                                      



                                       
 















                                                                                      









                                                                                       
 
                                                                    
                                      
                             

                                         
 
 
                                                         
                                      
                                             
 







                                       
                                              
                                      
                                  
 







                                      
                                                      
                                      
                                     
 







                                      

           
                                                                    





                         
 
                                                                                                                                                                                            

                      
 







                                      
                                                                 
                
                    
             
 
                                                                                                                                                                                                                                                                                     

                               
 







                                      
                                                                                                                            

                          
 







                                      
                                                                 


                                      
 







                                      
                                                         
                    
                                    
 







                                      
                                                   


                                
 







                                      
                                                                       
                    
                                          
 







                                      
                       
                              



                                 

                                                                                           
                



                                           
 
                                      



                             

                                                                             
                                      



                                        
 

















                                                                                        



























                                                                                 
                                                                                 
                



                            
 
                                                                                               
                



                           
 
                                                                                                      

                           
 
                                                                              


                                      
 







                                      


                                                                                         
                                           
 







                                      
                                                                                            

                       
                         

                                 
 
                                                                                            


                                      
 
                                                           


                                      
 







                                      
                                                   

                                      
 
                                                                     


                                      
 







                                      
                                             

                                      
 
                                                                           





                                                                      
 
                                                                                         

                        


                                                       
 
 
                                                                                                        
                



                             
 










                                                                  
                                                 

                
 
                                                           

                       


              
 
                                                        

                       

                         
                                        

      
 

                                                                                                                                        

                            
                    


                                        
 
                                                       

                       



                                 
 

                                   


                               
                                                                                       
                                                                       

                                                        
 
                                                                                                                    

                         
                                 





                                   
 
                                                                       
                          


                                       
 
                                   



                                         
                                                                                          
                                                            
 
                                                                                                               




                       
 










                                         

                                                 
                                                                        
                       
                      

                                              
 
 

                                                                       
                       
                     

                                              
 
 


                                                                                         










                               
 

                                                         
                       
                     
                               
 
 
                       
                          
                              





                             
                                                                                                            
                       
                        

                                                                     
 
 

                                                                                                      
                       
                             

                                              
 
 

                                                                                        

                            
                    

                                 

                   
                                 
                                                                                  

                          




                           
 
                                    

                       




             
 

                           


                       
                                                                                 

                         


                           
 

                       
                    


                       

                                                                                 

                       


                         
 
                                                                          

                                   
 
                                                                          

                                

                          
                                                                                                             
                                                                                                            

                                     
 

                                                                                  

                            
                            

                              
 
                



                             

                                                                                     
                
                       
                         


                                  
 
                                                                                   

                       



                                       
 
                                                       

                      
 
                                                                                                                          

                        







             
 
                                                                          

                             


                                   
 
                                                                                                 

                                           
 
                                                                                          

                                             
 
                                                                                                   

                          

             

                                                                 
 
 
                                                                        
                              


                                 
 
                                                                                             
                                              
 
                                                                                                  
                                                       
 
                                                                                                       
                                   



                                                           
 
                                                                             
                               
 
                                                                                
                                      
                      


                         
 




                                                  


                                                                                    

                                          

                                                                                    



                                         
                                                                                            



                                                                                                                                  
                                                                               



          
                                                                   
                                      
                        


                             
 















                                                                                   
                                                            

                                              
 







                                      
                                                            


                                     
 
                                                               

                         


                       
 
                                                                            

                       


                              
 
                                                                              

                       


                      
 
                                                                                     

                        


                                    
                                           

      
 
                                                                

                    


                                 
 
                                                                                                                  

                         
            
                                             

            

                                             

      
 
                                                                

                                   
 
                                                                                         
                         


                                      
 
                                                                 
                             


                                    
 
                                                                                                    
                
                              
                                         
 







                                      
                                                                       
                

                                







                                      

                             



                                                                                                    



                             

 
                                                                       

                                          
























































                                                                                       
use std::{char, ops::Deref};

use parsers_complete::Parser;

use crate::error::{CharRefError, Error};

pub mod composers;
pub mod parsers;
pub mod parsers_complete;

/// [1]   NSAttName ::= PrefixedAttName | DefaultAttName
///
/// Name of a namespace-declaration attribute: either the prefixed form
/// (`xmlns:` followed by an `NCName`) or the default form (`xmlns`).
#[derive(Clone, Debug)]
pub enum NSAttName<'s> {
    /// The `xmlns:NCName` form; carries the declared name.
    PrefixedAttName(PrefixedAttName<'s>),
    /// The bare `xmlns` form; carries no data.
    DefaultAttName,
}

/// [2]   PrefixedAttName ::= 'xmlns:' NCName
///
/// Newtype around the `NCName` that follows the `xmlns:` literal. The literal
/// itself is not stored.
#[derive(Clone, Debug)]
pub struct PrefixedAttName<'s>(pub NCName<'s>);

impl<'s> Deref for PrefixedAttName<'s> {
    type Target = NCName<'s>;

    /// Borrows the wrapped `NCName`.
    fn deref(&self) -> &Self::Target {
        let Self(nc_name) = self;
        nc_name
    }
}

/// [3]   DefaultAttName ::= 'xmlns';
///
/// Unit marker type for the production; it carries no data.
#[derive(Clone, Debug)]
pub struct DefaultAttName;

/// [4]   NCName ::= Name - (Char* ':' Char*)
///
/// Newtype around a borrowed string slice holding the name text.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct NCName<'s>(&'s str);

impl<'s> Deref for NCName<'s> {
    type Target = &'s str;

    /// Borrows the wrapped string slice.
    fn deref(&self) -> &Self::Target {
        let Self(text) = self;
        text
    }
}

/// [7]   QName ::= PrefixedName | UnprefixedName
///
/// A qualified name: either a `Prefix ':' LocalPart` pair or a bare local part.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum QName<'s> {
    PrefixedName(PrefixedName<'s>),
    UnprefixedName(UnprefixedName<'s>),
}

impl<'s> QName<'s> {
    /// Returns the prefix text for a `PrefixedName`, or `None` for an
    /// `UnprefixedName`.
    ///
    /// The returned slice borrows from the underlying input (`'s`), not from
    /// `self`.
    pub fn prefix(&self) -> Option<&'s str> {
        match self {
            QName::PrefixedName(prefixed_name) => Some(**prefixed_name.prefix),
            QName::UnprefixedName(_) => None,
        }
    }

    /// Returns the local-part text of the name, for both variants.
    ///
    /// Like [`QName::prefix`], the returned slice borrows from the underlying
    /// input (`'s`) rather than from `self`, so it may outlive this `QName`.
    pub fn local_part(&self) -> &'s str {
        match self {
            QName::PrefixedName(prefixed_name) => **prefixed_name.local_part,
            // &UnprefixedName -> UnprefixedName -> LocalPart -> NCName -> &'s str
            QName::UnprefixedName(unprefixed_name) => ****unprefixed_name,
        }
    }
}

// impl<'s> ToString for QName<'s> {
//     fn to_string(&self) -> String {
//         match self {
//             QName::PrefixedName(prefixed_name) => {
//                 format!("{}:{}", **prefixed_name.prefix, **prefixed_name.local_part)
//             }
//             QName::UnprefixedName(unprefixed_name) => unprefixed_name.to_string(),
//         }
//     }
// }

/// [8]   PrefixedName ::= Prefix ':' LocalPart
///
/// The two halves of a prefixed qualified name; the `:` separator is not stored.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct PrefixedName<'s> {
    /// The part before the `:`.
    pub(crate) prefix: Prefix<'s>,
    /// The part after the `:`.
    pub(crate) local_part: LocalPart<'s>,
}

/// [9]   UnprefixedName ::= LocalPart
///
/// Newtype around a `LocalPart`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct UnprefixedName<'s>(LocalPart<'s>);

impl<'s> Deref for UnprefixedName<'s> {
    type Target = LocalPart<'s>;

    /// Borrows the wrapped `LocalPart`.
    fn deref(&self) -> &Self::Target {
        let Self(local_part) = self;
        local_part
    }
}

/// [10]   Prefix ::= NCName
///
/// Newtype around the `NCName` used as a qualified-name prefix.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Prefix<'s>(NCName<'s>);

impl<'s> Deref for Prefix<'s> {
    type Target = NCName<'s>;

    /// Borrows the wrapped `NCName`.
    fn deref(&self) -> &Self::Target {
        let Self(nc_name) = self;
        nc_name
    }
}

/// [11]   LocalPart ::= NCName
///
/// Newtype around the `NCName` used as a qualified-name local part.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct LocalPart<'s>(NCName<'s>);

impl<'s> Deref for LocalPart<'s> {
    type Target = NCName<'s>;

    /// Borrows the wrapped `NCName`.
    fn deref(&self) -> &Self::Target {
        let Self(nc_name) = self;
        nc_name
    }
}

// xml spec

/// [1]   document ::= prolog element Misc*
///
/// A whole document: the prolog, one element, and any trailing `Misc` items.
#[derive(Debug)]
pub struct Document<'s> {
    /// The `prolog` part of the production.
    prolog: Prolog<'s>,
    /// The single `element` of the production.
    element: Element<'s>,
    /// The `Misc*` items that follow the element.
    miscs: Vec<Misc<'s>>,
}

/// [2]   Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
/// /* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */
///
/// Transparent newtype around a single `char`.
#[repr(transparent)]
pub struct Char(char);

impl Deref for Char {
    type Target = char;

    /// Borrows the wrapped `char`.
    fn deref(&self) -> &Self::Target {
        let Self(ch) = self;
        ch
    }
}

/// [3]   S ::= (#x20 | #x9 | #xD | #xA)+
///
/// Unit marker type for a run of whitespace; the whitespace text itself is
/// not stored.
#[derive(Clone)]
#[repr(transparent)]
pub struct S;

/// [4]   NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
///
/// Transparent newtype around a single `char`.
#[repr(transparent)]
pub struct NameStartChar(char);

impl Deref for NameStartChar {
    type Target = char;

    /// Borrows the wrapped `char`.
    fn deref(&self) -> &Self::Target {
        let Self(ch) = self;
        ch
    }
}

/// [4a]   NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
///
/// Transparent newtype around a single `char`.
#[repr(transparent)]
pub struct NameChar(char);

impl Deref for NameChar {
    type Target = char;

    /// Borrows the wrapped `char`.
    fn deref(&self) -> &Self::Target {
        let Self(ch) = self;
        ch
    }
}

/// [5]   Name ::= NameStartChar (NameChar)*
///
/// Transparent newtype around a borrowed string slice holding the name text.
#[derive(Debug, Clone, PartialEq, Eq)]
#[repr(transparent)]
pub struct Name<'s>(&'s str);

impl<'s> Deref for Name<'s> {
    type Target = &'s str;

    /// Borrows the wrapped string slice.
    fn deref(&self) -> &Self::Target {
        let Self(text) = self;
        text
    }
}

/// [6]   Names ::= Name (#x20 Name)*
///
/// Transparent newtype around the list of `Name`s; the separating spaces are
/// not stored.
#[repr(transparent)]
pub struct Names<'s>(Vec<Name<'s>>);

impl<'s> Deref for Names<'s> {
    type Target = Vec<Name<'s>>;

    /// Borrows the wrapped vector of names.
    fn deref(&self) -> &Self::Target {
        let Self(names) = self;
        names
    }
}

/// [7]   Nmtoken ::= (NameChar)+
///
/// Transparent newtype around a borrowed string slice holding the token text.
#[derive(Debug, Clone)]
#[repr(transparent)]
pub struct Nmtoken<'s>(&'s str);

impl<'s> Deref for Nmtoken<'s> {
    type Target = &'s str;

    /// Borrows the wrapped string slice.
    fn deref(&self) -> &Self::Target {
        let Self(text) = self;
        text
    }
}

/// [8]   Nmtokens ::= Nmtoken (#x20 Nmtoken)*
///
/// Transparent newtype around the list of `Nmtoken`s; the separating spaces
/// are not stored.
#[repr(transparent)]
pub struct Nmtokens<'s>(Vec<Nmtoken<'s>>);

impl<'s> Deref for Nmtokens<'s> {
    type Target = Vec<Nmtoken<'s>>;

    /// Borrows the wrapped vector of tokens.
    fn deref(&self) -> &Self::Target {
        let Self(tokens) = self;
        tokens
    }
}

/// One item inside an `EntityValue` literal: a run of literal text, a
/// parameter-entity reference, or a reference.
#[derive(Clone, Debug)]
pub enum EntityValueData<'s> {
    String(&'s str),
    PEReference(PEReference<'s>),
    Reference(Reference<'s>),
}
/// [9]   EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
///                     |  "'" ([^%&'] | PEReference | Reference)* "'"
///
/// The variant records which quote character delimited the literal; the
/// payload is the sequence of items between the quotes.
#[derive(Debug)]
pub enum EntityValue<'s> {
    DoubleQuoted(Vec<EntityValueData<'s>>),
    SingleQuoted(Vec<EntityValueData<'s>>),
}

/// One item inside an `AttValue` literal: a run of literal text or a
/// reference.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum AttValueData<'s> {
    String(&'s str),
    Reference(Reference<'s>),
}
/// [10]   AttValue ::= '"' ([^<&"] | Reference)* '"'
///                   |  "'" ([^<&'] | Reference)* "'"
///
/// The variant records which quote character delimited the literal; the
/// payload is the sequence of items between the quotes.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum AttValue<'s> {
    DoubleQuoted(Vec<AttValueData<'s>>),
    SingleQuoted(Vec<AttValueData<'s>>),
}

impl<'s> AttValue<'s> {
    /// Flattens the attribute value into an owned `String`.
    ///
    /// Literal text items are appended verbatim; each reference item
    /// contributes the single `char` returned by `Reference::process`. The
    /// quoting style does not affect the result.
    ///
    /// # Errors
    ///
    /// Returns the first error produced by `Reference::process`.
    pub fn process(&self) -> crate::Result<String> {
        // Both variants carry the same payload type, so treat them uniformly.
        let parts = match self {
            AttValue::DoubleQuoted(parts) | AttValue::SingleQuoted(parts) => parts,
        };
        let mut output = String::new();
        for part in parts {
            match part {
                AttValueData::String(text) => output.push_str(text),
                AttValueData::Reference(reference) => output.push(reference.process()?),
            }
        }
        Ok(output)
    }
}

impl<'s> From<&'s str> for AttValue<'s> {
    /// Builds a double-quoted `AttValue` from raw (unescaped) text.
    ///
    /// The `AttValue` production forbids `<`, `&` and `"` as literal
    /// characters inside a double-quoted value, so each occurrence is replaced
    /// by an entity reference (`lt`, `amp`, `quot` respectively). The text
    /// between those characters is kept as borrowed `AttValueData::String`
    /// runs; empty runs are skipped.
    ///
    /// Because `&` is treated as a literal ampersand, input that already
    /// contains references (e.g. `&lt;`) is not interpreted as such.
    fn from(s: &'s str) -> AttValue<'s> {
        let mut data = Vec::new();
        // Each chunk ends with at most one of the characters that must be escaped.
        for chunk in s.split_inclusive(|c: char| c == '<' || c == '&' || c == '"') {
            let (text, entity) = if let Some(text) = chunk.strip_suffix('<') {
                (text, Some("lt"))
            } else if let Some(text) = chunk.strip_suffix('&') {
                (text, Some("amp"))
            } else if let Some(text) = chunk.strip_suffix('"') {
                (text, Some("quot"))
            } else {
                (chunk, None)
            };
            if !text.is_empty() {
                data.push(AttValueData::String(text));
            }
            if let Some(entity) = entity {
                // The entity names are fixed, valid `Name`s, so parsing cannot fail.
                data.push(AttValueData::Reference(Reference::EntityRef(EntityRef(
                    Name::parse_full(entity).unwrap(),
                ))));
            }
        }
        AttValue::DoubleQuoted(data)
    }
}

/// [11]   SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
///
/// The variant records which quote character delimited the literal; the
/// payload is a borrowed string slice.
#[derive(Debug)]
pub enum SystemLiteral<'s> {
    DoubleQuoted(&'s str),
    SingleQuoted(&'s str),
}

/// [12]   PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
///
/// The variant records which quote character delimited the literal; the
/// payload is a borrowed string slice.
#[derive(Debug)]
pub enum PubidLiteral<'s> {
    DoubleQuoted(&'s str),
    SingleQuoted(&'s str),
}

/// [13]   PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
///
/// Transparent newtype around a single `char`.
#[repr(transparent)]
pub struct PubidChar(char);

/// [14]   CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
///
/// Transparent newtype around a borrowed string slice holding the character
/// data.
#[derive(Clone, Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct CharData<'s>(&'s str);

impl<'s> Deref for CharData<'s> {
    type Target = &'s str;

    /// Borrows the wrapped string slice.
    fn deref(&self) -> &Self::Target {
        let Self(text) = self;
        text
    }
}

/// [15]   Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
///
/// Transparent newtype around a borrowed string slice holding the comment
/// text.
#[derive(Clone, Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct Comment<'s>(pub(crate) &'s str);

impl<'s> Deref for Comment<'s> {
    type Target = &'s str;

    /// Borrows the wrapped string slice.
    fn deref(&self) -> &Self::Target {
        let Self(text) = self;
        text
    }
}

/// [16]   PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
///
/// A processing instruction: its target plus the optional instruction text.
#[derive(Clone, Debug)]
pub struct PI<'s> {
    /// The `PITarget` of the production.
    target: PITarget<'s>,
    /// The optional text after the target; `None` when that part is absent.
    instruction: Option<&'s str>,
}

/// [17]   PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
///
/// Transparent newtype around the `Name` used as a processing-instruction
/// target.
#[derive(Clone, Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct PITarget<'s>(Name<'s>);

/// [18]   CDSect ::= CDStart CData CDEnd
///
/// Transparent newtype around the `CData` payload; the start and end
/// delimiters are not stored.
#[derive(Clone, Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct CDSect<'s>(CData<'s>);

impl<'s> Deref for CDSect<'s> {
    type Target = CData<'s>;

    /// Borrows the wrapped `CData`.
    fn deref(&self) -> &Self::Target {
        let Self(cdata) = self;
        cdata
    }
}

/// [19]   CDStart ::= '<![CDATA['
///
/// Unit marker type for the production; it carries no data.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CDStart;

/// [20]   CData ::= (Char* - (Char* ']]>' Char*))
///
/// Transparent newtype around a borrowed string slice holding the CDATA
/// section content.
#[derive(Clone, Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct CData<'s>(&'s str);

impl<'s> Deref for CData<'s> {
    type Target = &'s str;

    /// Borrows the wrapped string slice.
    fn deref(&self) -> &Self::Target {
        let Self(text) = self;
        text
    }
}

/// [21]   CDEnd ::= ']]>'
///
/// Unit marker type for the production; it carries no data.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CDEnd;

/// [22]   prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
///
/// The document prolog, with one field per part of the production.
#[derive(Debug)]
pub struct Prolog<'s> {
    /// The optional XML declaration.
    pub(crate) xml_decl: Option<XMLDecl<'s>>,
    /// The `Misc*` items that precede the optional doctype declaration.
    pub(crate) miscs: Vec<Misc<'s>>,
    /// The optional doctype declaration together with the `Misc*` items that
    /// follow it.
    pub(crate) doctype_decl: Option<(DoctypeDecl<'s>, Vec<Misc<'s>>)>,
}

/// [23]   XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
///
/// The XML declaration: mandatory version info plus the optional encoding and
/// standalone declarations.
#[derive(Debug)]
pub struct XMLDecl<'s> {
    /// The `VersionInfo` part.
    pub(crate) version_info: VersionInfo,
    /// The optional `EncodingDecl` part.
    pub(crate) encoding_decl: Option<EncodingDecl<'s>>,
    /// The optional `SDDecl` (standalone declaration) part.
    pub(crate) sd_decl: Option<SDDecl>,
}

/// [24]   VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
///
/// The variant records which quote character delimited the version number.
#[derive(Debug)]
pub enum VersionInfo {
    SingleQuoted(VersionNum),
    DoubleQuoted(VersionNum),
}

impl Deref for VersionInfo {
    type Target = VersionNum;

    /// Borrows the contained `VersionNum`, regardless of quoting style.
    fn deref(&self) -> &Self::Target {
        match self {
            VersionInfo::SingleQuoted(num) | VersionInfo::DoubleQuoted(num) => num,
        }
    }
}

/// [25]   Eq ::= S? '=' S?
///
/// Unit marker type for the production; it carries no data.
/// Note that this type shares its name with the standard `Eq` trait.
#[derive(Clone)]
pub struct Eq;

/// [26]   VersionNum ::= '1.' [0-9]+
///
/// Only two values are representable here, although the production allows any
/// `1.<digits>` string.
#[derive(Clone, Debug)]
pub enum VersionNum {
    // presumably "1.0" — confirm against the parser
    One,
    // presumably "1.1" — confirm against the parser
    OneDotOne,
}

/// [27]   Misc ::= Comment | PI | S
///
/// A comment, processing instruction, or whitespace.
#[derive(Clone, Debug)]
pub enum Misc<'s> {
    Comment(Comment<'s>),
    PI(PI<'s>),
    // TODO: how to deal with whitespace
    /// Whitespace; this variant carries no data, so the whitespace text is not
    /// kept.
    S,
}

/// [16]   doctypedecl ::= '<!DOCTYPE' S QName (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
/// [28]   doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
///
/// Two forms of the production are listed: one using `QName` and one using
/// `Name`. The `name` field uses `QName`.
#[derive(Debug)]
pub struct DoctypeDecl<'s> {
    /// The declared name.
    name: QName<'s>,
    /// The optional `ExternalID` part.
    external_id: Option<ExternalID<'s>>,
    /// The optional bracketed internal subset.
    int_subset: Option<IntSubset<'s>>,
}

/// [28a]   DeclSep ::= PEReference | S
///
/// A separator between declarations: a parameter-entity reference or
/// whitespace.
#[derive(Clone, Debug)]
pub enum DeclSep<'s> {
    PEReference(PEReference<'s>),
    // TODO: tackle whitespace
    /// Whitespace; this variant carries no data, so the whitespace text is not
    /// kept.
    S,
}

/// One item of an internal subset: a markup declaration or a declaration
/// separator.
#[derive(Debug)]
pub enum IntSubsetDeclaration<'s> {
    MarkupDecl(MarkupDecl<'s>),
    DeclSep(DeclSep<'s>),
}
/// from [16]   intSubset ::= (markupdecl | PEReference | S)*
/// [28b]   intSubset ::= (markupdecl | DeclSep)*
///
/// Newtype around the sequence of internal-subset items.
#[derive(Debug)]
pub struct IntSubset<'s>(Vec<IntSubsetDeclaration<'s>>);

/// [29]   markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
///
/// One variant per alternative of the production.
#[derive(Debug)]
pub enum MarkupDecl<'s> {
    Elementdecl(Elementdecl<'s>),
    AttlistDecl(AttlistDecl<'s>),
    EntityDecl(EntityDecl<'s>),
    NotationDecl(NotationDecl<'s>),
    PI(PI<'s>),
    Comment(Comment<'s>),
}

/// [30]   extSubset ::= TextDecl? extSubsetDecl
///
/// An external subset: an optional text declaration followed by the subset
/// declarations.
pub struct ExtSubset<'s> {
    /// The optional `TextDecl` part.
    text_decl: Option<TextDecl<'s>>,
    /// The `extSubsetDecl` part.
    ext_subset_decl: ExtSubsetDecl<'s>,
}

/// One item of an external subset declaration list: a markup declaration, a
/// conditional section, or a declaration separator.
pub enum ExtSubsetDeclaration<'s> {
    MarkupDecl(MarkupDecl<'s>),
    ConditionalSect(ConditionalSect<'s>),
    DeclSep(DeclSep<'s>),
}
/// [31]   extSubsetDecl ::= ( markupdecl | conditionalSect | DeclSep)*
///
/// Newtype around the sequence of external-subset items.
pub struct ExtSubsetDecl<'s>(Vec<ExtSubsetDeclaration<'s>>);

/// [32]   SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
///
/// The variant records which quote character delimited the value; the payload
/// is the standalone flag as a `bool`.
#[derive(Debug, Clone)]
pub enum SDDecl {
    SingleQuoted(bool),
    DoubleQuoted(bool),
}

impl Deref for SDDecl {
    type Target = bool;

    /// Borrows the contained flag, regardless of quoting style.
    fn deref(&self) -> &Self::Target {
        match self {
            SDDecl::SingleQuoted(flag) | SDDecl::DoubleQuoted(flag) => flag,
        }
    }
}

// (Productions 33 through 38 have been removed.)

/// [39]   element ::= EmptyElemTag | STag content ETag
///
/// An element: either a single empty-element tag, or a start tag, content and
/// end tag.
#[derive(Debug, Clone)]
pub enum Element<'s> {
    /// The `EmptyElemTag` alternative.
    Empty(EmptyElemTag<'s>),
    /// The `STag content ETag` alternative, in that order.
    NotEmpty(STag<'s>, Content<'s>, ETag<'s>),
}

/// [12]   STag ::= '<' QName (S Attribute)* S? '>'
/// [40]   STag ::= '<' Name (S Attribute)* S? '>'
///
/// A start tag. Two forms of the production are listed; the `name` field uses
/// `QName`.
#[derive(Debug, Clone)]
pub struct STag<'s> {
    /// The tag name.
    pub(crate) name: QName<'s>,
    /// The attributes listed in the tag.
    pub(crate) attributes: Vec<Attribute<'s>>,
}

/// [15]   Attribute ::= NSAttName Eq AttValue | QName Eq AttValue
// pub type Attribute<'s> = (Name<'s>, AttValue<'s>);
/// [41]   Attribute ::= Name Eq AttValue
///
/// An attribute, split by the kind of name it carries: a namespace
/// declaration (`NSAttName`) or an ordinary attribute (`QName`).
#[derive(Debug, Clone)]
pub enum Attribute<'s> {
    /// The `NSAttName Eq AttValue` alternative.
    NamespaceDeclaration {
        ns_name: NSAttName<'s>,
        value: AttValue<'s>,
    },
    /// The `QName Eq AttValue` alternative.
    Attribute {
        name: QName<'s>,
        value: AttValue<'s>,
    },
}

/// [13]   ETag ::= '</' QName S? '>'
/// [42]   ETag ::= '</' Name S? '>'
///
/// An end tag. Two forms of the production are listed; the `name` field uses
/// `QName`.
#[derive(Debug, Clone)]
pub struct ETag<'s> {
    /// The tag name.
    pub(crate) name: QName<'s>,
}

/// One non-character-data item of element content. Character data is not a
/// variant here; `Content` stores it alongside the items instead.
#[derive(Debug, Clone)]
pub enum ContentItem<'s> {
    // CharData(CharData<'s>),
    Element(Element<'s>),
    Reference(Reference<'s>),
    CDSect(CDSect<'s>),
    PI(PI<'s>),
    Comment(Comment<'s>),
}
/// [43]   content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
///
/// Element content, laid out like the production: optional leading character
/// data, then a sequence of (item, optional trailing character data) pairs.
#[derive(Debug, Clone)]
pub struct Content<'s> {
    /// The optional leading `CharData`.
    pub(crate) char_data: Option<CharData<'s>>,
    /// Each item paired with the optional `CharData` that follows it.
    pub(crate) content: Vec<(ContentItem<'s>, Option<CharData<'s>>)>,
}

/// [14]   EmptyElemTag ::= '<' QName (S Attribute)* S? '/>'
/// [44]   EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'	[WFC: Unique Att Spec]
///
/// An empty-element tag. Two forms of the production are listed; the `name`
/// field uses `QName`.
#[derive(Debug, Clone)]
pub struct EmptyElemTag<'s> {
    /// The tag name.
    pub(crate) name: QName<'s>,
    /// The attributes listed in the tag.
    pub(crate) attributes: Vec<Attribute<'s>>,
}

/// [17]   elementdecl ::= '<!ELEMENT' S QName S contentspec S? '>'
/// [45]   elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
///
/// An element type declaration. Two forms of the production are listed; the
/// `name` field uses `QName`.
#[derive(Debug)]
pub struct Elementdecl<'s> {
    /// The declared element name.
    name: QName<'s>,
    /// The `contentspec` part.
    contentspec: Contentspec<'s>,
}

// TODO: decide how keyword casing should be handled here. The original note
// only said "casings???" and its intent is unclear; clarify or remove.
/// [46]   	contentspec	   ::=   	'EMPTY' | 'ANY' | Mixed | children
#[derive(Clone, Debug)]
pub enum Contentspec<'s> {
    Empty,
    Any,
    Mixed(Mixed<'s>),
    Children(Children<'s>),
}

/// Occurrence indicator that may follow a content particle.
///
/// `Occurence ::= ('?' | '*' | '+')?`
///
/// NOTE: the type name is spelled `Occurence` (sic). It is public, so
/// renaming it would change the API.
#[derive(Clone, Debug)]
pub enum Occurence {
    /// Presumably: no indicator present - confirm against the parser.
    Once,
    /// Presumably the `?` indicator.
    Optional,
    /// Presumably the `*` indicator.
    Many0,
    /// Presumably the `+` indicator.
    Many1,
}

/// The `(choice | seq)` part of XML 1.0 production 47 (`children`).
#[derive(Clone, Debug)]
pub enum ChildrenKind<'s> {
    Choice(Choice<'s>),
    Seq(Seq<'s>),
}
/// An element-content model: a choice or sequence group with an occurrence
/// indicator.
///
/// XML 1.0 production:
/// `[47] children ::= (choice | seq) ('?' | '*' | '+')?`
#[derive(Clone, Debug)]
pub struct Children<'s> {
    /// The `(choice | seq)` group.
    kind: ChildrenKind<'s>,
    /// The `('?' | '*' | '+')?` suffix.
    occurence: Occurence,
}

/// The `(QName | choice | seq)` part of a content particle (`cp`).
#[derive(Clone, Debug)]
pub enum CpKind<'s> {
    Name(QName<'s>),
    Choice(Choice<'s>),
    Seq(Seq<'s>),
}
/// A content particle: a name, choice or sequence with an occurrence
/// indicator.
///
/// Namespaces in XML production:
/// `[18] cp ::= (QName | choice | seq) ('?' | '*' | '+')?`
///
/// XML 1.0 production:
/// `[48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?`
#[derive(Clone, Debug)]
pub struct Cp<'s> {
    /// The `(QName | choice | seq)` part.
    kind: CpKind<'s>,
    /// The `('?' | '*' | '+')?` suffix.
    occurence: Occurence,
}

/// A choice group: a list of alternative content particles.
///
/// XML 1.0 production:
/// `[49] choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'`
///
/// The production requires at least two particles; the `Vec` itself does
/// not enforce that.
#[derive(Clone, Debug)]
pub struct Choice<'s>(Vec<Cp<'s>>);

/// A sequence group: a list of content particles.
///
/// XML 1.0 production:
/// `[50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'`
///
/// The production requires at least one particle; the `Vec` itself does
/// not enforce that.
#[derive(Clone, Debug)]
pub struct Seq<'s>(Vec<Cp<'s>>);

/// A mixed-content declaration.
///
/// Mixed content always contains `#PCDATA`, so only a list of names is
/// stored - presumably the element names listed after `#PCDATA`.
///
/// Namespaces in XML production:
/// `[19] Mixed ::= '(' S? '#PCDATA' (S? '|' S? QName)* S? ')*' | '(' S? '#PCDATA' S? ')'`
///
/// XML 1.0 production:
/// `[51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'`
#[derive(Clone, Debug)]
pub struct Mixed<'s>(Vec<QName<'s>>);

/// An attribute-list declaration (`<!ATTLIST ...>`).
///
/// Namespaces in XML production:
/// `[20] AttlistDecl ::= '<!ATTLIST' S QName AttDef* S? '>'`
///
/// XML 1.0 production:
/// `[52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'`
#[derive(Debug)]
pub struct AttlistDecl<'s> {
    /// Name of the element type the attributes are declared for.
    element_type: QName<'s>,
    /// The `AttDef*` part of the production.
    att_defs: Vec<AttDef<'s>>,
}

/// The `(QName | NSAttName)` part of Namespaces in XML production 21
/// (`AttDef`).
#[derive(Debug)]
pub enum AttDefName<'s> {
    QName(QName<'s>),
    NSAttName(NSAttName<'s>),
}
/// A single attribute definition inside an attribute-list declaration.
///
/// Namespaces in XML production:
/// `[21] AttDef ::= S (QName | NSAttName) S AttType S DefaultDecl`
///
/// XML 1.0 production:
/// `[53] AttDef ::= S Name S AttType S DefaultDecl`
#[derive(Debug)]
pub struct AttDef<'s> {
    /// The declared attribute name (ordinary or namespace-declaring).
    name: AttDefName<'s>,
    /// The declared attribute type.
    att_type: AttType<'s>,
    /// The declared default.
    default_decl: DefaultDecl<'s>,
}

/// The declared type of an attribute.
///
/// XML 1.0 production:
/// `[54] AttType ::= StringType | TokenizedType | EnumeratedType`
#[derive(Clone, Debug)]
pub enum AttType<'s> {
    /// The `StringType` alternative; this variant carries no payload.
    StringType,
    /// The `TokenizedType` alternative.
    TokenizedType(TokenizedType),
    /// The `EnumeratedType` alternative.
    EnumeratedType(EnumeratedType<'s>),
}

/// Marker type for the string attribute type.
///
/// XML 1.0 production:
/// `[55] StringType ::= 'CDATA'`
// `Debug` is derived so this type is printable like the other
// attribute-type declarations in this module.
#[derive(Clone, Debug)]
pub struct StringType;

/// A tokenized attribute type; there is one variant per keyword of the
/// production.
///
/// XML 1.0 production:
/// `[56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'`
#[derive(Clone, Debug)]
pub enum TokenizedType {
    ID,
    IDRef,
    IDRefs,
    Entity,
    Entities,
    NMToken,
    NMTokens,
}

/// An enumerated attribute type: either a notation type or a plain
/// enumeration.
///
/// XML 1.0 production:
/// `[57] EnumeratedType ::= NotationType | Enumeration`
#[derive(Debug, Clone)]
pub enum EnumeratedType<'s> {
    NotationType(NotationType<'s>),
    Enumeration(Enumeration<'s>),
}

/// A notation attribute type: the list of names given between the
/// parentheses.
///
/// XML 1.0 production:
/// `[58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'`
#[derive(Debug, Clone)]
pub struct NotationType<'s>(Vec<Name<'s>>);

/// An enumeration attribute type: the list of name tokens given between the
/// parentheses.
///
/// XML 1.0 production:
/// `[59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'`
#[derive(Debug, Clone)]
pub struct Enumeration<'s>(Vec<Nmtoken<'s>>);

/// The default declaration of an attribute definition.
///
/// XML 1.0 production:
/// `[60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)`
#[derive(Debug, Clone)]
pub enum DefaultDecl<'s> {
    /// The `'#REQUIRED'` keyword.
    Required,
    /// The `'#IMPLIED'` keyword.
    Implied,
    /// A default value. If the bool is `true`, the attribute MUST always
    /// have this default value (presumably: `#FIXED` was present - confirm
    /// against the parser).
    Fixed(bool, AttValue<'s>),
}

/// A conditional section: either an include section or an ignore section.
///
/// XML 1.0 production:
/// `[61] conditionalSect ::= includeSect | ignoreSect`
pub enum ConditionalSect<'s> {
    IncludeSect(IncludeSect<'s>),
    IgnoreSect(IgnoreSect<'s>),
}

/// An include section, wrapping the external-subset declarations it
/// contains.
///
/// XML 1.0 production:
/// `[62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'`
pub struct IncludeSect<'s>(ExtSubsetDecl<'s>);

/// An ignore section, wrapping its `ignoreSectContents*` list.
///
/// XML 1.0 production:
/// `[63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'`
pub struct IgnoreSect<'s>(Vec<IgnoreSectContents<'s>>);

/// The contents of an ignore section, which may nest further
/// `<![ ... ]]>` sections.
///
/// XML 1.0 production:
/// `[64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*`
pub struct IgnoreSectContents<'s> {
    // TODO: clarify the meaning of production [64] and confirm that these
    // two fields model it correctly.
    /// Corresponds to the leading `Ignore`.
    ignore: Ignore<'s>,
    /// Corresponds to the repeated group: each nested `ignoreSectContents`
    /// paired with the `Ignore` that follows it.
    ignore_list: Vec<(IgnoreSectContents<'s>, Ignore<'s>)>,
}

/// A run of ignored text, stored as a borrowed string slice.
///
/// XML 1.0 production:
/// `[65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)`
pub struct Ignore<'s>(&'s str);

/// A character reference, stored as its unparsed digit text.
///
/// XML 1.0 production:
/// `[66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'`
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum CharRef<'s> {
    /// Digit text that `process` parses as a base-10 number.
    Decimal(&'s str),
    /// Digit text that `process` parses as a base-16 number.
    Hexadecimal(&'s str),
}

impl<'s> CharRef<'s> {
    pub fn process(&self) -> crate::Result<char> {
        let int: u32;
        match self {
            CharRef::Decimal(dec) => {
                int = dec
                    .parse()
                    .map_err(|e| Error::InvalidCharRef(CharRefError::ParseInt(e)))?;
            }
            CharRef::Hexadecimal(hex) => {
                int = <u32>::from_str_radix(hex, 16)
                    .map_err(|e| Error::InvalidCharRef(CharRefError::ParseInt(e)))?;
            }
        }
        let c = std::char::from_u32(int);

        let c = c.ok_or_else(|| Error::InvalidCharRef(CharRefError::IntegerNotAChar(int)))?;
        if matches!(c, '\u{9}' | '\u{A}' | '\u{D}' | '\u{20}'..='\u{D7FF}' | '\u{E000}'..='\u{FFFD}' | '\u{10000}'..='\u{10FFFF}')
        {
            return Ok(c);
        } else {
            return Err(Error::InvalidCharRef(CharRefError::InvalidXMLChar(c)));
        };
    }
}

/// A reference: either an entity reference or a character reference.
///
/// XML 1.0 production:
/// `[67] Reference ::= EntityRef | CharRef`
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Reference<'s> {
    EntityRef(EntityRef<'s>),
    CharRef(CharRef<'s>),
}

impl<'s> Reference<'s> {
    pub fn process(&self) -> crate::Result<char> {
        match self {
            Reference::EntityRef(entity_ref) => match *entity_ref.deref().deref() {
                "amp" => Ok('&'),
                "lt" => Ok('<'),
                "gt" => Ok('>'),
                "apos" => Ok('\''),
                "quot" => Ok('"'),
                e => return Err(Error::EntityProcessError(e.to_string())),
            },
            Reference::CharRef(char_ref) => Ok(char_ref.process()?),
        }
    }
}

/// A general entity reference, wrapping the name of the referenced entity.
///
/// XML 1.0 production:
/// `[68] EntityRef ::= '&' Name ';'`
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct EntityRef<'s>(pub(crate) Name<'s>);

/// Dereferences to the wrapped `Name`.
impl<'s> Deref for EntityRef<'s> {
    type Target = Name<'s>;

    fn deref(&self) -> &Self::Target {
        let EntityRef(name) = self;
        name
    }
}

/// A parameter-entity reference, wrapping the name of the referenced
/// entity.
///
/// XML 1.0 production:
/// `[69] PEReference ::= '%' Name ';'`
#[derive(Clone, Debug)]
#[repr(transparent)]
pub struct PEReference<'s>(Name<'s>);

/// An entity declaration: either a general or a parameter entity
/// declaration.
///
/// XML 1.0 production:
/// `[70] EntityDecl ::= GEDecl | PEDecl`
#[derive(Debug)]
pub enum EntityDecl<'s> {
    GEDecl(GEDecl<'s>),
    PEDecl(PEDecl<'s>),
}

/// A general entity declaration.
///
/// XML 1.0 production:
/// `[71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'`
#[derive(Debug)]
pub struct GEDecl<'s> {
    /// Name of the declared entity.
    name: Name<'s>,
    /// The entity definition.
    entity_def: EntityDef<'s>,
}

/// A parameter entity declaration.
///
/// XML 1.0 production:
/// `[72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'`
#[derive(Debug)]
pub struct PEDecl<'s> {
    /// Name of the declared parameter entity.
    name: Name<'s>,
    /// The parameter-entity definition.
    pe_def: PEDef<'s>,
}

/// The definition of a general entity: a literal value, or an external
/// identifier with an optional `NDATA` declaration.
///
/// XML 1.0 production:
/// `[73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)`
#[derive(Debug)]
pub enum EntityDef<'s> {
    /// The `EntityValue` alternative.
    EntityValue(EntityValue<'s>),
    /// The `(ExternalID NDataDecl?)` alternative.
    ExternalID {
        external_id: ExternalID<'s>,
        n_data_decl: Option<NDataDecl<'s>>,
    },
}

/// The definition of a parameter entity: a literal value or an external
/// identifier.
///
/// XML 1.0 production:
/// `[74] PEDef ::= EntityValue | ExternalID`
#[derive(Debug)]
pub enum PEDef<'s> {
    EntityValue(EntityValue<'s>),
    ExternalID(ExternalID<'s>),
}

/// An external identifier: a system identifier, optionally preceded by a
/// public identifier.
///
/// XML 1.0 production:
/// `[75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral`
#[derive(Debug)]
pub enum ExternalID<'s> {
    /// The `'SYSTEM' S SystemLiteral` alternative.
    SYSTEM {
        system_identifier: SystemLiteral<'s>,
    },
    /// The `'PUBLIC' S PubidLiteral S SystemLiteral` alternative.
    PUBLIC {
        public_identifier: PubidLiteral<'s>,
        system_identifier: SystemLiteral<'s>,
    },
}

/// An `NDATA` declaration, wrapping the name that follows the keyword.
///
/// XML 1.0 production:
/// `[76] NDataDecl ::= S 'NDATA' S Name`
#[derive(Debug)]
pub struct NDataDecl<'s>(Name<'s>);

/// A text declaration: optional version information followed by a
/// mandatory encoding declaration.
///
/// XML 1.0 production:
/// `[77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'`
pub struct TextDecl<'s> {
    /// The `VersionInfo?` part.
    version_info: Option<VersionInfo>,
    /// The `EncodingDecl` part.
    encoding_decl: EncodingDecl<'s>,
}

/// An external parsed entity: an optional text declaration followed by
/// content.
///
/// XML 1.0 production:
/// `[78] extParsedEnt ::= TextDecl? content`
pub struct ExtParsedEnt<'s> {
    /// The `TextDecl?` part.
    text_decl: Option<TextDecl<'s>>,
    /// The `content` part.
    content: Content<'s>,
}

/// An encoding declaration, wrapping the declared encoding name.
///
/// XML 1.0 production:
/// `[80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )`
#[derive(Debug)]
// TODO?: select quote version
pub struct EncodingDecl<'s>(EncName<'s>);

/// Dereferences to the wrapped `EncName`.
impl<'s> Deref for EncodingDecl<'s> {
    type Target = EncName<'s>;

    fn deref(&self) -> &Self::Target {
        let EncodingDecl(enc_name) = self;
        enc_name
    }
}

/// An encoding name, stored as a borrowed string slice.
///
/// XML 1.0 production:
/// `[81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*`
#[derive(Debug)]
pub struct EncName<'s>(&'s str);

/// Dereferences to the wrapped `&'s str` (note that the target is the
/// reference type itself, not `str`).
impl<'s> Deref for EncName<'s> {
    type Target = &'s str;

    fn deref(&self) -> &Self::Target {
        let EncName(raw) = self;
        raw
    }
}

/// The `(ExternalID | PublicID)` part of XML 1.0 production 82
/// (`NotationDecl`).
#[derive(Debug)]
pub enum NotationDeclID<'s> {
    External(ExternalID<'s>),
    Public(PublicID<'s>),
}
/// A notation declaration (`<!NOTATION ...>`).
///
/// XML 1.0 production:
/// `[82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'`
#[derive(Debug)]
pub struct NotationDecl<'s> {
    /// Name of the declared notation.
    name: Name<'s>,
    /// The external or public identifier of the notation.
    id: NotationDeclID<'s>,
}

/// A public identifier, wrapping the literal that follows `PUBLIC`.
///
/// XML 1.0 production:
/// `[83] PublicID ::= 'PUBLIC' S PubidLiteral`
#[derive(Debug)]
pub struct PublicID<'s>(PubidLiteral<'s>);

#[cfg(test)]
mod test {
    use super::{AttValue, AttValueData, EntityRef, Name, Reference};

    /// `AttValue::from(&str)` is expected to produce a `DoubleQuoted` value
    /// in which every `<` and `"` of the input appears as an `lt` / `quot`
    /// entity reference and the text in between as `String` segments.
    #[test]
    fn att_value_from_str() {
        // Builders for the two entity-reference segments used throughout.
        let lt = || AttValueData::Reference(Reference::EntityRef(EntityRef(Name("lt"))));
        let quot = || AttValueData::Reference(Reference::EntityRef(EntityRef(Name("quot"))));

        // Two `<` separated by text.
        let expected = AttValue::DoubleQuoted(vec![
            AttValueData::String("hsdaflaskdf"),
            lt(),
            AttValueData::String("laksdf"),
            lt(),
            AttValueData::String("abdsf"),
        ]);
        assert_eq!(AttValue::from("hsdaflaskdf<laksdf<abdsf"), expected);

        // A `<` and a `"` separated by text.
        let expected = AttValue::DoubleQuoted(vec![
            AttValueData::String("hsdaflaskdf"),
            lt(),
            AttValueData::String("laksdf"),
            quot(),
            AttValueData::String("abdsf"),
        ]);
        assert_eq!(AttValue::from("hsdaflaskdf<laksdf\"abdsf"), expected);

        // Input ending in a `"`.
        let expected = AttValue::DoubleQuoted(vec![
            AttValueData::String("hsdaflaskdf"),
            lt(),
            AttValueData::String("laksdf"),
            quot(),
        ]);
        assert_eq!(AttValue::from("hsdaflaskdf<laksdf\""), expected);

        // Several special characters in a row.
        let expected = AttValue::DoubleQuoted(vec![
            AttValueData::String("hsdaflaskdf"),
            quot(),
            lt(),
            lt(),
            AttValueData::String("laksdf"),
            quot(),
        ]);
        assert_eq!(AttValue::from("hsdaflaskdf\"<<laksdf\""), expected);

        // Input consisting only of special characters.
        let expected = AttValue::DoubleQuoted(vec![lt(), lt(), quot()]);
        assert_eq!(AttValue::from("<<\""), expected);
    }
}