diff options
| author | 2024-11-10 14:31:43 +0000 | |
|---|---|---|
| committer | 2024-11-10 14:31:43 +0000 | |
| commit | be50ab4890993ae97bc79138364cd5e316566e46 (patch) | |
| tree | cedf5db695f1a35a37e0c1df1efe1346d03477ea /src | |
| parent | 593cad573baf239337c5869c92ea9e7aed61e847 (diff) | |
| download | peanuts-be50ab4890993ae97bc79138364cd5e316566e46.tar.gz peanuts-be50ab4890993ae97bc79138364cd5e316566e46.tar.bz2 peanuts-be50ab4890993ae97bc79138364cd5e316566e46.zip | |
implement element reading
Diffstat (limited to '')
| -rw-r--r-- | src/element.rs | 44 | ||||
| -rw-r--r-- | src/error.rs | 17 | ||||
| -rw-r--r-- | src/lib.rs | 2 | ||||
| -rw-r--r-- | src/main.rs | 87 | ||||
| -rw-r--r-- | src/reader.rs | 325 | ||||
| -rw-r--r-- | src/xml/mod.rs | 241 | 
6 files changed, 681 insertions, 35 deletions
| diff --git a/src/element.rs b/src/element.rs index 35d73a3..0e0b8f1 100644 --- a/src/element.rs +++ b/src/element.rs @@ -1,23 +1,32 @@  // elements resemble a final tree, including inherited namespace information -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; + +use crate::{ +    error::Error, +    xml::{self, Attribute}, +};  // when are namespaces names chosen then if they are automatically calculated  // namespaces are held by readers and writers. +#[derive(PartialEq, Eq, Hash, Clone)]  pub struct Namespace { -    prefix: Option<String>, -    namespace: String, +    pub prefix: Option<String>, +    pub namespace: String,  }  // names are qualified, they contain a reference to the namespace (held within the reader/writer) +#[derive(PartialEq, Eq, Hash, Clone)]  pub struct Name { -    namespace: String, -    name: String, +    pub namespace: Namespace, +    pub name: String,  } -pub enum Node { +pub enum Content {      Element(Element),      Text(String), +    PI(String), +    Comment(String),  }  // should this be a trait? @@ -29,16 +38,35 @@ pub struct Element {      // namespace: String,      // hashmap of explicit namespace declarations on the element itself only      // possibly not needed as can be calculated at write time depending on context and qualified namespace, and for reading, element validity and namespaces are kept track of by the reader. -    pub namespace_decl: HashMap<Option<String>, String>, +    pub namespace_decl: HashSet<Namespace>,      // attributes can be in a different namespace than the element. how to make sure they are valid?      // maybe include the namespace instead of or with the prefix      // you can calculate the prefix from the namespaced name and the current writer context      // you can validate the prefix and calculate the namespace from the current reader context      // this results in readers and writers being able to return qualification errors as they aren't able to create elements until every part is qualified.      pub attributes: HashMap<Name, String>, -    pub children: Option<Vec<Node>>, +    pub content: Vec<Content>,  } +// impl<'s> TryFrom<xml::Element<'s>> for Element<'s> { +//     type Error = Error; + +//     fn try_from(xml_element: xml::Element) -> Result<Self, Self::Error> { +//         match &xml_element { +//             xml::Element::Empty(empty_elem_tag) => { +//                 let namespace_decl; +//                 let attributes; +//                 empty_elem_tag +//                     .attributes +//                     .into_iter() +//                     .filter(|attribute| matches!(attribute, Attribute::NamespaceDeclaration(_))); +//                 todo!() +//             } +//             xml::Element::NotEmpty(stag, content, etag) => todo!(), +//         } +//     } +// } +  // example of deriving an element:  // #[derive(XMLWrite, XMLRead)] diff --git a/src/error.rs b/src/error.rs index 78508ae..96c709c 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,9 +1,18 @@ -use std::str::Utf8Error; +use std::{num::ParseIntError, str::Utf8Error}; + +use crate::element::{Name, Namespace};  pub enum Error {      ReadError(std::io::Error),      Utf8Error(Utf8Error),      ParseError(String), +    EntityProcessError(String), +    // TODO: better choice for failures than string +    InvalidCharRef(String), +    DuplicateNameSpace(Namespace), +    DuplicateAttribute(String), +    UnqualifiedNamespace(String), +    MismatchedEndTag(String, String),  }  impl From<std::io::Error> for Error { @@ -17,3 +26,9 @@ impl From<Utf8Error> for Error {          Self::Utf8Error(e)      }  } + +impl From<ParseIntError> for Error { +    fn from(e: ParseIntError) -> Self { +        Self::InvalidCharRef(e.to_string()) +    } +} @@ -3,3 +3,5 @@ mod error;  mod reader;  mod writer;  pub mod xml; + +pub type Result<T> = std::result::Result<T, error::Error>; diff --git a/src/main.rs b/src/main.rs index ea86e07..580652e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,12 +5,87 @@ use peanuts::xml::Document;  #[tokio::main]  async fn main() {      let (rest, document) = Document::parse( -        "<?xml version=\"1.0\"?> -<TEST> -  <block1>Background Mark 1</block1> -  <block2>Background Mark 2</block2> -  <block3>Background Mark 3</block3> -</TEST>ahsdkjlfhasdlkjfhkljh +        "<?xml version='1.0' encoding='UTF-8'?> + +   <xs:schema +       xmlns:xs='http://www.w3.org/2001/XMLSchema' +       targetNamespace='http://etherx.jabber.org/streams' +       xmlns='http://etherx.jabber.org/streams' +       elementFormDefault='unqualified'> + +     <xs:import namespace='jabber:client'/> +     <xs:import namespace='jabber:server'/> +     <xs:import namespace='urn:ietf:params:xml:ns:xmpp-sasl'/> +     <xs:import namespace='urn:ietf:params:xml:ns:xmpp-streams'/> +     <xs:import namespace='urn:ietf:params:xml:ns:xmpp-tls'/> + +     <xs:element name='stream'> +       <xs:complexType> +         <xs:sequence xmlns:client='jabber:client' +                      xmlns:server='jabber:server'> +           <xs:element ref='features' +                       minOccurs='0' +                       maxOccurs='1'/> +           <xs:any namespace='urn:ietf:params:xml:ns:xmpp-tls' +                   minOccurs='0' +                   maxOccurs='1'/> +           <xs:any namespace='urn:ietf:params:xml:ns:xmpp-sasl' +                   minOccurs='0' +                   maxOccurs='1'/> +           <xs:any namespace='##other' +                   minOccurs='0' +                   maxOccurs='unbounded' +                   processContents='lax'/> +           <xs:choice minOccurs='0' maxOccurs='1'> +             <xs:choice minOccurs='0' maxOccurs='unbounded'> +               <xs:element ref='client:message'/> +               <xs:element ref='client:presence'/> +               <xs:element ref='client:iq'/> +             </xs:choice> +             <xs:choice minOccurs='0' maxOccurs='unbounded'> +               <xs:element ref='server:message'/> +               <xs:element ref='server:presence'/> +               <xs:element ref='server:iq'/> +             </xs:choice> +           </xs:choice> +           <xs:element ref='error' minOccurs='0' maxOccurs='1'/> +         </xs:sequence> +         <xs:attribute name='from' type='xs:string' use='optional'/> +         <xs:attribute name='id' type='xs:string' use='optional'/> +         <xs:attribute name='to' type='xs:string' use='optional'/> +         <xs:attribute name='version' type='xs:decimal' use='optional'/> +         <xs:attribute ref='xml:lang' use='optional'/> +         <xs:anyAttribute namespace='##other' processContents='lax'/> +       </xs:complexType> +     </xs:element> + +     <xs:element name='features'> +       <xs:complexType> +         <xs:sequence> +           <xs:any namespace='##other' +                   minOccurs='0' +                   maxOccurs='unbounded' +                   processContents='lax'/> +         </xs:sequence> +       </xs:complexType> +     </xs:element> + +     <xs:element name='error'> +       <xs:complexType> +         <xs:sequence  xmlns:err='urn:ietf:params:xml:ns:xmpp-streams'> +           <xs:group   ref='err:streamErrorGroup'/> +           <xs:element ref='err:text' +                       minOccurs='0' +                       maxOccurs='1'/> +           <xs:any     namespace='##other' +                       minOccurs='0' +                       maxOccurs='1' +                       processContents='lax'/> +         </xs:sequence> +       </xs:complexType> +     </xs:element> + +   </xs:schema>asdf  ",      )      .unwrap(); diff --git a/src/reader.rs b/src/reader.rs index 313de4c..b51489f 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -1,31 +1,348 @@ +use circular::Buffer;  use futures::Stream;  use nom::Err; -use std::{collections::BTreeMap, str}; -use tokio::io::AsyncBufReadExt; +use std::{ +    collections::{BTreeMap, HashMap, HashSet}, +    path::Prefix, +    str::{self, FromStr}, +}; +use tokio::io::{AsyncBufRead, AsyncBufReadExt, AsyncRead, AsyncReadExt}; + +static MAX_STANZA_SIZE: usize = 65536;  use crate::{ -    element::{Element, Name, Namespace}, +    element::{Content, Element, Name, Namespace},      error::Error, +    xml::{self, parsers::Parser}, +    Result,  };  /// streaming reader that tracks depth and available namespaces at current depth  pub struct Reader<R> {      inner: R, +    buffer: Buffer,      // holds which tags we are in atm over depth +    // to have names reference namespaces could      depth: Vec<Name>, -    namespaces: Vec<(usize, Namespace)>, +    namespaces: Vec<HashSet<Namespace>>,  }  impl<R> Reader<R> {      pub fn new(reader: R) -> Self {          Self {              inner: reader, +            buffer: Buffer::with_capacity(MAX_STANZA_SIZE),              depth: Vec::new(),              namespaces: Vec::new(),          }      }  } +impl<R> Reader<R> +where +    R: AsyncRead + Unpin, +{ +    async fn read_buf(&mut self) -> Result<usize> { +        Ok(self.inner.read_buf(&mut self.buffer).await?) +    } + +    async fn read_element<'s>(&'s mut self) -> Result<Element> { +        self.read_buf().await?; +        let mut input = str::from_utf8(self.buffer.data())?; +        loop { +            match xml::Element::parse(input) { +                Ok((rest, e)) => { +                    let len = self.buffer.available_data() - rest.as_bytes().len(); +                    let element = Reader::<R>::element_from_xml(&mut self.namespaces, e)?; +                    self.buffer.consume(len); +                    return Ok(element); +                } +                std::result::Result::Err(e) => match e { +                    Err::Incomplete(_) => { +                        self.read_buf().await?; +                        input = str::from_utf8(self.buffer.data())?; +                    } +                    // TODO: better error +                    Err::Error(e) => return Err(Error::ParseError(e.to_string())), +                    Err::Failure(e) => return Err(Error::ParseError(e.to_string())), +                }, +            } +        } +    } +} + +impl<R> Reader<R> { +    fn element_from_xml( +        namespaces: &mut Vec<HashSet<Namespace>>, +        element: xml::Element, +    ) -> Result<Element> { +        match element { +            xml::Element::Empty(empty_elem_tag) => { +                let mut namespace_declarations = HashSet::new(); +                for (prefix, namespace) in +                    empty_elem_tag.attributes.iter().filter_map(|attribute| { +                        if let xml::Attribute::NamespaceDeclaration { ns_name, value } = attribute { +                            Some((ns_name, value)) +                        } else { +                            None +                        } +                    }) +                { +                    let prefix = match prefix { +                        xml::NSAttName::PrefixedAttName(prefixed_att_name) => { +                            Some(prefixed_att_name.to_string()) +                        } +                        xml::NSAttName::DefaultAttName => None, +                    }; +                    let namespace = Namespace { +                        prefix, +                        namespace: namespace.process()?, +                    }; +                    if !namespace_declarations.insert(namespace.clone()) { +                        return Err(Error::DuplicateNameSpace(namespace)); +                    } +                } + +                // all namespaces available to the element (from both parent elements and element itself) +                let namespace_stack: Vec<&Namespace> = namespaces +                    .iter() +                    .flatten() +                    .chain(namespace_declarations.iter()) +                    .collect(); + +                let mut attributes = HashMap::new(); + +                for (q_name, value) in empty_elem_tag.attributes.iter().filter_map(|attribute| { +                    if let xml::Attribute::Attribute { name, value } = attribute { +                        Some((name, value)) +                    } else { +                        None +                    } +                }) { +                    let namespace; +                    let attribute_name; +                    match q_name { +                        xml::QName::PrefixedName(prefixed_name) => { +                            namespace = namespace_stack.iter().rfind(|namespace| { +                                namespace.prefix.as_deref() == Some(**prefixed_name.prefix) +                            }); +                            attribute_name = prefixed_name.local_part.to_string(); +                        } +                        xml::QName::UnprefixedName(unprefixed_name) => { +                            namespace = namespace_stack +                                .iter() +                                .rfind(|namespace| namespace.prefix == None); +                            attribute_name = unprefixed_name.to_string(); +                        } +                    } +                    if let Some(namespace) = namespace { +                        let namespace = (*namespace).clone(); +                        let name = Name { +                            namespace, +                            name: attribute_name, +                        }; +                        let value = value.process()?; +                        if let Some(_value) = attributes.insert(name, value) { +                            return Err(Error::DuplicateAttribute(q_name.to_string())); +                        } +                    } else { +                        return Err(Error::UnqualifiedNamespace(q_name.to_string())); +                    } +                } + +                let name; +                let namespace; +                match &empty_elem_tag.name { +                    xml::QName::PrefixedName(prefixed_name) => { +                        namespace = namespace_stack.iter().rfind(|namespace| { +                            namespace.prefix.as_deref() == Some(**prefixed_name.prefix) +                        }); +                        name = prefixed_name.local_part.to_string(); +                    } +                    xml::QName::UnprefixedName(unprefixed_name) => { +                        namespace = namespace_stack +                            .iter() +                            .rfind(|namespace| namespace.prefix == None); +                        name = unprefixed_name.to_string(); +                    } +                } + +                let namespace = (*namespace +                    .ok_or_else(|| Error::UnqualifiedNamespace(empty_elem_tag.name.to_string()))?) +                .clone(); + +                let name = Name { namespace, name }; + +                return Ok(Element { +                    name, +                    namespace_decl: namespace_declarations, +                    attributes, +                    content: Vec::new(), +                }); +            } +            xml::Element::NotEmpty(s_tag, content, e_tag) => { +                if s_tag.name != e_tag.name { +                    return Err(Error::MismatchedEndTag( +                        s_tag.name.to_string(), +                        e_tag.name.to_string(), +                    )); +                } +                let mut namespace_declarations = HashSet::new(); +                for (prefix, namespace) in s_tag.attributes.iter().filter_map(|attribute| { +                    if let xml::Attribute::NamespaceDeclaration { ns_name, value } = attribute { +                        Some((ns_name, value)) +                    } else { +                        None +                    } +                }) { +                    let prefix = match prefix { +                        xml::NSAttName::PrefixedAttName(prefixed_att_name) => { +                            Some(prefixed_att_name.to_string()) +                        } +                        xml::NSAttName::DefaultAttName => None, +                    }; +                    let namespace = Namespace { +                        prefix, +                        namespace: namespace.process()?, +                    }; +                    if !namespace_declarations.insert(namespace.clone()) { +                        return Err(Error::DuplicateNameSpace(namespace)); +                    } +                } + +                // all namespaces available to the element (from both parent elements and element itself) +                let namespace_stack: Vec<&Namespace> = namespaces +                    .iter() +                    .flatten() +                    .chain(namespace_declarations.iter()) +                    .collect(); + +                let mut attributes = HashMap::new(); + +                for (q_name, value) in s_tag.attributes.iter().filter_map(|attribute| { +                    if let xml::Attribute::Attribute { name, value } = attribute { +                        Some((name, value)) +                    } else { +                        None +                    } +                }) { +                    let namespace; +                    let attribute_name; +                    match q_name { +                        xml::QName::PrefixedName(prefixed_name) => { +                            namespace = namespace_stack.iter().rfind(|namespace| { +                                namespace.prefix.as_deref() == Some(**prefixed_name.prefix) +                            }); +                            attribute_name = prefixed_name.local_part.to_string(); +                        } +                        xml::QName::UnprefixedName(unprefixed_name) => { +                            namespace = namespace_stack +                                .iter() +                                .rfind(|namespace| namespace.prefix == None); +                            attribute_name = unprefixed_name.to_string(); +                        } +                    } +                    if let Some(namespace) = namespace { +                        let namespace = (*namespace).clone(); +                        let name = Name { +                            namespace, +                            name: attribute_name, +                        }; +                        let value = value.process()?; +                        if let Some(_value) = attributes.insert(name, value) { +                            return Err(Error::DuplicateAttribute(q_name.to_string())); +                        } +                    } else { +                        return Err(Error::UnqualifiedNamespace(q_name.to_string())); +                    } +                } + +                let name; +                let namespace; +                match &s_tag.name { +                    xml::QName::PrefixedName(prefixed_name) => { +                        namespace = namespace_stack.iter().rfind(|namespace| { +                            namespace.prefix.as_deref() == Some(**prefixed_name.prefix) +                        }); +                        name = prefixed_name.local_part.to_string(); +                    } +                    xml::QName::UnprefixedName(unprefixed_name) => { +                        namespace = namespace_stack +                            .iter() +                            .rfind(|namespace| namespace.prefix == None); +                        name = unprefixed_name.to_string(); +                    } +                } + +                let namespace = (*namespace +                    .ok_or_else(|| Error::UnqualifiedNamespace(s_tag.name.to_string()))?) +                .clone(); + +                let name = Name { namespace, name }; + +                namespaces.push(namespace_declarations.clone()); + +                let content = Self::content_from_xml(namespaces, content)?; + +                namespaces.pop(); + +                return Ok(Element { +                    name, +                    namespace_decl: namespace_declarations, +                    attributes, +                    content, +                }); +            } +        } +    } + +    fn content_from_xml( +        namespaces: &mut Vec<HashSet<Namespace>>, +        element: xml::Content, +    ) -> Result<Vec<Content>> { +        let mut content = Vec::new(); +        let mut text = element.char_data.map(|str| String::from(*str)); +        for (content_item, char_data) in element.content { +            match content_item { +                xml::ContentItem::Element(element) => { +                    text.map(|text| content.push(Content::Text(text))); +                    content.push(Content::Element(Self::element_from_xml( +                        namespaces, element, +                    )?)); +                    text = char_data.map(|str| String::from(*str)); +                } +                xml::ContentItem::Reference(reference) => { +                    let data = reference.process()?; +                    if let Some(text) = &mut text { +                        text.push(data) +                    } else { +                        text = Some(String::from(data)) +                    } +                    char_data.map(|char_data| text.as_mut().map(|s| s.push_str(*char_data))); +                } +                xml::ContentItem::CDSect(cd_sect) => { +                    if let Some(text) = &mut text { +                        text.push_str(**cd_sect) +                    } else { +                        text = Some(String::from(**cd_sect)) +                    } +                    char_data.map(|char_data| text.as_mut().map(|s| s.push_str(*char_data))); +                } +                // TODO: is this important? +                xml::ContentItem::PI(pi) => { +                    char_data.map(|char_data| text.as_mut().map(|s| s.push_str(*char_data))); +                } +                // TODO: comments? +                xml::ContentItem::Comment(comment) => { +                    char_data.map(|char_data| text.as_mut().map(|s| s.push_str(*char_data))); +                } +            } +        } +        text.map(|text| content.push(Content::Text(text))); +        todo!() +    } +} +  // impl<R> Reader<R>  // where  //     R: AsyncBufReadExt + Unpin, diff --git a/src/xml/mod.rs b/src/xml/mod.rs index f072fde..221c334 100644 --- a/src/xml/mod.rs +++ b/src/xml/mod.rs @@ -1,4 +1,6 @@ -use std::char; +use std::{char, ops::Deref}; + +use crate::error::Error;  pub mod composers;  pub mod parsers; @@ -14,40 +16,91 @@ pub enum NSAttName<'s> {  #[derive(Clone, Debug)]  pub struct PrefixedAttName<'s>(NCName<'s>); +impl<'s> Deref for PrefixedAttName<'s> { +    type Target = NCName<'s>; + +    fn deref(&self) -> &Self::Target { +        &self.0 +    } +} +  /// [3]   	DefaultAttName	   ::=   	'xmlns';  #[derive(Clone, Debug)]  pub struct DefaultAttName;  /// [4]   	NCName	   ::=   	Name - (Char* ':' Char*) -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)]  pub struct NCName<'s>(&'s str); +impl<'s> Deref for NCName<'s> { +    type Target = &'s str; + +    fn deref(&self) -> &Self::Target { +        &self.0 +    } +} +  /// [7]   	QName	   ::=   	PrefixedName | UnprefixedName -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)]  pub enum QName<'s> {      PrefixedName(PrefixedName<'s>),      UnprefixedName(UnprefixedName<'s>),  } +impl<'s> ToString for QName<'s> { +    fn to_string(&self) -> String { +        match self { +            QName::PrefixedName(prefixed_name) => { +                format!("{}:{}", **prefixed_name.prefix, **prefixed_name.local_part) +            } +            QName::UnprefixedName(unprefixed_name) => unprefixed_name.to_string(), +        } +    } +} +  /// [8]   	PrefixedName	   ::=   	Prefix ':' LocalPart -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)]  pub struct PrefixedName<'s> { -    prefix: Prefix<'s>, -    local_part: LocalPart<'s>, +    pub(crate) prefix: Prefix<'s>, +    pub(crate) local_part: LocalPart<'s>,  }  /// [9]   	UnprefixedName	   ::=   	LocalPart -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)]  pub struct UnprefixedName<'s>(LocalPart<'s>); +impl<'s> Deref for UnprefixedName<'s> { +    type Target = LocalPart<'s>; + +    fn deref(&self) -> &Self::Target { +        &self.0 +    } +} +  /// [10]   	Prefix	   ::=   	NCName -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)]  pub struct Prefix<'s>(NCName<'s>); +impl<'s> Deref for Prefix<'s> { +    type Target = NCName<'s>; + +    fn deref(&self) -> &Self::Target { +        &self.0 +    } +} +  /// [11]   	LocalPart	   ::=   	NCName -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)]  pub struct LocalPart<'s>(NCName<'s>); +impl<'s> Deref for LocalPart<'s> { +    type Target = NCName<'s>; + +    fn deref(&self) -> &Self::Target { +        &self.0 +    } +} +  // xml spec  /// [1]   	document	   ::=   	prolog element Misc* @@ -57,6 +110,14 @@ pub type Document<'s> = (Prolog<'s>, Element<'s>, Vec<Misc<'s>>);  #[repr(transparent)]  pub struct Char(char); +impl Deref for Char { +    type Target = char; + +    fn deref(&self) -> &Self::Target { +        &self.0 +    } +} +  /// [3]   	S	   ::=   	(#x20 | #x9 | #xD | #xA)+  #[derive(Clone)]  #[repr(transparent)] @@ -66,28 +127,76 @@ pub struct S;  #[repr(transparent)]  pub struct NameStartChar(char); +impl Deref for NameStartChar { +    type Target = char; + +    fn deref(&self) -> &Self::Target { +        &self.0 +    } +} +  /// [4a]   	NameChar	   ::=   	NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]  #[repr(transparent)]  pub struct NameChar(char); +impl Deref for NameChar { +    type Target = char; + +    fn deref(&self) -> &Self::Target { +        &self.0 +    } +} +  /// [5]   	Name	   ::=   	NameStartChar (NameChar)*  #[derive(Debug, Clone, PartialEq, Eq)]  #[repr(transparent)]  pub struct Name<'s>(&'s str); +impl<'s> Deref for Name<'s> { +    type Target = &'s str; + +    fn deref(&self) -> &Self::Target { +        &self.0 +    } +} +  /// [6]   	Names	   ::=   	Name (#x20 Name)*  #[repr(transparent)]  pub struct Names<'s>(Vec<Name<'s>>); +impl<'s> Deref for Names<'s> { +    type Target = Vec<Name<'s>>; + +    fn deref(&self) -> &Self::Target { +        &self.0 +    } +} +  /// [7]   	Nmtoken	   ::=   	(NameChar)+  #[derive(Debug, Clone)]  #[repr(transparent)]  pub struct Nmtoken<'s>(&'s str); +impl<'s> Deref for Nmtoken<'s> { +    type Target = &'s str; + +    fn deref(&self) -> &Self::Target { +        &self.0 +    } +} +  /// [8]   	Nmtokens	   ::=   	Nmtoken (#x20 Nmtoken)*  #[repr(transparent)]  pub struct Nmtokens<'s>(Vec<Nmtoken<'s>>); +impl<'s> Deref for Nmtokens<'s> { +    type Target = Vec<Nmtoken<'s>>; + +    fn deref(&self) -> &Self::Target { +        &self.0 +    } +} +  #[derive(Clone, Debug)]  pub enum EntityValueData<'s> {      String(&'s str), @@ -115,6 +224,24 @@ pub enum AttValue<'s> {      SingleQuoted(Vec<AttValueData<'s>>),  } +impl<'s> AttValue<'s> { +    pub fn process(&self) -> crate::Result<String> { +        let mut output = String::new(); +        let data; +        match self { +            AttValue::DoubleQuoted(vec) => data = vec, +            AttValue::SingleQuoted(vec) => data = vec, +        } +        for data in data { +            match data { +                AttValueData::String(s) => output.push_str(s), +                AttValueData::Reference(reference) => output.push(reference.process()?), +            } +        } +        Ok(output) +    } +} +  /// [11]   	SystemLiteral	   ::=   	('"' [^"]* '"') | ("'" [^']* "'")  #[derive(Debug)]  pub enum SystemLiteral<'s> { @@ -138,11 +265,27 @@ pub struct PubidChar(char);  #[repr(transparent)]  pub struct CharData<'s>(&'s str); +impl<'s> Deref for CharData<'s> { +    type Target = &'s str; + +    fn deref(&self) -> &Self::Target { +        &self.0 +    } +} +  /// [15]    Comment	   ::=   	'<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'  #[derive(Clone, Debug, PartialEq, Eq)]  #[repr(transparent)]  pub struct Comment<'s>(&'s str); +impl<'s> Deref for Comment<'s> { +    type Target = &'s str; + +    fn deref(&self) -> &Self::Target { +        &self.0 +    } +} +  /// [16]   	PI	   ::=   	'<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'  #[derive(Clone, Debug)]  pub struct PI<'s> { @@ -160,6 +303,14 @@ pub struct PITarget<'s>(Name<'s>);  #[repr(transparent)]  pub struct CDSect<'s>(CData<'s>); +impl<'s> Deref for CDSect<'s> { +    type Target = CData<'s>; + +    fn deref(&self) -> &Self::Target { +        &self.0 +    } +} +  /// [19]   	CDStart	   ::=   	'<![CDATA['  #[derive(Clone, Debug, PartialEq, Eq)]  pub struct CDStart; @@ -169,6 +320,14 @@ pub struct CDStart;  #[repr(transparent)]  pub struct CData<'s>(&'s str); +impl<'s> Deref for CData<'s> { +    type Target = &'s str; + +    fn deref(&self) -> &Self::Target { +        &self.0 +    } +} +  /// [21]   	CDEnd	   ::=   	']]>'  #[derive(Clone, Debug, PartialEq, Eq)]  pub struct CDEnd; @@ -286,8 +445,8 @@ pub enum Element<'s> {  /// [40]   	STag	   ::=   	'<' Name (S Attribute)* S? '>'  #[derive(Debug, Clone)]  pub struct STag<'s> { -    name: QName<'s>, -    attributes: Vec<Attribute<'s>>, +    pub(crate) name: QName<'s>, +    pub(crate) attributes: Vec<Attribute<'s>>,  }  /// [15]   	Attribute	   ::=   	NSAttName Eq AttValue | QName Eq AttValue @@ -309,7 +468,7 @@ pub enum Attribute<'s> {  /// [42]   	ETag	   ::=   	'</' Name S? '>'  #[derive(Debug, Clone)]  pub struct ETag<'s> { -    name: QName<'s>, +    pub(crate) name: QName<'s>,  }  #[derive(Debug, Clone)] @@ -324,16 +483,16 @@ pub enum ContentItem<'s> {  /// [43]   	content	   ::=   	CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*  #[derive(Debug, Clone)]  pub struct Content<'s> { -    char_data: Option<CharData<'s>>, -    content: Vec<(ContentItem<'s>, Option<CharData<'s>>)>, +    pub(crate) char_data: Option<CharData<'s>>, +    pub(crate) content: Vec<(ContentItem<'s>, Option<CharData<'s>>)>,  }  /// [14]   	EmptyElemTag	   ::=   	'<' QName (S Attribute)* S? '/>'  /// [44]   	EmptyElemTag	   ::=   	'<' Name (S Attribute)* S? '/>'	[WFC: Unique Att Spec]  #[derive(Debug, Clone)]  pub struct EmptyElemTag<'s> { -    name: QName<'s>, -    attributes: Vec<Attribute<'s>>, +    pub(crate) name: QName<'s>, +    pub(crate) attributes: Vec<Attribute<'s>>,  }  /// [17]   	elementdecl	   ::=   	'<!ELEMENT' S QName S contentspec S? '>' @@ -503,6 +662,32 @@ pub enum CharRef<'s> {      Hexadecimal(&'s str),  } +impl<'s> CharRef<'s> { +    pub fn process(&self) -> crate::Result<char> { +        let int: u32; +        match self { +            CharRef::Decimal(dec) => { +                int = dec.parse()?; +            } +            CharRef::Hexadecimal(hex) => { +                int = <u32>::from_str_radix(hex, 16)?; +            } +        } +        let c = std::char::from_u32(int); + +        let c = c.ok_or_else(|| Error::InvalidCharRef(int.to_string()))?; +        if matches!(c, '\u{9}' | '\u{A}' | '\u{D}' | '\u{20}'..='\u{D7FF}' | '\u{E000}'..='\u{FFFD}' | '\u{10000}'..='\u{10FFFF}') +        { +            return Ok(c); +        } else { +            return Err(Error::InvalidCharRef(format!( +                "{} is not a valid xml char", +                c +            ))); +        }; +    } +} +  /// [67]   	Reference	   ::=   	EntityRef | CharRef  #[derive(Clone, Debug)]  pub enum Reference<'s> { @@ -510,10 +695,34 @@ pub enum Reference<'s> {      CharRef(CharRef<'s>),  } +impl<'s> Reference<'s> { +    pub fn process(&self) -> crate::Result<char> { +        match self { +            Reference::EntityRef(entity_ref) => match *entity_ref.deref().deref() { +                "amp" => Ok('&'), +                "lt" => Ok('<'), +                "gt" => Ok('>'), +                "apos" => Ok('\''), +                "quot" => Ok('"'), +                e => return Err(Error::EntityProcessError(e.to_string())), +            }, +            Reference::CharRef(char_ref) => Ok(char_ref.process()?), +        } +    } +} +  /// [68]   	EntityRef	   ::=   	'&' Name ';'  #[derive(Clone, Debug)]  pub struct EntityRef<'s>(Name<'s>); +impl<'s> Deref for EntityRef<'s> { +    type Target = Name<'s>; + +    fn deref(&self) -> &Self::Target { +        &self.0 +    } +} +  /// [69]   	PEReference	   ::=   	'%' Name ';'  #[derive(Clone, Debug)]  #[repr(transparent)] | 
