diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/error.rs | 1 | ||||
| -rw-r--r-- | src/reader.rs | 362 | ||||
| -rw-r--r-- | src/xml/composers.rs | 1 | ||||
| -rw-r--r-- | src/xml/mod.rs | 18 | ||||
| -rw-r--r-- | src/xml/parsers.rs | 17 | 
5 files changed, 305 insertions, 94 deletions
| diff --git a/src/error.rs b/src/error.rs index 1f9c1e6..c84c7d0 100644 --- a/src/error.rs +++ b/src/error.rs @@ -14,6 +14,7 @@ pub enum Error {      DuplicateAttribute(String),      UnqualifiedNamespace(String),      MismatchedEndTag(String, String), +    NotInElement(String),  }  impl From<std::io::Error> for Error { diff --git a/src/reader.rs b/src/reader.rs index bca8edd..dc16d31 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -1,9 +1,11 @@  use circular::Buffer; -use futures::Stream; +use futures::{FutureExt, Stream};  use nom::Err;  use std::{      collections::{BTreeMap, HashMap, HashSet}, +    future::Future,      path::Prefix, +    pin::{pin, Pin},      str::{self, FromStr},  };  use tokio::io::{AsyncBufRead, AsyncBufReadExt, AsyncRead, AsyncReadExt}; @@ -42,14 +44,77 @@ impl<R> Reader<R>  where      R: AsyncRead + Unpin,  { -    async fn read_buf(&mut self) -> Result<usize> { +    async fn read_buf<'s>(&mut self) -> Result<usize> {          Ok(self.inner.read_buf(&mut self.buffer).await?)      } +    async fn read_prolog<'s>(&'s mut self) -> Result<()> { +        loop { +            self.read_buf().await?; +            let input = str::from_utf8(self.buffer.data())?; +            match xml::Prolog::parse(input) { +                Ok((rest, _prolog)) => { +                    let len = self.buffer.available_data() - rest.as_bytes().len(); +                    self.buffer.consume(len); +                    return Ok(()); +                } +                std::result::Result::Err(e) => match e { +                    Err::Incomplete(_) => {} +                    // TODO: better error +                    Err::Error(e) => return Err(Error::ParseError(e.to_string())), +                    Err::Failure(e) => return Err(Error::ParseError(e.to_string())), +                }, +            } +        } +    } + +    async fn read_start_tag<'s>(&'s mut self) -> Result<Element> { +        loop { +            self.read_buf().await?; +            let input = str::from_utf8(self.buffer.data())?; +            match xml::STag::parse(input) { +                Ok((rest, e)) => { +                    let len = self.buffer.available_data() - rest.as_bytes().len(); +                    let element = +                        Reader::<R>::start_tag_from_xml(&mut self.depth, &mut self.namespaces, e)?; +                    self.buffer.consume(len); +                    return Ok(element); +                } +                std::result::Result::Err(e) => match e { +                    Err::Incomplete(_) => {} +                    // TODO: better error +                    Err::Error(e) => return Err(Error::ParseError(e.to_string())), +                    Err::Failure(e) => return Err(Error::ParseError(e.to_string())), +                }, +            } +        } +    } + +    async fn read_end_tag<'s>(&'s mut self) -> Result<()> { +        loop { +            self.read_buf().await?; +            let input = str::from_utf8(self.buffer.data())?; +            match xml::ETag::parse(input) { +                Ok((rest, e)) => { +                    let len = self.buffer.available_data() - rest.as_bytes().len(); +                    Reader::<R>::end_tag_from_xml(&mut self.depth, &mut self.namespaces, e)?; +                    self.buffer.consume(len); +                    return Ok(()); +                } +                std::result::Result::Err(e) => match e { +                    Err::Incomplete(_) => {} +                    // TODO: better error +                    Err::Error(e) => return Err(Error::ParseError(e.to_string())), +                    Err::Failure(e) => return Err(Error::ParseError(e.to_string())), +                }, +            } +        } +    } +      async fn read_element<'s>(&'s mut self) -> Result<Element> { -        self.read_buf().await?; -        let mut input = str::from_utf8(self.buffer.data())?;          loop { +            self.read_buf().await?; +            let input = str::from_utf8(self.buffer.data())?;              match xml::Element::parse(input) {                  Ok((rest, e)) => {                      let len = self.buffer.available_data() - rest.as_bytes().len(); @@ -58,10 +123,37 @@ where                      return Ok(element);                  }                  std::result::Result::Err(e) => match e { -                    Err::Incomplete(_) => { -                        self.read_buf().await?; -                        input = str::from_utf8(self.buffer.data())?; +                    Err::Incomplete(_) => {} +                    // TODO: better error +                    Err::Error(e) => return Err(Error::ParseError(e.to_string())), +                    Err::Failure(e) => return Err(Error::ParseError(e.to_string())), +                }, +            } +        } +    } + +    async fn read_content<'s>(&'s mut self) -> Result<Content> { +        loop { +            self.read_buf().await?; +            let input = str::from_utf8(self.buffer.data())?; + +            match xml::ContentItem::parse(input) { +                Ok((rest, c)) => { +                    match c { +                        xml::ContentItem::CharData(char_data) => todo!(), +                        xml::ContentItem::Element(element) => todo!(), +                        xml::ContentItem::Reference(reference) => todo!(), +                        xml::ContentItem::CDSect(cdsect) => todo!(), +                        xml::ContentItem::PI(pi) => todo!(), +                        xml::ContentItem::Comment(comment) => todo!(),                      } +                    let len = self.buffer.available_data() - rest.as_bytes().len(); +                    let content = Reader::<R>::content_item_from_xml(&mut self.namespaces, e)?; +                    self.buffer.consume(len); +                    return Ok(element); +                } +                std::result::Result::Err(e) => match e { +                    Err::Incomplete(_) => {}                      // TODO: better error                      Err::Error(e) => return Err(Error::ParseError(e.to_string())),                      Err::Failure(e) => return Err(Error::ParseError(e.to_string())), @@ -72,6 +164,145 @@ where  }  impl<R> Reader<R> { +    fn content_item_from_xml( +        namespaces: &mut Vec<HashSet<Namespace>>, +        item: xml::ContentItem, +    ) -> Result<Content> { +        todo!() +    } + +    fn start_tag_from_xml( +        depth: &mut Vec<Name>, +        namespaces: &mut Vec<HashSet<Namespace>>, +        s_tag: xml::STag, +    ) -> Result<Element> { +        let mut namespace_declarations = HashSet::new(); +        for (prefix, namespace) in s_tag.attributes.iter().filter_map(|attribute| { +            if let xml::Attribute::NamespaceDeclaration { ns_name, value } = attribute { +                Some((ns_name, value)) +            } else { +                None +            } +        }) { +            let prefix = match prefix { +                xml::NSAttName::PrefixedAttName(prefixed_att_name) => { +                    Some(prefixed_att_name.to_string()) +                } +                xml::NSAttName::DefaultAttName => None, +            }; +            let namespace = Namespace { +                prefix, +                namespace: namespace.process()?, +            }; +            if !namespace_declarations.insert(namespace.clone()) { +                return Err(Error::DuplicateNameSpace(namespace)); +            } +        } + +        // all namespaces available to the element (from both parent elements and element itself) +        let namespace_stack: Vec<&Namespace> = namespaces +            .iter() +            .flatten() +            .chain(namespace_declarations.iter()) +            .collect(); + +        let mut attributes = HashMap::new(); + +        for (q_name, value) in s_tag.attributes.iter().filter_map(|attribute| { +            if let xml::Attribute::Attribute { name, value } = attribute { +                Some((name, value)) +            } else { +                None +            } +        }) { +            let namespace; +            let attribute_name; +            match q_name { +                xml::QName::PrefixedName(prefixed_name) => { +                    namespace = namespace_stack.iter().rfind(|namespace| { +                        namespace.prefix.as_deref() == Some(**prefixed_name.prefix) +                    }); +                    attribute_name = prefixed_name.local_part.to_string(); +                } +                xml::QName::UnprefixedName(unprefixed_name) => { +                    namespace = namespace_stack +                        .iter() +                        .rfind(|namespace| namespace.prefix == None); +                    attribute_name = unprefixed_name.to_string(); +                } +            } +            if let Some(namespace) = namespace { +                let namespace = (*namespace).clone(); +                let name = Name { +                    namespace, +                    name: attribute_name, +                }; +                let value = value.process()?; +                if let Some(_value) = attributes.insert(name, value) { +                    return Err(Error::DuplicateAttribute(q_name.to_string())); +                } +            } else { +                return Err(Error::UnqualifiedNamespace(q_name.to_string())); +            } +        } + +        let name; +        let namespace; +        match &s_tag.name { +            xml::QName::PrefixedName(prefixed_name) => { +                namespace = namespace_stack +                    .iter() +                    .rfind(|namespace| namespace.prefix.as_deref() == Some(**prefixed_name.prefix)); +                name = prefixed_name.local_part.to_string(); +            } +            xml::QName::UnprefixedName(unprefixed_name) => { +                namespace = namespace_stack +                    .iter() +                    .rfind(|namespace| namespace.prefix == None); +                name = unprefixed_name.to_string(); +            } +        } + +        let namespace = (*namespace +            .ok_or_else(|| Error::UnqualifiedNamespace(s_tag.name.to_string()))?) +        .clone(); + +        let name = Name { namespace, name }; + +        depth.push(name.clone()); + +        namespaces.push(namespace_declarations.clone()); + +        return Ok(Element { +            name, +            namespace_decl: namespace_declarations, +            attributes, +            content: Vec::new(), +        }); +    } + +    fn end_tag_from_xml( +        depth: &mut Vec<Name>, +        namespaces: &mut Vec<HashSet<Namespace>>, +        e_tag: xml::ETag, +    ) -> Result<()> { +        if let Some(s_tag_name) = depth.pop() { +            if s_tag_name.namespace.prefix.as_deref() == e_tag.name.prefix() +                && s_tag_name.name == e_tag.name.local_part() +            { +                namespaces.pop(); +                return Ok(()); +            } else { +                return Err(Error::MismatchedEndTag( +                    s_tag_name.name, +                    e_tag.name.to_string(), +                )); +            } +        } else { +            return Err(Error::NotInElement(e_tag.name.to_string())); +        } +    } +      fn element_from_xml(          namespaces: &mut Vec<HashSet<Namespace>>,          element: xml::Element, @@ -343,88 +574,22 @@ impl<R> Reader<R> {      }  } -// impl<R> Reader<R> -// where -//     R: AsyncBufReadExt + Unpin, -// { -//     /// could resursively read and include namespace tree with values to be shadowed within new local context -//     async fn read_recursive(&mut self, namespaces: BTreeMap<Option<String>, String>) -> Result<Element, Error> { -//         let element; -//         let len; -//         loop { -//             let buf = self.inner.fill_buf().await?; -//             let input = str::from_utf8(buf)?; -//             match crate::xml::element(input) { -//                 Ok((rest, e)) => { -//                     element = e; -//                     len = buf.len() - rest.len(); -//                     break; -//                 } -//                 Err(e) => match e { -//                     Err::Incomplete(_) => (), -//                     e => return Err::<E, Error>(Error::ParseError(input.to_owned())), -//                 }, -//             } -//         } - -//         let final; -//         match element { -//             crate::xml::Element::Empty(e) => { -//                 let final = Element { - -//                 } -//             }, -//             crate::xml::Element::NotEmpty(_, _, _) => todo!(), -//         } - -//         self.inner.consume(len); -//         todo!() -//     } -// /// reads entire next prolog, element, or misc -// pub async fn read<E: From<Element>>(&mut self) -> Result<E, Error> { -//     let element; -//     let len; -//     loop { -//         let buf = self.inner.fill_buf().await?; -//         let input = str::from_utf8(buf)?; -//         match crate::xml::element(input) { -//             Ok((rest, e)) => { -//                 element = e; -//                 len = buf.len() - rest.len(); -//                 break; -//             } -//             Err(e) => match e { -//                 Err::Incomplete(_) => (), -//                 e => return Err::<E, Error>(Error::ParseError(input.to_owned())), -//             }, -//         } -//     } -//     self.inner.consume(len); - -//     // Ok(element) -//     todo!() -// } -// pub async fn read_start(&self) -> Result<impl From<Element>, Error> { -//     todo!() -// } -// pub async fn read_end(&self) -> Result<(), Error> { -//     todo!() -// } -// } - -// impl<R: AsyncBufRead> Stream for Reader<R> { -//     type Item = impl From<Element>; - -//     async fn poll_next( -//         self: std::pin::Pin<&mut Self>, -//         cx: &mut std::task::Context<'_>, -//     ) -> std::task::Poll<Option<Self::Item>> { -//         todo!() -//     } -// } +impl<R: AsyncRead + Unpin> Stream for Reader<R> { +    type Item = Result<Content>; + +    fn poll_next( +        self: std::pin::Pin<&mut Self>, +        cx: &mut std::task::Context<'_>, +    ) -> std::task::Poll<Option<Self::Item>> { +        let mut e = self; +        let mut pinned = pin!(e.read_content()); +        pinned.as_mut().poll(cx).map(|result| Some(result)) +    } +}  #[cfg(test)]  mod test { +    use futures::{sink::Buffer, StreamExt};      use tokio::io::AsyncRead;      use super::Reader; @@ -448,10 +613,7 @@ mod test {          }      } -    #[tokio::test] -    async fn test_element_read() { -        let mock = MockAsyncReader::new( -            "<xs:schema +    const TEST_DOC: &'static str = "<xs:schema         xmlns:xs='http://www.w3.org/2001/XMLSchema'         targetNamespace='http://etherx.jabber.org/streams'         xmlns='http://etherx.jabber.org/streams' @@ -529,11 +691,25 @@ mod test {         </xs:complexType>       </xs:element> -   </xs:schema>asdf -", -        ); +   </xs:schema>asdf"; + +    #[tokio::test] +    async fn test_element_read() { +        let mock = MockAsyncReader::new(TEST_DOC);          let mut reader = Reader::new(mock);          let element = reader.read_element().await.unwrap();          println!("{:#?}", element);      } + +    #[tokio::test] +    async fn test_element_stream() { +        let mock = MockAsyncReader::new(TEST_DOC); +        let mut reader = Reader::new(mock); +        let element = reader.read_start_tag().await.unwrap(); +        println!("{:#?}", element); +        loop { +            let element = reader.next().await.unwrap(); +            println!("{:#?}", element); +        } +    }  } diff --git a/src/xml/composers.rs b/src/xml/composers.rs index 949bb65..b8fbe13 100644 --- a/src/xml/composers.rs +++ b/src/xml/composers.rs @@ -817,6 +817,7 @@ impl<'s> Composer<'s> for Content<'s> {                  ContentItem::CDSect(cd_sect) => cd_sect.write(writer).await?,                  ContentItem::PI(pi) => pi.write(writer).await?,                  ContentItem::Comment(comment) => comment.write(writer).await?, +                _ => todo!("verify no split chardata"),              }              if let Some(char_data) = char_data {                  char_data.write(writer).await?; diff --git a/src/xml/mod.rs b/src/xml/mod.rs index 221c334..9424d0b 100644 --- a/src/xml/mod.rs +++ b/src/xml/mod.rs @@ -47,6 +47,22 @@ pub enum QName<'s> {      UnprefixedName(UnprefixedName<'s>),  } +impl<'s> QName<'s> { +    pub fn prefix(&self) -> Option<&'s str> { +        match self { +            QName::PrefixedName(prefixed_name) => return Some(**prefixed_name.prefix), +            QName::UnprefixedName(_) => return None, +        } +    } + +    pub fn local_part(&self) -> &str { +        match self { +            QName::PrefixedName(prefixed_name) => return **prefixed_name.local_part, +            QName::UnprefixedName(unprefixed_name) => return ****unprefixed_name, +        } +    } +} +  impl<'s> ToString for QName<'s> {      fn to_string(&self) -> String {          match self { @@ -473,7 +489,7 @@ pub struct ETag<'s> {  #[derive(Debug, Clone)]  pub enum ContentItem<'s> { -    // CharData(&'s str), +    CharData(CharData<'s>),      Element(Element<'s>),      Reference(Reference<'s>),      CDSect(CDSect<'s>), diff --git a/src/xml/parsers.rs b/src/xml/parsers.rs index 3f67be7..3cbefd3 100644 --- a/src/xml/parsers.rs +++ b/src/xml/parsers.rs @@ -733,6 +733,23 @@ impl<'s> Parser<'s, ETag<'s>> for ETag<'s> {      }  } +impl<'s> Parser<'s, ContentItem<'s>> for ContentItem<'s> { +    fn parse(input: &'s str) -> IResult<&str, ContentItem<'s>> { +        alt(( +            map(CharData::parse, |char_data| { +                ContentItem::CharData(char_data) +            }), +            map(Element::parse, |element| ContentItem::Element(element)), +            map(Reference::parse, |reference| { +                ContentItem::Reference(reference) +            }), +            map(CDSect::parse, |cd_sect| ContentItem::CDSect(cd_sect)), +            map(PI::parse, |pi| ContentItem::PI(pi)), +            map(Comment::parse, |comment| ContentItem::Comment(comment)), +        ))(input) +    } +} +  /// [43]   	content	   ::=   	CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*  impl<'s> Parser<'s, Content<'s>> for Content<'s> {      fn parse(input: &'s str) -> IResult<&str, Content<'s>> { | 
