diff options
| author | 2024-11-24 02:05:41 +0000 | |
|---|---|---|
| committer | 2024-11-24 02:05:41 +0000 | |
| commit | 87e6ff405b0d687ed341f304fba7c5b391a49359 (patch) | |
| tree | f56ddd5271fb2bb104f641c035e58a744038f5cf | |
| parent | c1e6f7e918eacaad9c8b1a4b27fcd4d6245aaf68 (diff) | |
| download | peanuts-87e6ff405b0d687ed341f304fba7c5b391a49359.tar.gz peanuts-87e6ff405b0d687ed341f304fba7c5b391a49359.tar.bz2 peanuts-87e6ff405b0d687ed341f304fba7c5b391a49359.zip | |
misc
| -rw-r--r-- | src/declaration.rs | 21 | ||||
| -rw-r--r-- | src/element.rs | 19 | ||||
| -rw-r--r-- | src/error.rs | 8 | ||||
| -rw-r--r-- | src/lib.rs | 11 | ||||
| -rw-r--r-- | src/reader.rs | 108 | ||||
| -rw-r--r-- | src/writer.rs | 75 | ||||
| -rw-r--r-- | src/xml/mod.rs | 44 | 
7 files changed, 240 insertions, 46 deletions
| diff --git a/src/declaration.rs b/src/declaration.rs new file mode 100644 index 0000000..2c0855f --- /dev/null +++ b/src/declaration.rs @@ -0,0 +1,21 @@ +pub struct Declaration { +    pub version_info: VersionInfo, +    pub encoding_decl: Option<String>, +    pub sd_decl: Option<bool>, +} + +#[derive(Clone, Copy)] +pub enum VersionInfo { +    One, +    OneDotOne, +} + +impl Declaration { +    pub fn version(version: VersionInfo) -> Self { +        Self { +            version_info: version, +            encoding_decl: None, +            sd_decl: None, +        } +    } +} diff --git a/src/element.rs b/src/element.rs index 04f2e5e..2b149a8 100644 --- a/src/element.rs +++ b/src/element.rs @@ -9,8 +9,22 @@ use std::{  use crate::{      error::Error,      xml::{self, parsers_complete::Parser, Attribute}, +    Result,  }; +pub trait FromElement: Sized { +    fn from_element(element: Element) -> Result<Self>; +} + +pub trait IntoElement { +    fn into_element(&self) -> Element; + +    fn get_content(&self) -> Vec<Content> { +        let element = self.into_element(); +        element.content +    } +} +  // when are namespaces names chosen then if they are automatically calculated  // namespaces are held by readers and writers.  #[derive(PartialEq, Eq, Hash, Clone, Debug)] @@ -26,7 +40,7 @@ pub struct Name {      pub local_name: String,  } -#[derive(Debug)] +#[derive(Debug, Clone)]  pub enum Content {      Element(Element),      Text(String), @@ -35,7 +49,7 @@ pub enum Content {  }  // should this be a trait? -#[derive(Debug)] +#[derive(Debug, Clone)]  pub struct Element {      pub name: Name,      // namespace: Name, @@ -51,6 +65,7 @@ pub struct Element {      // you can validate the prefix and calculate the namespace from the current reader context      // this results in readers and writers being able to return qualification errors as they aren't able to create elements until every part is qualified.      pub attributes: HashMap<Name, String>, +    // TODO: make a hashmap maybe? to be able to address parts of the content individually      pub content: Vec<Content>,  } diff --git a/src/error.rs b/src/error.rs index 69993ed..eda527e 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,6 +1,6 @@  use std::{num::ParseIntError, str::Utf8Error}; -use crate::element::{Name, NamespaceDeclaration}; +use crate::element::{Content, Name, NamespaceDeclaration};  #[derive(Debug)]  pub enum Error { @@ -17,6 +17,12 @@ pub enum Error {      NotInElement(String),      ExtraData(String),      UndeclaredNamespace(String), +    IncorrectName(Name), +    UnexpectedAttribute(Name), +    DeserializeError(String), +    UnexpectedNumberOfContents(usize), +    UnexpectedContent(Content), +    UnexpectedElement(Name),  }  impl From<std::io::Error> for Error { @@ -1,7 +1,16 @@ -mod element; +pub mod declaration; +pub mod element;  mod error;  pub mod reader;  mod writer;  pub mod xml;  pub type Result<T> = std::result::Result<T, error::Error>; + +pub const XML_NS: &str = "http://www.w3.org/XML/1998/namespace"; +pub const XMLNS_NS: &str = "http://www.w3.org/2000/xmlns/"; + +pub use element::Element; +pub use error::Error; +pub use reader::Reader; +pub use writer::Writer; diff --git a/src/reader.rs b/src/reader.rs index f1f3744..ee8d491 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -2,7 +2,7 @@ use circular::Buffer;  use futures::{FutureExt, Stream};  use nom::Err;  use std::{ -    collections::{BTreeMap, HashMap, HashSet}, +    collections::{hash_set, BTreeMap, HashMap, HashSet},      future::Future,      path::Prefix,      pin::{pin, Pin}, @@ -10,19 +10,20 @@ use std::{  };  use tokio::io::{AsyncBufRead, AsyncBufReadExt, AsyncRead, AsyncReadExt}; -static MAX_STANZA_SIZE: usize = 65536; -  use crate::{ -    element::{Content, Element, Name, NamespaceDeclaration}, +    declaration::{Declaration, VersionInfo}, +    element::{Content, Element, FromElement, Name, NamespaceDeclaration},      error::Error,      xml::{self, parsers::Parser}, -    Result, +    Result, XMLNS_NS, XML_NS,  }; +static MAX_STANZA_SIZE: usize = 65536; +  /// streaming reader that tracks depth and available namespaces at current depth  pub struct Reader<R> {      inner: R, -    buffer: Buffer, +    pub buffer: Buffer,      // holds which tags we are in atm over depth      // to have names reference namespaces could      depth: Vec<Name>, @@ -31,13 +32,27 @@ pub struct Reader<R> {  impl<R> Reader<R> {      pub fn new(reader: R) -> Self { +        let mut default_declarations = HashSet::new(); +        default_declarations.insert(NamespaceDeclaration { +            prefix: Some("xml".to_string()), +            namespace: XML_NS.to_string(), +        }); +        default_declarations.insert(NamespaceDeclaration { +            prefix: Some("xmlns".to_string()), +            namespace: XMLNS_NS.to_string(), +        });          Self {              inner: reader,              buffer: Buffer::with_capacity(MAX_STANZA_SIZE),              depth: Vec::new(), -            namespace_declarations: Vec::new(), +            // TODO: make sure reserved namespaces are never overwritten +            namespace_declarations: vec![default_declarations],          }      } + +    pub fn into_inner(self) -> R { +        self.inner +    }  }  impl<R> Reader<R> @@ -48,18 +63,35 @@ where          Ok(self.inner.read_buf(&mut self.buffer).await?)      } -    pub async fn read_prolog<'s>(&'s mut self) -> Result<()> { +    pub async fn read_prolog<'s>(&'s mut self) -> Result<Option<Declaration>> {          loop { -            self.read_buf().await?;              let input = str::from_utf8(self.buffer.data())?;              match xml::Prolog::parse(input) { -                Ok((rest, _prolog)) => { +                Ok((rest, (decl, _misc, _doctype_decl))) => {                      let len = self.buffer.available_data() - rest.as_bytes().len(); -                    self.buffer.consume(len); -                    return Ok(()); +                    // TODO: return error if there is a doctype decl +                    if let Some(decl) = decl { +                        let declaration = Declaration { +                            version_info: match *decl.version_info { +                                xml::VersionNum::One => VersionInfo::One, +                                xml::VersionNum::OneDotOne => VersionInfo::OneDotOne, +                            }, +                            encoding_decl: decl +                                .encoding_decl +                                .map(|encoding_decl| (**encoding_decl).to_string()), +                            sd_decl: decl.sd_decl.map(|sd_decl| *sd_decl), +                        }; +                        self.buffer.consume(len); +                        return Ok(Some(declaration)); +                    } else { +                        self.buffer.consume(len); +                        return Ok(None); +                    }                  }                  std::result::Result::Err(e) => match e { -                    Err::Incomplete(_) => {} +                    Err::Incomplete(_) => { +                        self.read_buf().await?; +                    }                      // TODO: better error                      Err::Error(e) => return Err(Error::ParseError(e.to_string())),                      Err::Failure(e) => return Err(Error::ParseError(e.to_string())), @@ -68,9 +100,18 @@ where          }      } +    pub async fn read_start<'s, T: FromElement>(&'s mut self) -> Result<T> { +        let element = self.read_start_tag().await?; +        FromElement::from_element(element) +    } + +    pub async fn read<'s, T: FromElement>(&'s mut self) -> Result<T> { +        let element = self.read_element().await?; +        FromElement::from_element(element) +    } +      pub async fn read_start_tag<'s>(&'s mut self) -> Result<Element> {          loop { -            self.read_buf().await?;              let input = str::from_utf8(self.buffer.data())?;              match xml::STag::parse(input) {                  Ok((rest, e)) => { @@ -84,7 +125,9 @@ where                      return Ok(element);                  }                  std::result::Result::Err(e) => match e { -                    Err::Incomplete(_) => {} +                    Err::Incomplete(_) => { +                        self.read_buf().await?; +                    }                      // TODO: better error                      Err::Error(e) => return Err(Error::ParseError(e.to_string())),                      Err::Failure(e) => return Err(Error::ParseError(e.to_string())), @@ -95,7 +138,6 @@ where      pub async fn read_end_tag<'s>(&'s mut self) -> Result<()> {          loop { -            self.read_buf().await?;              let input = str::from_utf8(self.buffer.data())?;              match xml::ETag::parse(input) {                  Ok((rest, e)) => { @@ -109,7 +151,9 @@ where                      return Ok(());                  }                  std::result::Result::Err(e) => match e { -                    Err::Incomplete(_) => {} +                    Err::Incomplete(_) => { +                        self.read_buf().await?; +                    }                      // TODO: better error                      Err::Error(e) => return Err(Error::ParseError(e.to_string())),                      Err::Failure(e) => return Err(Error::ParseError(e.to_string())), @@ -120,7 +164,6 @@ where      pub async fn read_element<'s>(&'s mut self) -> Result<Element> {          loop { -            self.read_buf().await?;              let input = str::from_utf8(self.buffer.data())?;              match xml::Element::parse(input) {                  Ok((rest, e)) => { @@ -131,7 +174,9 @@ where                      return Ok(element);                  }                  std::result::Result::Err(e) => match e { -                    Err::Incomplete(_) => {} +                    Err::Incomplete(_) => { +                        self.read_buf().await?; +                    }                      // TODO: better error                      Err::Error(e) => return Err(Error::ParseError(e.to_string())),                      Err::Failure(e) => return Err(Error::ParseError(e.to_string())), @@ -144,7 +189,6 @@ where          let mut last_char = false;          let mut text = String::new();          loop { -            self.read_buf().await?;              let input = str::from_utf8(self.buffer.data())?;              if last_char == false {                  match xml::CharData::parse(input) { @@ -155,7 +199,9 @@ where                          last_char = true;                      }                      std::result::Result::Err(e) => match e { -                        Err::Incomplete(_needed) => continue, +                        Err::Incomplete(_) => { +                            self.read_buf().await?; +                        }                          _ => match xml::ContentItem::parse(input) {                              Ok((rest, content_item)) => match content_item {                                  xml::ContentItem::Element(element) => { @@ -207,7 +253,9 @@ where                                  }                              },                              std::result::Result::Err(e) => match e { -                                Err::Incomplete(_) => continue, +                                Err::Incomplete(_) => { +                                    self.read_buf().await?; +                                }                                  // TODO: better error                                  Err::Error(e) => return Err(Error::ParseError(e.to_string())),                                  Err::Failure(e) => return Err(Error::ParseError(e.to_string())), @@ -263,7 +311,9 @@ where                          }                      },                      std::result::Result::Err(e) => match e { -                        Err::Incomplete(_) => continue, +                        Err::Incomplete(_) => { +                            self.read_buf().await?; +                        }                          // TODO: better error                          Err::Error(e) => return Err(Error::ParseError(e.to_string())),                          Err::Failure(e) => return Err(Error::ParseError(e.to_string())), @@ -622,7 +672,11 @@ impl<R> Reader<R> {          for (content_item, char_data) in xml_content.content {              match content_item {                  xml::ContentItem::Element(element) => { -                    text.map(|text| content.push(Content::Text(text))); +                    text.map(|text| { +                        if !text.is_empty() { +                            content.push(Content::Text(text)) +                        } +                    });                      content.push(Content::Element(Self::element_from_xml(                          namespaces, element,                      )?)); @@ -655,7 +709,11 @@ impl<R> Reader<R> {                  }              }          } -        text.map(|text| content.push(Content::Text(text))); +        text.map(|text| { +            if !text.is_empty() { +                content.push(Content::Text(text)) +            } +        });          Ok(content)      }  } diff --git a/src/writer.rs b/src/writer.rs index dc5b48a..e319fdc 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -5,10 +5,11 @@ use futures::Sink;  use tokio::io::{AsyncWrite, AsyncWriteExt};  use crate::{ -    element::{escape_str, Content, Element, Name, NamespaceDeclaration}, +    declaration::{Declaration, VersionInfo}, +    element::{escape_str, Content, Element, IntoElement, Name, NamespaceDeclaration},      error::Error, -    xml::{self, composers::Composer, parsers_complete::Parser, ETag}, -    Result, +    xml::{self, composers::Composer, parsers_complete::Parser, ETag, XMLDecl}, +    Result, XMLNS_NS, XML_NS,  };  // pub struct Writer<W, C = Composer> { @@ -20,21 +21,69 @@ pub struct Writer<W> {  impl<W> Writer<W> {      pub fn new(writer: W) -> Self { +        let mut default_declarations = HashSet::new(); +        default_declarations.insert(NamespaceDeclaration { +            prefix: Some("xml".to_string()), +            namespace: XML_NS.to_string(), +        }); +        default_declarations.insert(NamespaceDeclaration { +            prefix: Some("xmlns".to_string()), +            namespace: XMLNS_NS.to_string(), +        });          Self {              inner: writer,              depth: Vec::new(), -            namespace_declarations: Vec::new(), +            namespace_declarations: vec![default_declarations],          }      } + +    pub fn into_inner(self) -> W { +        self.inner +    }  }  impl<W: AsyncWrite + Unpin + Send> Writer<W> { +    pub async fn write_declaration(&mut self, version: VersionInfo) -> Result<()> { +        let declaration = Declaration::version(version); +        let version_info; +        match declaration.version_info { +            VersionInfo::One => version_info = xml::VersionInfo::SingleQuoted(xml::VersionNum::One), +            VersionInfo::OneDotOne => { +                version_info = xml::VersionInfo::SingleQuoted(xml::VersionNum::OneDotOne) +            } +        } +        let declaration = xml::XMLDecl { +            version_info, +            encoding_decl: None, +            sd_decl: None, +        }; +        declaration.write(&mut self.inner).await?; +        Ok(()) +    } + +    pub async fn write_full(&mut self, into_element: &impl IntoElement) -> Result<()> { +        let element = into_element.into_element(); +        Ok(self.write_element(&element).await?) +    } + +    pub async fn write_start(&mut self, into_element: &impl IntoElement) -> Result<()> { +        let element = into_element.into_element(); +        Ok(self.write_element_start(&element).await?) +    } + +    pub async fn write_all_content(&mut self, into_element: &impl IntoElement) -> Result<()> { +        for content in &into_element.get_content() { +            self.write_content(content).await?; +        } +        Ok(()) +    } +      #[async_recursion]      pub async fn write_element(&mut self, element: &Element) -> Result<()> {          if element.content.is_empty() {              self.write_empty(element).await?;          } else { -            self.write_start(element).await?; +            self.write_element_start(element).await?;              for content in &element.content {                  self.write_content(content).await?;              } @@ -107,12 +156,11 @@ impl<W: AsyncWrite + Unpin + Send> Writer<W> {              if let Some(prefix) = &prefix {                  att_name = xml::QName::PrefixedName(xml::PrefixedName {                      prefix: xml::Prefix::parse_full(prefix)?, -                    local_part: xml::LocalPart::parse_full(&element.name.local_name)?, +                    local_part: xml::LocalPart::parse_full(&name.local_name)?,                  })              } else { -                att_name = xml::QName::UnprefixedName(xml::UnprefixedName::parse_full( -                    &element.name.local_name, -                )?) +                att_name = +                    xml::QName::UnprefixedName(xml::UnprefixedName::parse_full(&name.local_name)?)              }              let value = xml::AttValue::from(value.as_str()); @@ -131,7 +179,7 @@ impl<W: AsyncWrite + Unpin + Send> Writer<W> {          Ok(())      } -    pub async fn write_start(&mut self, element: &Element) -> Result<()> { +    pub async fn write_element_start(&mut self, element: &Element) -> Result<()> {          let namespace_declarations_stack: Vec<_> = self              .namespace_declarations              .iter() @@ -195,12 +243,11 @@ impl<W: AsyncWrite + Unpin + Send> Writer<W> {              if let Some(prefix) = &prefix {                  att_name = xml::QName::PrefixedName(xml::PrefixedName {                      prefix: xml::Prefix::parse_full(prefix)?, -                    local_part: xml::LocalPart::parse_full(&element.name.local_name)?, +                    local_part: xml::LocalPart::parse_full(&name.local_name)?,                  })              } else { -                att_name = xml::QName::UnprefixedName(xml::UnprefixedName::parse_full( -                    &element.name.local_name, -                )?) +                att_name = +                    xml::QName::UnprefixedName(xml::UnprefixedName::parse_full(&name.local_name)?)              }              let value = xml::AttValue::from(value.as_str()); diff --git a/src/xml/mod.rs b/src/xml/mod.rs index 3150df0..43f3027 100644 --- a/src/xml/mod.rs +++ b/src/xml/mod.rs @@ -389,9 +389,9 @@ pub type Prolog<'s> = (  /// [23]   	XMLDecl	   ::=   	'<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'  #[derive(Debug)]  pub struct XMLDecl<'s> { -    version_info: VersionInfo, -    encoding_decl: Option<EncodingDecl<'s>>, -    sd_decl: Option<SDDecl>, +    pub(crate) version_info: VersionInfo, +    pub(crate) encoding_decl: Option<EncodingDecl<'s>>, +    pub(crate) sd_decl: Option<SDDecl>,  }  /// [24]   	VersionInfo	   ::=   	S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') @@ -401,6 +401,17 @@ pub enum VersionInfo {      DoubleQuoted(VersionNum),  } +impl Deref for VersionInfo { +    type Target = VersionNum; + +    fn deref(&self) -> &Self::Target { +        match self { +            VersionInfo::SingleQuoted(version_num) => version_num, +            VersionInfo::DoubleQuoted(version_num) => version_num, +        } +    } +} +  /// [25]   	Eq	   ::=   	S? '=' S?  #[derive(Clone)]  pub struct Eq; @@ -479,6 +490,17 @@ pub enum SDDecl {      DoubleQuoted(bool),  } +impl Deref for SDDecl { +    type Target = bool; + +    fn deref(&self) -> &Self::Target { +        match self { +            SDDecl::SingleQuoted(b) => b, +            SDDecl::DoubleQuoted(b) => b, +        } +    } +} +  // (Productions 33 through 38 have been removed.)  /// [39]   	element	   ::=   	EmptyElemTag | STag content ETag @@ -846,10 +868,26 @@ pub struct ExtParsedEnt<'s> {  // TODO?: select quote version  pub struct EncodingDecl<'s>(EncName<'s>); +impl<'s> Deref for EncodingDecl<'s> { +    type Target = EncName<'s>; + +    fn deref(&self) -> &Self::Target { +        &self.0 +    } +} +  /// [81]   	EncName	   ::=   	[A-Za-z] ([A-Za-z0-9._] | '-')*  #[derive(Debug)]  pub struct EncName<'s>(&'s str); +impl<'s> Deref for EncName<'s> { +    type Target = &'s str; + +    fn deref(&self) -> &Self::Target { +        &self.0 +    } +} +  #[derive(Debug)]  pub enum NotationDeclID<'s> {      External(ExternalID<'s>), | 
