diff options
| author | 2024-11-28 18:00:30 +0000 | |
|---|---|---|
| committer | 2024-11-28 18:00:30 +0000 | |
| commit | aa940a8eac74aca8cd3c202a05092538d1140dda (patch) | |
| tree | d14997232d48ca723dc2486f0a6346421c9af163 | |
| parent | 381af38a0910ca42ccb36568ebbcd147cfbd237b (diff) | |
| download | peanuts-aa940a8eac74aca8cd3c202a05092538d1140dda.tar.gz peanuts-aa940a8eac74aca8cd3c202a05092538d1140dda.tar.bz2 peanuts-aa940a8eac74aca8cd3c202a05092538d1140dda.zip | |
create element builder and refactor api
Diffstat (limited to '')
| -rw-r--r-- | src/element.rs | 537 | ||||
| -rw-r--r-- | src/error.rs | 26 | ||||
| -rw-r--r-- | src/lib.rs | 1 | ||||
| -rw-r--r-- | src/reader.rs | 20 | 
4 files changed, 567 insertions, 17 deletions
| diff --git a/src/element.rs b/src/element.rs index 1c04c98..98a3315 100644 --- a/src/element.rs +++ b/src/element.rs @@ -1,25 +1,33 @@  // elements resemble a final tree, including inherited namespace information +#![feature(drain_filter)] +  use std::{ -    collections::{HashMap, HashSet}, +    collections::{HashMap, HashSet, VecDeque},      convert::Infallible,      str::FromStr,  };  use crate::{ -    error::Error, +    error::{DeserializeError, Error},      xml::{self, parsers_complete::Parser, Attribute},      Result,  }; +pub type DeserializeResult<T> = std::result::Result<T, DeserializeError>; +  pub trait FromElement: Sized { -    fn from_element(element: Element) -> Result<Self>; +    fn from_element(element: Element) -> DeserializeResult<Self>;  }  pub trait IntoElement { -    fn into_element(&self) -> Element; +    fn builder(&self) -> ElementBuilder; -    fn get_content(&self) -> Vec<Content> { +    fn into_element(&self) -> Element { +        self.builder().build().unwrap() +    } + +    fn get_content(&self) -> VecDeque<Content> {          let element = self.into_element();          element.content      } @@ -67,7 +75,524 @@ pub struct Element {      // this results in readers and writers being able to return qualification errors as they aren't able to create elements until every part is qualified.      pub attributes: HashMap<Name, String>,      // TODO: make a hashmap maybe? 
to be able to address parts of the content individually -    pub content: Vec<Content>, +    pub content: VecDeque<Content>, +} + +impl Element { +    pub fn identify(&self) -> (Option<&str>, &str) { +        (self.name.namespace.as_deref(), &self.name.local_name) +    } + +    pub fn check_name(&self, name: &str) -> DeserializeResult<()> { +        if self.name.local_name == name { +            Ok(()) +        } else { +            return Err(DeserializeError::IncorrectName( +                self.name.local_name.clone(), +            )); +        } +    } + +    pub fn check_namespace(&self, namespace: &str) -> DeserializeResult<()> { +        if self.name.namespace.as_deref() == Some(namespace) { +            return Ok(()); +        } else { +            if let Some(namespace) = &self.name.namespace { +                return Err(DeserializeError::IncorrectNamespace(namespace.clone())); +            } else { +                return Err(DeserializeError::Unqualified); +            } +        } +    } + +    pub fn attribute_opt<V: FromStr>(&mut self, att_name: &str) -> DeserializeResult<Option<V>> { +        if let Some(att_value) = self.attributes.remove(&Name { +            namespace: None, +            local_name: att_name.to_string(), +        }) { +            let value = <V as FromStr>::from_str(&att_value) +                .map_err(|_| DeserializeError::FromStr(att_value))?; +            return Ok(Some(value)); +        } else { +            return Ok(None); +        } +    } + +    pub fn attribute_opt_namespaced<V: FromStr>( +        &mut self, +        att_name: &str, +        att_namespace: &str, +    ) -> DeserializeResult<Option<V>> { +        if let Some(att_value) = self.attributes.remove(&Name { +            namespace: Some(att_namespace.to_string()), +            local_name: att_name.to_string(), +        }) { +            let value = <V as FromStr>::from_str(&att_value) +                .map_err(|_| DeserializeError::FromStr(att_value))?; +        
    return Ok(Some(value)); +        } else { +            return Ok(None); +        } +    } + +    pub fn attribute<V: FromStr>(&mut self, att_name: &str) -> DeserializeResult<V> { +        let name = Name { +            namespace: None, +            local_name: att_name.to_string(), +        }; +        if let Some(att_value) = self.attributes.remove(&name) { +            let value = <V as FromStr>::from_str(&att_value) +                .map_err(|_| DeserializeError::FromStr(att_value))?; +            return Ok(value); +        } else { +            return Err(DeserializeError::MissingAttribute(name)); +        } +    } + +    pub fn attribute_namespaced<V: FromStr>( +        &mut self, +        att_name: &str, +        att_namespace: &str, +    ) -> DeserializeResult<V> { +        let name = Name { +            namespace: Some(att_namespace.to_string()), +            local_name: att_name.to_string(), +        }; +        if let Some(att_value) = self.attributes.remove(&name) { +            let value = <V as FromStr>::from_str(&att_value) +                .map_err(|_| DeserializeError::FromStr(att_value))?; +            return Ok(value); +        } else { +            return Err(DeserializeError::MissingAttribute(name)); +        } +    } + +    pub fn no_more_attributes(self) -> DeserializeResult<Self> { +        if self.attributes.is_empty() { +            Ok(self) +        } else { +            Err(DeserializeError::UnexpectedAttributes(self.attributes)) +        } +    } + +    // for xs:any + +    pub fn child_one<T: FromElement>(&mut self) -> DeserializeResult<T> { +        if let Some(position) = self.content.iter().position(|content| match content { +            Content::Element(element) => <T as FromElement>::from_element(element.clone()).is_ok(), +            Content::Text(_) => false, +            Content::PI => false, +            Content::Comment(_) => false, +        }) { +            let element = self.content.remove(position).unwrap(); +          
  if let Content::Element(e) = element { +                return <T as FromElement>::from_element(e); +            } else { +                return Err(DeserializeError::MissingChild); +            } +        } else { +            return Err(DeserializeError::MissingChild); +        } +    } + +    pub fn child_opt<T: FromElement>(&mut self) -> DeserializeResult<Option<T>> { +        if let Some(position) = self.content.iter().position(|content| match content { +            Content::Element(element) => <T as FromElement>::from_element(element.clone()).is_ok(), +            Content::Text(_) => false, +            Content::PI => false, +            Content::Comment(_) => false, +        }) { +            let element = self.content.remove(position).unwrap(); +            if let Content::Element(e) = element { +                return Ok(Some(<T as FromElement>::from_element(e)?)); +            } else { +                return Err(DeserializeError::MissingChild); +            } +        } else { +            return Ok(None); +        } +    } + +    pub fn children<T: FromElement>(&mut self) -> DeserializeResult<Vec<T>> { +        let (children, rest): (VecDeque<_>, VecDeque<_>) = self +            .content +            .clone() +            .into_iter() +            .partition(|content| match content { +                Content::Element(element) => { +                    <T as FromElement>::from_element(element.clone()).is_ok() +                } +                Content::Text(_) => false, +                Content::PI => false, +                Content::Comment(_) => false, +            }); +        self.content = rest; +        let children: Vec<T> = children +            .into_iter() +            .map(|content| { +                let child = match content { +                    Content::Element(element) => <T as FromElement>::from_element(element).ok(), +                    Content::Text(_) => None, +                    Content::PI => None, +                    
Content::Comment(_) => None, +                } +                .unwrap(); +                child +            }) +            .collect(); +        Ok(children) +    } + +    pub fn value<V: FromStr>(&mut self) -> DeserializeResult<V> { +        if let Some(position) = self.content.iter().position(|content| match content { +            Content::Element(_) => false, +            Content::Text(s) => <V as FromStr>::from_str(s).is_ok(), +            Content::PI => false, +            Content::Comment(_) => false, +        }) { +            let element = self.content.remove(position).unwrap(); +            if let Content::Text(v) = element { +                return Ok(<V as FromStr>::from_str(&v).ok().unwrap()); +            } else { +                panic!("infallible") +            } +        } else { +            return Err(DeserializeError::MissingValue); +        } +    } + +    pub fn value_opt<V: FromStr>(&mut self) -> DeserializeResult<Option<V>> { +        if let Some(position) = self.content.iter().position(|content| match content { +            Content::Element(_) => false, +            Content::Text(s) => <V as FromStr>::from_str(s).is_ok(), +            Content::PI => false, +            Content::Comment(_) => false, +        }) { +            let element = self.content.remove(position).unwrap(); +            if let Content::Text(v) = element { +                return Ok(<V as FromStr>::from_str(&v).ok()); +            } else { +                panic!("infallible") +            } +        } else { +            return Ok(None); +        } +    } + +    // for xs:sequence + +    pub fn pop_child_one<T: FromElement>(&mut self) -> DeserializeResult<T> { +        loop { +            let child = self +                .content +                .pop_front() +                .ok_or(DeserializeError::MissingChild)?; +            match child { +                Content::Element(element) => return Ok(<T as FromElement>::from_element(element)?), +                
Content::Text(_) => { +                    return Err(DeserializeError::UnexpectedContent(self.content.clone())) +                } +                Content::PI => {} +                Content::Comment(_) => {} +            } +        } +    } + +    pub fn pop_child_opt<T: FromElement>(&mut self) -> DeserializeResult<Option<T>> { +        loop { +            let child = self.content.pop_front(); +            if let Some(child) = child { +                match child { +                    Content::Element(element) => { +                        return Ok(Some(<T as FromElement>::from_element(element)?)) +                    } +                    Content::Text(_) => { +                        return Err(DeserializeError::UnexpectedContent(self.content.clone())) +                    } +                    Content::PI => {} +                    Content::Comment(_) => {} +                } +            } else { +                return Ok(None); +            } +        } +    } + +    /// Pops the leading run of child elements that deserialize as `T`. +    /// +    /// Processing instructions and comments encountered at the front are discarded. +    /// Popping stops at the first text node, at the first element that +    /// `T::from_element` rejects (both are left in `content`), or when `content` is empty. +    /// Always returns `Ok`; an empty `Vec` means no leading child matched. +    pub fn pop_children<T: FromElement>(&mut self) -> DeserializeResult<Vec<T>> { +        let mut children = Vec::new(); +        loop { +            let child = self.content.front(); +            if let Some(child) = child { +                match child { +                    Content::Element(element) => { +                        match <T as FromElement>::from_element(element.clone()) { +                            Ok(child) => children.push(child), +                            // a non-matching element ends the run; looping on it again would never terminate +                            Err(_) => return Ok(children), +                        } +                    } +                    Content::Text(_) => return Ok(children), +                    Content::PI => {} +                    Content::Comment(_) => {} +                } +                // consume the item just handled (parsed child, PI or comment) so the loop advances +                self.content.pop_front(); +            } else { +                return Ok(children); +            } +        } +    } + +    pub fn pop_value<V: FromStr>(&mut self) -> DeserializeResult<V> { +        loop { +            let child = self +                .content +                .pop_front() +            
    .ok_or(DeserializeError::MissingChild)?; +            match child { +                Content::Element(_) => { +                    return Err(DeserializeError::UnexpectedContent(self.content.clone())) +                } +                Content::Text(t) => { +                    return Ok( +                        <V as FromStr>::from_str(&t).map_err(|_| DeserializeError::FromStr(t))? +                    ) +                } +                Content::PI => {} +                Content::Comment(_) => {} +            } +        } +    } + +    pub fn pop_value_opt<V: FromStr>(&mut self) -> DeserializeResult<Option<V>> { +        loop { +            let child = self.content.pop_front(); +            if let Some(child) = child { +                match child { +                    Content::Element(_) => { +                        return Err(DeserializeError::UnexpectedContent(self.content.clone())) +                    } +                    Content::Text(t) => { +                        return Ok(Some( +                            <V as FromStr>::from_str(&t) +                                .map_err(|_| DeserializeError::FromStr(t))?, +                        )) +                    } +                    Content::PI => {} +                    Content::Comment(_) => {} +                } +            } else { +                return Ok(None); +            } +        } +    } + +    pub fn no_more_content(self) -> DeserializeResult<Self> { +        if self +            .content +            .iter() +            .filter(|content| match content { +                Content::Element(_) => true, +                Content::Text(_) => true, +                Content::PI => false, +                Content::Comment(_) => false, +            }) +            .collect::<Vec<_>>() +            .is_empty() +        { +            Ok(self) +        } else { +            Err(DeserializeError::UnexpectedContent(self.content)) +        } +    } + +    pub fn builder(name: impl 
ToString, namespace: Option<impl ToString>) -> ElementBuilder { +        ElementBuilder::new(name, namespace) +    } +} + +pub struct ElementBuilder { +    name: Name, +    namespace_declaration_overrides: Vec<NamespaceDeclaration>, +    attributes: Vec<(Name, String)>, +    content: Vec<ContentBuilder>, +} + +impl ElementBuilder { +    pub fn new(name: impl ToString, namespace: Option<impl ToString>) -> Self { +        Self { +            name: Name { +                namespace: namespace.map(|namespace| namespace.to_string()), +                local_name: name.to_string(), +            }, +            namespace_declaration_overrides: Vec::new(), +            attributes: Vec::new(), +            content: Vec::new(), +        } +    } + +    pub fn push_namespace_declaration_override( +        mut self, +        prefix: Option<impl ToString>, +        namespace: impl ToString, +    ) -> Self { +        self.namespace_declaration_overrides +            .push(NamespaceDeclaration { +                prefix: prefix.map(|prefix| prefix.to_string()), +                namespace: namespace.to_string(), +            }); +        self +    } + +    pub fn push_attribute<N: ToString, V: ToString>(mut self, name: N, value: V) -> Self { +        self.attributes.push(( +            // TODO: make sure name is a valid name, same for prefixes +            Name { +                namespace: None, +                local_name: name.to_string(), +            }, +            value.to_string(), +        )); +        self +    } + +    pub fn push_attribute_namespaced( +        mut self, +        namespace: impl ToString, +        name: impl ToString, +        value: impl ToString, +    ) -> Self { +        self.attributes.push(( +            Name { +                namespace: Some(namespace.to_string()), +                local_name: name.to_string(), +            }, +            value.to_string(), +        )); +        self +    } + +    pub fn push_child(mut self, child: ElementBuilder) 
-> Self { +        self.content.push(ContentBuilder::Element(child)); +        self +    } + +    pub fn push_text(mut self, text: impl ToString) -> Self { +        self.content.push(ContentBuilder::Text(text.to_string())); +        self +    } + +    pub fn push_attribute_opt(self, name: impl ToString, value: Option<impl ToString>) -> Self { +        if let Some(value) = value { +            self.push_attribute(name, value) +        } else { +            self +        } +    } + +    pub fn push_attribute_opt_namespaced( +        self, +        namespace: impl ToString, +        name: impl ToString, +        value: Option<impl ToString>, +    ) -> Self { +        if let Some(value) = value { +            self.push_attribute_namespaced(namespace, name, value) +        } else { +            self +        } +    } + +    pub fn push_child_opt(self, child: Option<ElementBuilder>) -> Self { +        if let Some(child) = child { +            self.push_child(child) +        } else { +            self +        } +    } + +    pub fn push_text_opt(self, text: Option<impl ToString>) -> Self { +        if let Some(text) = text { +            self.push_text(text) +        } else { +            self +        } +    } + +    pub fn push_content(mut self, content: ContentBuilder) -> Self { +        self.content.push(content); +        self +    } + +    pub fn push_children(self, children: Vec<impl IntoContent>) -> Self { +        let mut element_builder = self; +        for child in children { +            element_builder = element_builder.push_content(child.builder()) +        } +        element_builder +    } + +    pub fn build(&self) -> Result<Element> { +        let mut namespace_declaration_overrides = HashSet::new(); +        for namespace_declaration in &self.namespace_declaration_overrides { +            if !namespace_declaration_overrides.insert(namespace_declaration.clone()) { +                return Err(Error::DuplicateNameSpaceDeclaration( +                    
namespace_declaration.clone(), +                )); +            } +        } + +        let mut attributes = HashMap::new(); +        for (att_name, att_value) in &self.attributes { +            if attributes +                .insert(att_name.clone(), att_value.to_string()) +                .is_some() +            { +                // TODO: better error +                return Err(Error::DuplicateAttribute(att_name.local_name.to_string())); +            } +        } + +        let content: Result<VecDeque<Content>> = self +            .content +            .iter() +            .map(|content_builder| -> Result<Content> { Ok(content_builder.build()?) }) +            .collect(); +        let content = content?; + +        Ok(Element { +            name: self.name.clone(), +            namespace_declaration_overrides, +            attributes, +            content, +        }) +    } +} + +pub trait IntoContent { +    fn builder(&self) -> ContentBuilder; +} + +impl<T> IntoContent for T +where +    T: IntoElement, +{ +    fn builder(&self) -> ContentBuilder { +        ContentBuilder::Element(self.builder()) +    } +} + +pub enum ContentBuilder { +    Element(ElementBuilder), +    Text(String), +} + +impl ContentBuilder { +    pub fn build(&self) -> Result<Content> { +        match self { +            ContentBuilder::Element(element_builder) => { +                Ok(Content::Element(element_builder.build()?)) +            } +            ContentBuilder::Text(text) => Ok(Content::Text(text.to_string())), +        } +    }  }  pub fn escape_str(s: &str) -> String { diff --git a/src/error.rs b/src/error.rs index 85b5d70..dd8ea17 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,4 +1,8 @@ -use std::{num::ParseIntError, str::Utf8Error}; +use std::{ +    collections::{HashMap, VecDeque}, +    num::ParseIntError, +    str::{FromStr, Utf8Error}, +};  use crate::{      element::{Content, Name, NamespaceDeclaration}, @@ -6,6 +10,19 @@ use crate::{  };  #[derive(Debug)] +pub enum 
DeserializeError { +    FromStr(String), +    UnexpectedAttributes(HashMap<Name, String>), +    UnexpectedContent(VecDeque<Content>), +    MissingAttribute(Name), +    IncorrectName(String), +    IncorrectNamespace(String), +    Unqualified, +    MissingChild, +    MissingValue, +} + +#[derive(Debug)]  pub enum Error {      ReadError(std::io::Error),      Utf8Error(Utf8Error), @@ -26,6 +43,13 @@ pub enum Error {      UnexpectedNumberOfContents(usize),      UnexpectedContent(Content),      UnexpectedElement(Name), +    Deserialize(DeserializeError), +} + +impl From<DeserializeError> for Error { +    fn from(e: DeserializeError) -> Self { +        Self::Deserialize(e) +    }  }  impl From<std::io::Error> for Error { @@ -11,6 +11,7 @@ pub const XML_NS: &str = "http://www.w3.org/XML/1998/namespace";  pub const XMLNS_NS: &str = "http://www.w3.org/2000/xmlns/";  pub use element::Element; +pub use error::DeserializeError;  pub use error::Error;  pub use reader::Reader;  pub use writer::Writer; diff --git a/src/reader.rs b/src/reader.rs index e6bb57c..aa4d467 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -2,7 +2,7 @@ use circular::Buffer;  use futures::{FutureExt, Stream};  use nom::Err;  use std::{ -    collections::{hash_set, BTreeMap, HashMap, HashSet}, +    collections::{hash_set, BTreeMap, HashMap, HashSet, VecDeque},      future::Future,      path::Prefix,      pin::{pin, Pin}, @@ -102,12 +102,12 @@ where      pub async fn read_start<'s, T: FromElement>(&'s mut self) -> Result<T> {          let element = self.read_start_tag().await?; -        FromElement::from_element(element) +        Ok(FromElement::from_element(element)?)      }      pub async fn read<'s, T: FromElement>(&'s mut self) -> Result<T> {          let element = self.read_element().await?; -        FromElement::from_element(element) +        Ok(FromElement::from_element(element)?)      
}      pub async fn read_start_tag<'s>(&'s mut self) -> Result<Element> { @@ -438,7 +438,7 @@ impl<R> Reader<R> {              name: element_name,              namespace_declaration_overrides: element_namespace_declarations,              attributes, -            content: Vec::new(), +            content: VecDeque::new(),          });      } @@ -652,7 +652,7 @@ impl<R> Reader<R> {              namespace_declarations.pop();          } else { -            content = Vec::new(); +            content = VecDeque::new();          }          return Ok(Element { @@ -666,18 +666,18 @@ impl<R> Reader<R> {      fn content_from_xml(          namespaces: &mut Vec<HashSet<NamespaceDeclaration>>,          xml_content: xml::Content, -    ) -> Result<Vec<Content>> { -        let mut content = Vec::new(); +    ) -> Result<VecDeque<Content>> { +        let mut content = VecDeque::new();          let mut text = xml_content.char_data.map(|str| String::from(*str));          for (content_item, char_data) in xml_content.content {              match content_item {                  xml::ContentItem::Element(element) => {                      text.map(|text| {                          if !text.is_empty() { -                            content.push(Content::Text(text)) +                            content.push_back(Content::Text(text))                          }                      }); -                    content.push(Content::Element(Self::element_from_xml( +                    content.push_back(Content::Element(Self::element_from_xml(                          namespaces, element,                      )?));                      text = char_data.map(|str| String::from(*str)); @@ -711,7 +711,7 @@ impl<R> Reader<R> {          }          text.map(|text| {              if !text.is_empty() { -                content.push(Content::Text(text)) +                content.push_back(Content::Text(text))              }          });          Ok(content) | 
