diff options
author | 2024-11-28 18:00:30 +0000 | |
---|---|---|
committer | 2024-11-28 18:00:30 +0000 | |
commit | aa940a8eac74aca8cd3c202a05092538d1140dda (patch) | |
tree | d14997232d48ca723dc2486f0a6346421c9af163 | |
parent | 381af38a0910ca42ccb36568ebbcd147cfbd237b (diff) | |
download | peanuts-aa940a8eac74aca8cd3c202a05092538d1140dda.tar.gz peanuts-aa940a8eac74aca8cd3c202a05092538d1140dda.tar.bz2 peanuts-aa940a8eac74aca8cd3c202a05092538d1140dda.zip |
create element builder and refactor api
-rw-r--r-- | src/element.rs | 537 | ||||
-rw-r--r-- | src/error.rs | 26 | ||||
-rw-r--r-- | src/lib.rs | 1 | ||||
-rw-r--r-- | src/reader.rs | 20 |
4 files changed, 567 insertions, 17 deletions
diff --git a/src/element.rs b/src/element.rs index 1c04c98..98a3315 100644 --- a/src/element.rs +++ b/src/element.rs @@ -1,25 +1,33 @@ // elements resemble a final tree, including inherited namespace information +#![feature(drain_filter)] + use std::{ - collections::{HashMap, HashSet}, + collections::{HashMap, HashSet, VecDeque}, convert::Infallible, str::FromStr, }; use crate::{ - error::Error, + error::{DeserializeError, Error}, xml::{self, parsers_complete::Parser, Attribute}, Result, }; +pub type DeserializeResult<T> = std::result::Result<T, DeserializeError>; + pub trait FromElement: Sized { - fn from_element(element: Element) -> Result<Self>; + fn from_element(element: Element) -> DeserializeResult<Self>; } pub trait IntoElement { - fn into_element(&self) -> Element; + fn builder(&self) -> ElementBuilder; - fn get_content(&self) -> Vec<Content> { + fn into_element(&self) -> Element { + self.builder().build().unwrap() + } + + fn get_content(&self) -> VecDeque<Content> { let element = self.into_element(); element.content } @@ -67,7 +75,524 @@ pub struct Element { // this results in readers and writers being able to return qualification errors as they aren't able to create elements until every part is qualified. pub attributes: HashMap<Name, String>, // TODO: make a hashmap maybe? to be able to address parts of the content individually - pub content: Vec<Content>, + pub content: VecDeque<Content>, +} + +impl Element { + pub fn identify(&self) -> (Option<&str>, &str) { + (self.name.namespace.as_deref(), &self.name.local_name) + } + + pub fn check_name(&self, name: &str) -> DeserializeResult<()> { + if self.name.local_name == name { + Ok(()) + } else { + return Err(DeserializeError::IncorrectName( + self.name.local_name.clone(), + )); + } + } + + pub fn check_namespace(&self, namespace: &str) -> DeserializeResult<()> { + if self.name.namespace.as_deref() == Some(namespace) { + return Ok(()); + } else { + if let Some(namespace) = &self.name.namespace { + return Err(DeserializeError::IncorrectNamespace(namespace.clone())); + } else { + return Err(DeserializeError::Unqualified); + } + } + } + + pub fn attribute_opt<V: FromStr>(&mut self, att_name: &str) -> DeserializeResult<Option<V>> { + if let Some(att_value) = self.attributes.remove(&Name { + namespace: None, + local_name: att_name.to_string(), + }) { + let value = <V as FromStr>::from_str(&att_value) + .map_err(|_| DeserializeError::FromStr(att_value))?; + return Ok(Some(value)); + } else { + return Ok(None); + } + } + + pub fn attribute_opt_namespaced<V: FromStr>( + &mut self, + att_name: &str, + att_namespace: &str, + ) -> DeserializeResult<Option<V>> { + if let Some(att_value) = self.attributes.remove(&Name { + namespace: Some(att_namespace.to_string()), + local_name: att_name.to_string(), + }) { + let value = <V as FromStr>::from_str(&att_value) + .map_err(|_| DeserializeError::FromStr(att_value))?; + return Ok(Some(value)); + } else { + return Ok(None); + } + } + + pub fn attribute<V: FromStr>(&mut self, att_name: &str) -> DeserializeResult<V> { + let name = Name { + namespace: None, + local_name: att_name.to_string(), + }; + if let Some(att_value) = self.attributes.remove(&name) { + let value = <V as FromStr>::from_str(&att_value) + .map_err(|_| DeserializeError::FromStr(att_value))?; + return Ok(value); + } else { + return Err(DeserializeError::MissingAttribute(name)); + } + } + + pub fn attribute_namespaced<V: FromStr>( + &mut self, + att_name: &str, + att_namespace: &str, + ) -> DeserializeResult<V> { + let name = Name { + namespace: Some(att_namespace.to_string()), + local_name: att_name.to_string(), + }; + if let Some(att_value) = self.attributes.remove(&name) { + let value = <V as FromStr>::from_str(&att_value) + .map_err(|_| DeserializeError::FromStr(att_value))?; + return Ok(value); + } else { + return Err(DeserializeError::MissingAttribute(name)); + } + } + + pub fn no_more_attributes(self) -> DeserializeResult<Self> { + if self.attributes.is_empty() { + Ok(self) + } else { + Err(DeserializeError::UnexpectedAttributes(self.attributes)) + } + } + + // for xs:any + + pub fn child_one<T: FromElement>(&mut self) -> DeserializeResult<T> { + if let Some(position) = self.content.iter().position(|content| match content { + Content::Element(element) => <T as FromElement>::from_element(element.clone()).is_ok(), + Content::Text(_) => false, + Content::PI => false, + Content::Comment(_) => false, + }) { + let element = self.content.remove(position).unwrap(); + if let Content::Element(e) = element { + return <T as FromElement>::from_element(e); + } else { + return Err(DeserializeError::MissingChild); + } + } else { + return Err(DeserializeError::MissingChild); + } + } + + pub fn child_opt<T: FromElement>(&mut self) -> DeserializeResult<Option<T>> { + if let Some(position) = self.content.iter().position(|content| match content { + Content::Element(element) => <T as FromElement>::from_element(element.clone()).is_ok(), + Content::Text(_) => false, + Content::PI => false, + Content::Comment(_) => false, + }) { + let element = self.content.remove(position).unwrap(); + if let Content::Element(e) = element { + return Ok(Some(<T as FromElement>::from_element(e)?)); + } else { + return Err(DeserializeError::MissingChild); + } + } else { + return Ok(None); + } + } + + pub fn children<T: FromElement>(&mut self) -> DeserializeResult<Vec<T>> { + let (children, rest): (VecDeque<_>, VecDeque<_>) = self + .content + .clone() + .into_iter() + .partition(|content| match content { + Content::Element(element) => { + <T as FromElement>::from_element(element.clone()).is_ok() + } + Content::Text(_) => false, + Content::PI => false, + Content::Comment(_) => false, + }); + self.content = rest; + let children: Vec<T> = children + .into_iter() + .map(|content| { + let child = match content { + Content::Element(element) => <T as FromElement>::from_element(element).ok(), + Content::Text(_) => None, + Content::PI => None, + Content::Comment(_) => None, + } + .unwrap(); + child + }) + .collect(); + Ok(children) + } + + pub fn value<V: FromStr>(&mut self) -> DeserializeResult<V> { + if let Some(position) = self.content.iter().position(|content| match content { + Content::Element(_) => false, + Content::Text(s) => <V as FromStr>::from_str(s).is_ok(), + Content::PI => false, + Content::Comment(_) => false, + }) { + let element = self.content.remove(position).unwrap(); + if let Content::Text(v) = element { + return Ok(<V as FromStr>::from_str(&v).ok().unwrap()); + } else { + panic!("infallible") + } + } else { + return Err(DeserializeError::MissingValue); + } + } + + pub fn value_opt<V: FromStr>(&mut self) -> DeserializeResult<Option<V>> { + if let Some(position) = self.content.iter().position(|content| match content { + Content::Element(_) => false, + Content::Text(s) => <V as FromStr>::from_str(s).is_ok(), + Content::PI => false, + Content::Comment(_) => false, + }) { + let element = self.content.remove(position).unwrap(); + if let Content::Text(v) = element { + return Ok(<V as FromStr>::from_str(&v).ok()); + } else { + panic!("infallible") + } + } else { + return Ok(None); + } + } + + // for xs:sequence + + pub fn pop_child_one<T: FromElement>(&mut self) -> DeserializeResult<T> { + loop { + let child = self + .content + .pop_front() + .ok_or(DeserializeError::MissingChild)?; + match child { + Content::Element(element) => return Ok(<T as FromElement>::from_element(element)?), + Content::Text(_) => { + return Err(DeserializeError::UnexpectedContent(self.content.clone())) + } + Content::PI => {} + Content::Comment(_) => {} + } + } + } + + pub fn pop_child_opt<T: FromElement>(&mut self) -> DeserializeResult<Option<T>> { + loop { + let child = self.content.pop_front(); + if let Some(child) = child { + match child { + Content::Element(element) => { + return Ok(Some(<T as FromElement>::from_element(element)?)) + } + Content::Text(_) => { + return Err(DeserializeError::UnexpectedContent(self.content.clone())) + } + Content::PI => {} + Content::Comment(_) => {} + } + } else { + return Ok(None); + } + } + } + + pub fn pop_children<T: FromElement>(&mut self) -> DeserializeResult<Vec<T>> { + let mut children = Vec::new(); + loop { + let child = self.content.front(); + if let Some(child) = child { + match child { + Content::Element(element) => { + if let Ok(child) = <T as FromElement>::from_element(element.clone()) { + children.push(child); + self.content.pop_front(); + } + } + Content::Text(_) => return Ok(children), + Content::PI => {} + Content::Comment(_) => {} + } + } else { + return Ok(children); + } + } + } + + pub fn pop_value<V: FromStr>(&mut self) -> DeserializeResult<V> { + loop { + let child = self + .content + .pop_front() + .ok_or(DeserializeError::MissingChild)?; + match child { + Content::Element(_) => { + return Err(DeserializeError::UnexpectedContent(self.content.clone())) + } + Content::Text(t) => { + return Ok( + <V as FromStr>::from_str(&t).map_err(|_| DeserializeError::FromStr(t))? + ) + } + Content::PI => {} + Content::Comment(_) => {} + } + } + } + + pub fn pop_value_opt<V: FromStr>(&mut self) -> DeserializeResult<Option<V>> { + loop { + let child = self.content.pop_front(); + if let Some(child) = child { + match child { + Content::Element(_) => { + return Err(DeserializeError::UnexpectedContent(self.content.clone())) + } + Content::Text(t) => { + return Ok(Some( + <V as FromStr>::from_str(&t) + .map_err(|_| DeserializeError::FromStr(t))?, + )) + } + Content::PI => {} + Content::Comment(_) => {} + } + } else { + return Ok(None); + } + } + } + + pub fn no_more_content(self) -> DeserializeResult<Self> { + if self + .content + .iter() + .filter(|content| match content { + Content::Element(_) => true, + Content::Text(_) => true, + Content::PI => false, + Content::Comment(_) => false, + }) + .collect::<Vec<_>>() + .is_empty() + { + Ok(self) + } else { + Err(DeserializeError::UnexpectedContent(self.content)) + } + } + + pub fn builder(name: impl ToString, namespace: Option<impl ToString>) -> ElementBuilder { + ElementBuilder::new(name, namespace) + } +} + +pub struct ElementBuilder { + name: Name, + namespace_declaration_overrides: Vec<NamespaceDeclaration>, + attributes: Vec<(Name, String)>, + content: Vec<ContentBuilder>, +} + +impl ElementBuilder { + pub fn new(name: impl ToString, namespace: Option<impl ToString>) -> Self { + Self { + name: Name { + namespace: namespace.map(|namespace| namespace.to_string()), + local_name: name.to_string(), + }, + namespace_declaration_overrides: Vec::new(), + attributes: Vec::new(), + content: Vec::new(), + } + } + + pub fn push_namespace_declaration_override( + mut self, + prefix: Option<impl ToString>, + namespace: impl ToString, + ) -> Self { + self.namespace_declaration_overrides + .push(NamespaceDeclaration { + prefix: prefix.map(|prefix| prefix.to_string()), + namespace: namespace.to_string(), + }); + self + } + + pub fn push_attribute<N: ToString, V: ToString>(mut self, name: N, value: V) -> Self { + self.attributes.push(( + // TODO: make sure name is a valid name, same for prefixes + Name { + namespace: None, + local_name: name.to_string(), + }, + value.to_string(), + )); + self + } + + pub fn push_attribute_namespaced( + mut self, + namespace: impl ToString, + name: impl ToString, + value: impl ToString, + ) -> Self { + self.attributes.push(( + Name { + namespace: Some(namespace.to_string()), + local_name: name.to_string(), + }, + value.to_string(), + )); + self + } + + pub fn push_child(mut self, child: ElementBuilder) -> Self { + self.content.push(ContentBuilder::Element(child)); + self + } + + pub fn push_text(mut self, text: impl ToString) -> Self { + self.content.push(ContentBuilder::Text(text.to_string())); + self + } + + pub fn push_attribute_opt(self, name: impl ToString, value: Option<impl ToString>) -> Self { + if let Some(value) = value { + self.push_attribute(name, value) + } else { + self + } + } + + pub fn push_attribute_opt_namespaced( + self, + namespace: impl ToString, + name: impl ToString, + value: Option<impl ToString>, + ) -> Self { + if let Some(value) = value { + self.push_attribute_namespaced(namespace, name, value) + } else { + self + } + } + + pub fn push_child_opt(self, child: Option<ElementBuilder>) -> Self { + if let Some(child) = child { + self.push_child(child) + } else { + self + } + } + + pub fn push_text_opt(self, text: Option<impl ToString>) -> Self { + if let Some(text) = text { + self.push_text(text) + } else { + self + } + } + + pub fn push_content(mut self, content: ContentBuilder) -> Self { + self.content.push(content); + self + } + + pub fn push_children(self, children: Vec<impl IntoContent>) -> Self { + let mut element_builder = self; + for child in children { + element_builder = element_builder.push_content(child.builder()) + } + element_builder + } + + pub fn build(&self) -> Result<Element> { + let mut namespace_declaration_overrides = HashSet::new(); + for namespace_declaration in &self.namespace_declaration_overrides { + if !namespace_declaration_overrides.insert(namespace_declaration.clone()) { + return Err(Error::DuplicateNameSpaceDeclaration( + namespace_declaration.clone(), + )); + } + } + + let mut attributes = HashMap::new(); + for (att_name, att_value) in &self.attributes { + if attributes + .insert(att_name.clone(), att_value.to_string()) + .is_some() + { + // TODO: better error + return Err(Error::DuplicateAttribute(att_name.local_name.to_string())); + } + } + + let content: Result<VecDeque<Content>> = self + .content + .iter() + .map(|content_builder| -> Result<Content> { Ok(content_builder.build()?) }) + .collect(); + let content = content?; + + Ok(Element { + name: self.name.clone(), + namespace_declaration_overrides, + attributes, + content, + }) + } +} + +pub trait IntoContent { + fn builder(&self) -> ContentBuilder; +} + +impl<T> IntoContent for T +where + T: IntoElement, +{ + fn builder(&self) -> ContentBuilder { + ContentBuilder::Element(self.builder()) + } +} + +pub enum ContentBuilder { + Element(ElementBuilder), + Text(String), +} + +impl ContentBuilder { + pub fn build(&self) -> Result<Content> { + match self { + ContentBuilder::Element(element_builder) => { + Ok(Content::Element(element_builder.build()?)) + } + ContentBuilder::Text(text) => Ok(Content::Text(text.to_string())), + } + } } pub fn escape_str(s: &str) -> String { diff --git a/src/error.rs b/src/error.rs index 85b5d70..dd8ea17 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,4 +1,8 @@ -use std::{num::ParseIntError, str::Utf8Error}; +use std::{ + collections::{HashMap, VecDeque}, + num::ParseIntError, + str::{FromStr, Utf8Error}, +}; use crate::{ element::{Content, Name, NamespaceDeclaration}, @@ -6,6 +10,19 @@ use crate::{ }; #[derive(Debug)] +pub enum DeserializeError { + FromStr(String), + UnexpectedAttributes(HashMap<Name, String>), + UnexpectedContent(VecDeque<Content>), + MissingAttribute(Name), + IncorrectName(String), + IncorrectNamespace(String), + Unqualified, + MissingChild, + MissingValue, +} + +#[derive(Debug)] pub enum Error { ReadError(std::io::Error), Utf8Error(Utf8Error), @@ -26,6 +43,13 @@ pub enum Error { UnexpectedNumberOfContents(usize), UnexpectedContent(Content), UnexpectedElement(Name), + Deserialize(DeserializeError), +} + +impl From<DeserializeError> for Error { + fn from(e: DeserializeError) -> Self { + Self::Deserialize(e) + } } impl From<std::io::Error> for Error { @@ -11,6 +11,7 @@ pub const XML_NS: &str = "http://www.w3.org/XML/1998/namespace"; pub const XMLNS_NS: &str = "http://www.w3.org/2000/xmlns/"; pub use element::Element; +pub use error::DeserializeError; pub use error::Error; pub use reader::Reader; pub use writer::Writer; diff --git a/src/reader.rs b/src/reader.rs index e6bb57c..aa4d467 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -2,7 +2,7 @@ use circular::Buffer; use futures::{FutureExt, Stream}; use nom::Err; use std::{ - collections::{hash_set, BTreeMap, HashMap, HashSet}, + collections::{hash_set, BTreeMap, HashMap, HashSet, VecDeque}, future::Future, path::Prefix, pin::{pin, Pin}, @@ -102,12 +102,12 @@ where pub async fn read_start<'s, T: FromElement>(&'s mut self) -> Result<T> { let element = self.read_start_tag().await?; - FromElement::from_element(element) + Ok(FromElement::from_element(element)?) } pub async fn read<'s, T: FromElement>(&'s mut self) -> Result<T> { let element = self.read_element().await?; - FromElement::from_element(element) + Ok(FromElement::from_element(element)?) } pub async fn read_start_tag<'s>(&'s mut self) -> Result<Element> { @@ -438,7 +438,7 @@ impl<R> Reader<R> { name: element_name, namespace_declaration_overrides: element_namespace_declarations, attributes, - content: Vec::new(), + content: VecDeque::new(), }); } @@ -652,7 +652,7 @@ impl<R> Reader<R> { namespace_declarations.pop(); } else { - content = Vec::new(); + content = VecDeque::new(); } return Ok(Element { @@ -666,18 +666,18 @@ impl<R> Reader<R> { fn content_from_xml( namespaces: &mut Vec<HashSet<NamespaceDeclaration>>, xml_content: xml::Content, - ) -> Result<Vec<Content>> { - let mut content = Vec::new(); + ) -> Result<VecDeque<Content>> { + let mut content = VecDeque::new(); let mut text = xml_content.char_data.map(|str| String::from(*str)); for (content_item, char_data) in xml_content.content { match content_item { xml::ContentItem::Element(element) => { text.map(|text| { if !text.is_empty() { - content.push(Content::Text(text)) + content.push_back(Content::Text(text)) } }); - content.push(Content::Element(Self::element_from_xml( + content.push_back(Content::Element(Self::element_from_xml( namespaces, element, )?)); text = char_data.map(|str| String::from(*str)); @@ -711,7 +711,7 @@ impl<R> Reader<R> { } text.map(|text| { if !text.is_empty() { - content.push(Content::Text(text)) + content.push_back(Content::Text(text)) } }); Ok(content) |