aboutsummaryrefslogtreecommitdiffstats
path: root/src/reader.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/reader.rs')
-rw-r--r--src/reader.rs325
1 files changed, 321 insertions, 4 deletions
diff --git a/src/reader.rs b/src/reader.rs
index 313de4c..b51489f 100644
--- a/src/reader.rs
+++ b/src/reader.rs
@@ -1,31 +1,348 @@
+use circular::Buffer;
use futures::Stream;
use nom::Err;
-use std::{collections::BTreeMap, str};
-use tokio::io::AsyncBufReadExt;
+use std::{
+ collections::{BTreeMap, HashMap, HashSet},
+ path::Prefix,
+ str::{self, FromStr},
+};
+use tokio::io::{AsyncBufRead, AsyncBufReadExt, AsyncRead, AsyncReadExt};
+
/// Capacity, in bytes, of the buffer a `Reader` allocates for incoming data (64 KiB).
static MAX_STANZA_SIZE: usize = 64 * 1024;
use crate::{
- element::{Element, Name, Namespace},
+ element::{Content, Element, Name, Namespace},
error::Error,
+ xml::{self, parsers::Parser},
+ Result,
};
/// streaming reader that tracks depth and available namespaces at current depth
pub struct Reader<R> {
    /// underlying source that `read_buf` pulls bytes from
    inner: R,
    /// bytes read from `inner` that the parser has not consumed yet
    buffer: Buffer,
    // holds which tags we are in atm over depth
    // TODO(review): the note that followed here was left unfinished
    // ("to have names reference namespaces could") - complete or remove it
    depth: Vec<Name>,
    /// namespace declarations of the enclosing elements, one set per element;
    /// a set is pushed while that element's content is converted and popped afterwards
    namespaces: Vec<HashSet<Namespace>>,
}
impl<R> Reader<R> {
pub fn new(reader: R) -> Self {
Self {
inner: reader,
+ buffer: Buffer::with_capacity(MAX_STANZA_SIZE),
depth: Vec::new(),
namespaces: Vec::new(),
}
}
}
+impl<R> Reader<R>
+where
+ R: AsyncRead + Unpin,
+{
+ async fn read_buf(&mut self) -> Result<usize> {
+ Ok(self.inner.read_buf(&mut self.buffer).await?)
+ }
+
+ async fn read_element<'s>(&'s mut self) -> Result<Element> {
+ self.read_buf().await?;
+ let mut input = str::from_utf8(self.buffer.data())?;
+ loop {
+ match xml::Element::parse(input) {
+ Ok((rest, e)) => {
+ let len = self.buffer.available_data() - rest.as_bytes().len();
+ let element = Reader::<R>::element_from_xml(&mut self.namespaces, e)?;
+ self.buffer.consume(len);
+ return Ok(element);
+ }
+ std::result::Result::Err(e) => match e {
+ Err::Incomplete(_) => {
+ self.read_buf().await?;
+ input = str::from_utf8(self.buffer.data())?;
+ }
+ // TODO: better error
+ Err::Error(e) => return Err(Error::ParseError(e.to_string())),
+ Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
+ },
+ }
+ }
+ }
+}
+
+impl<R> Reader<R> {
+ fn element_from_xml(
+ namespaces: &mut Vec<HashSet<Namespace>>,
+ element: xml::Element,
+ ) -> Result<Element> {
+ match element {
+ xml::Element::Empty(empty_elem_tag) => {
+ let mut namespace_declarations = HashSet::new();
+ for (prefix, namespace) in
+ empty_elem_tag.attributes.iter().filter_map(|attribute| {
+ if let xml::Attribute::NamespaceDeclaration { ns_name, value } = attribute {
+ Some((ns_name, value))
+ } else {
+ None
+ }
+ })
+ {
+ let prefix = match prefix {
+ xml::NSAttName::PrefixedAttName(prefixed_att_name) => {
+ Some(prefixed_att_name.to_string())
+ }
+ xml::NSAttName::DefaultAttName => None,
+ };
+ let namespace = Namespace {
+ prefix,
+ namespace: namespace.process()?,
+ };
+ if !namespace_declarations.insert(namespace.clone()) {
+ return Err(Error::DuplicateNameSpace(namespace));
+ }
+ }
+
+ // all namespaces available to the element (from both parent elements and element itself)
+ let namespace_stack: Vec<&Namespace> = namespaces
+ .iter()
+ .flatten()
+ .chain(namespace_declarations.iter())
+ .collect();
+
+ let mut attributes = HashMap::new();
+
+ for (q_name, value) in empty_elem_tag.attributes.iter().filter_map(|attribute| {
+ if let xml::Attribute::Attribute { name, value } = attribute {
+ Some((name, value))
+ } else {
+ None
+ }
+ }) {
+ let namespace;
+ let attribute_name;
+ match q_name {
+ xml::QName::PrefixedName(prefixed_name) => {
+ namespace = namespace_stack.iter().rfind(|namespace| {
+ namespace.prefix.as_deref() == Some(**prefixed_name.prefix)
+ });
+ attribute_name = prefixed_name.local_part.to_string();
+ }
+ xml::QName::UnprefixedName(unprefixed_name) => {
+ namespace = namespace_stack
+ .iter()
+ .rfind(|namespace| namespace.prefix == None);
+ attribute_name = unprefixed_name.to_string();
+ }
+ }
+ if let Some(namespace) = namespace {
+ let namespace = (*namespace).clone();
+ let name = Name {
+ namespace,
+ name: attribute_name,
+ };
+ let value = value.process()?;
+ if let Some(_value) = attributes.insert(name, value) {
+ return Err(Error::DuplicateAttribute(q_name.to_string()));
+ }
+ } else {
+ return Err(Error::UnqualifiedNamespace(q_name.to_string()));
+ }
+ }
+
+ let name;
+ let namespace;
+ match &empty_elem_tag.name {
+ xml::QName::PrefixedName(prefixed_name) => {
+ namespace = namespace_stack.iter().rfind(|namespace| {
+ namespace.prefix.as_deref() == Some(**prefixed_name.prefix)
+ });
+ name = prefixed_name.local_part.to_string();
+ }
+ xml::QName::UnprefixedName(unprefixed_name) => {
+ namespace = namespace_stack
+ .iter()
+ .rfind(|namespace| namespace.prefix == None);
+ name = unprefixed_name.to_string();
+ }
+ }
+
+ let namespace = (*namespace
+ .ok_or_else(|| Error::UnqualifiedNamespace(empty_elem_tag.name.to_string()))?)
+ .clone();
+
+ let name = Name { namespace, name };
+
+ return Ok(Element {
+ name,
+ namespace_decl: namespace_declarations,
+ attributes,
+ content: Vec::new(),
+ });
+ }
+ xml::Element::NotEmpty(s_tag, content, e_tag) => {
+ if s_tag.name != e_tag.name {
+ return Err(Error::MismatchedEndTag(
+ s_tag.name.to_string(),
+ e_tag.name.to_string(),
+ ));
+ }
+ let mut namespace_declarations = HashSet::new();
+ for (prefix, namespace) in s_tag.attributes.iter().filter_map(|attribute| {
+ if let xml::Attribute::NamespaceDeclaration { ns_name, value } = attribute {
+ Some((ns_name, value))
+ } else {
+ None
+ }
+ }) {
+ let prefix = match prefix {
+ xml::NSAttName::PrefixedAttName(prefixed_att_name) => {
+ Some(prefixed_att_name.to_string())
+ }
+ xml::NSAttName::DefaultAttName => None,
+ };
+ let namespace = Namespace {
+ prefix,
+ namespace: namespace.process()?,
+ };
+ if !namespace_declarations.insert(namespace.clone()) {
+ return Err(Error::DuplicateNameSpace(namespace));
+ }
+ }
+
+ // all namespaces available to the element (from both parent elements and element itself)
+ let namespace_stack: Vec<&Namespace> = namespaces
+ .iter()
+ .flatten()
+ .chain(namespace_declarations.iter())
+ .collect();
+
+ let mut attributes = HashMap::new();
+
+ for (q_name, value) in s_tag.attributes.iter().filter_map(|attribute| {
+ if let xml::Attribute::Attribute { name, value } = attribute {
+ Some((name, value))
+ } else {
+ None
+ }
+ }) {
+ let namespace;
+ let attribute_name;
+ match q_name {
+ xml::QName::PrefixedName(prefixed_name) => {
+ namespace = namespace_stack.iter().rfind(|namespace| {
+ namespace.prefix.as_deref() == Some(**prefixed_name.prefix)
+ });
+ attribute_name = prefixed_name.local_part.to_string();
+ }
+ xml::QName::UnprefixedName(unprefixed_name) => {
+ namespace = namespace_stack
+ .iter()
+ .rfind(|namespace| namespace.prefix == None);
+ attribute_name = unprefixed_name.to_string();
+ }
+ }
+ if let Some(namespace) = namespace {
+ let namespace = (*namespace).clone();
+ let name = Name {
+ namespace,
+ name: attribute_name,
+ };
+ let value = value.process()?;
+ if let Some(_value) = attributes.insert(name, value) {
+ return Err(Error::DuplicateAttribute(q_name.to_string()));
+ }
+ } else {
+ return Err(Error::UnqualifiedNamespace(q_name.to_string()));
+ }
+ }
+
+ let name;
+ let namespace;
+ match &s_tag.name {
+ xml::QName::PrefixedName(prefixed_name) => {
+ namespace = namespace_stack.iter().rfind(|namespace| {
+ namespace.prefix.as_deref() == Some(**prefixed_name.prefix)
+ });
+ name = prefixed_name.local_part.to_string();
+ }
+ xml::QName::UnprefixedName(unprefixed_name) => {
+ namespace = namespace_stack
+ .iter()
+ .rfind(|namespace| namespace.prefix == None);
+ name = unprefixed_name.to_string();
+ }
+ }
+
+ let namespace = (*namespace
+ .ok_or_else(|| Error::UnqualifiedNamespace(s_tag.name.to_string()))?)
+ .clone();
+
+ let name = Name { namespace, name };
+
+ namespaces.push(namespace_declarations.clone());
+
+ let content = Self::content_from_xml(namespaces, content)?;
+
+ namespaces.pop();
+
+ return Ok(Element {
+ name,
+ namespace_decl: namespace_declarations,
+ attributes,
+ content,
+ });
+ }
+ }
+ }
+
+ fn content_from_xml(
+ namespaces: &mut Vec<HashSet<Namespace>>,
+ element: xml::Content,
+ ) -> Result<Vec<Content>> {
+ let mut content = Vec::new();
+ let mut text = element.char_data.map(|str| String::from(*str));
+ for (content_item, char_data) in element.content {
+ match content_item {
+ xml::ContentItem::Element(element) => {
+ text.map(|text| content.push(Content::Text(text)));
+ content.push(Content::Element(Self::element_from_xml(
+ namespaces, element,
+ )?));
+ text = char_data.map(|str| String::from(*str));
+ }
+ xml::ContentItem::Reference(reference) => {
+ let data = reference.process()?;
+ if let Some(text) = &mut text {
+ text.push(data)
+ } else {
+ text = Some(String::from(data))
+ }
+ char_data.map(|char_data| text.as_mut().map(|s| s.push_str(*char_data)));
+ }
+ xml::ContentItem::CDSect(cd_sect) => {
+ if let Some(text) = &mut text {
+ text.push_str(**cd_sect)
+ } else {
+ text = Some(String::from(**cd_sect))
+ }
+ char_data.map(|char_data| text.as_mut().map(|s| s.push_str(*char_data)));
+ }
+ // TODO: is this important?
+ xml::ContentItem::PI(pi) => {
+ char_data.map(|char_data| text.as_mut().map(|s| s.push_str(*char_data)));
+ }
+ // TODO: comments?
+ xml::ContentItem::Comment(comment) => {
+ char_data.map(|char_data| text.as_mut().map(|s| s.push_str(*char_data)));
+ }
+ }
+ }
+ text.map(|text| content.push(Content::Text(text)));
+ todo!()
+ }
+}
+
// impl<R> Reader<R>
// where
// R: AsyncBufReadExt + Unpin,