about | summary | refs | log | tree | commit | diff | stats
path: root/src/reader.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/reader.rs')
-rw-r--r-- src/reader.rs 108
1 file changed, 83 insertions, 25 deletions
diff --git a/src/reader.rs b/src/reader.rs
index f1f3744..ee8d491 100644
--- a/src/reader.rs
+++ b/src/reader.rs
@@ -2,7 +2,7 @@ use circular::Buffer;
use futures::{FutureExt, Stream};
use nom::Err;
use std::{
- collections::{BTreeMap, HashMap, HashSet},
+ collections::{hash_set, BTreeMap, HashMap, HashSet},
future::Future,
path::Prefix,
pin::{pin, Pin},
@@ -10,19 +10,20 @@ use std::{
};
use tokio::io::{AsyncBufRead, AsyncBufReadExt, AsyncRead, AsyncReadExt};
-static MAX_STANZA_SIZE: usize = 65536;
-
use crate::{
- element::{Content, Element, Name, NamespaceDeclaration},
+ declaration::{Declaration, VersionInfo},
+ element::{Content, Element, FromElement, Name, NamespaceDeclaration},
error::Error,
xml::{self, parsers::Parser},
- Result,
+ Result, XMLNS_NS, XML_NS,
};
+static MAX_STANZA_SIZE: usize = 65536;
+
/// streaming reader that tracks depth and available namespaces at current depth
pub struct Reader<R> {
inner: R,
- buffer: Buffer,
+ pub buffer: Buffer,
// holds which tags we are in atm over depth
// to have names reference namespaces could
depth: Vec<Name>,
@@ -31,13 +32,27 @@ pub struct Reader<R> {
impl<R> Reader<R> {
pub fn new(reader: R) -> Self {
+ let mut default_declarations = HashSet::new();
+ default_declarations.insert(NamespaceDeclaration {
+ prefix: Some("xml".to_string()),
+ namespace: XML_NS.to_string(),
+ });
+ default_declarations.insert(NamespaceDeclaration {
+ prefix: Some("xmlns".to_string()),
+ namespace: XMLNS_NS.to_string(),
+ });
Self {
inner: reader,
buffer: Buffer::with_capacity(MAX_STANZA_SIZE),
depth: Vec::new(),
- namespace_declarations: Vec::new(),
+ // TODO: make sure reserved namespaces are never overwritten
+ namespace_declarations: vec![default_declarations],
}
}
+
+ pub fn into_inner(self) -> R {
+ self.inner
+ }
}
impl<R> Reader<R>
@@ -48,18 +63,35 @@ where
Ok(self.inner.read_buf(&mut self.buffer).await?)
}
- pub async fn read_prolog<'s>(&'s mut self) -> Result<()> {
+ pub async fn read_prolog<'s>(&'s mut self) -> Result<Option<Declaration>> {
loop {
- self.read_buf().await?;
let input = str::from_utf8(self.buffer.data())?;
match xml::Prolog::parse(input) {
- Ok((rest, _prolog)) => {
+ Ok((rest, (decl, _misc, _doctype_decl))) => {
let len = self.buffer.available_data() - rest.as_bytes().len();
- self.buffer.consume(len);
- return Ok(());
+ // TODO: return error if there is a doctype decl
+ if let Some(decl) = decl {
+ let declaration = Declaration {
+ version_info: match *decl.version_info {
+ xml::VersionNum::One => VersionInfo::One,
+ xml::VersionNum::OneDotOne => VersionInfo::OneDotOne,
+ },
+ encoding_decl: decl
+ .encoding_decl
+ .map(|encoding_decl| (**encoding_decl).to_string()),
+ sd_decl: decl.sd_decl.map(|sd_decl| *sd_decl),
+ };
+ self.buffer.consume(len);
+ return Ok(Some(declaration));
+ } else {
+ self.buffer.consume(len);
+ return Ok(None);
+ }
}
std::result::Result::Err(e) => match e {
- Err::Incomplete(_) => {}
+ Err::Incomplete(_) => {
+ self.read_buf().await?;
+ }
// TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
@@ -68,9 +100,18 @@ where
}
}
+ pub async fn read_start<'s, T: FromElement>(&'s mut self) -> Result<T> {
+ let element = self.read_start_tag().await?;
+ FromElement::from_element(element)
+ }
+
+ pub async fn read<'s, T: FromElement>(&'s mut self) -> Result<T> {
+ let element = self.read_element().await?;
+ FromElement::from_element(element)
+ }
+
pub async fn read_start_tag<'s>(&'s mut self) -> Result<Element> {
loop {
- self.read_buf().await?;
let input = str::from_utf8(self.buffer.data())?;
match xml::STag::parse(input) {
Ok((rest, e)) => {
@@ -84,7 +125,9 @@ where
return Ok(element);
}
std::result::Result::Err(e) => match e {
- Err::Incomplete(_) => {}
+ Err::Incomplete(_) => {
+ self.read_buf().await?;
+ }
// TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
@@ -95,7 +138,6 @@ where
pub async fn read_end_tag<'s>(&'s mut self) -> Result<()> {
loop {
- self.read_buf().await?;
let input = str::from_utf8(self.buffer.data())?;
match xml::ETag::parse(input) {
Ok((rest, e)) => {
@@ -109,7 +151,9 @@ where
return Ok(());
}
std::result::Result::Err(e) => match e {
- Err::Incomplete(_) => {}
+ Err::Incomplete(_) => {
+ self.read_buf().await?;
+ }
// TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
@@ -120,7 +164,6 @@ where
pub async fn read_element<'s>(&'s mut self) -> Result<Element> {
loop {
- self.read_buf().await?;
let input = str::from_utf8(self.buffer.data())?;
match xml::Element::parse(input) {
Ok((rest, e)) => {
@@ -131,7 +174,9 @@ where
return Ok(element);
}
std::result::Result::Err(e) => match e {
- Err::Incomplete(_) => {}
+ Err::Incomplete(_) => {
+ self.read_buf().await?;
+ }
// TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
@@ -144,7 +189,6 @@ where
let mut last_char = false;
let mut text = String::new();
loop {
- self.read_buf().await?;
let input = str::from_utf8(self.buffer.data())?;
if last_char == false {
match xml::CharData::parse(input) {
@@ -155,7 +199,9 @@ where
last_char = true;
}
std::result::Result::Err(e) => match e {
- Err::Incomplete(_needed) => continue,
+ Err::Incomplete(_) => {
+ self.read_buf().await?;
+ }
_ => match xml::ContentItem::parse(input) {
Ok((rest, content_item)) => match content_item {
xml::ContentItem::Element(element) => {
@@ -207,7 +253,9 @@ where
}
},
std::result::Result::Err(e) => match e {
- Err::Incomplete(_) => continue,
+ Err::Incomplete(_) => {
+ self.read_buf().await?;
+ }
// TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
@@ -263,7 +311,9 @@ where
}
},
std::result::Result::Err(e) => match e {
- Err::Incomplete(_) => continue,
+ Err::Incomplete(_) => {
+ self.read_buf().await?;
+ }
// TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
@@ -622,7 +672,11 @@ impl<R> Reader<R> {
for (content_item, char_data) in xml_content.content {
match content_item {
xml::ContentItem::Element(element) => {
- text.map(|text| content.push(Content::Text(text)));
+ text.map(|text| {
+ if !text.is_empty() {
+ content.push(Content::Text(text))
+ }
+ });
content.push(Content::Element(Self::element_from_xml(
namespaces, element,
)?));
@@ -655,7 +709,11 @@ impl<R> Reader<R> {
}
}
}
- text.map(|text| content.push(Content::Text(text)));
+ text.map(|text| {
+ if !text.is_empty() {
+ content.push(Content::Text(text))
+ }
+ });
Ok(content)
}
}