aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLibravatar cel 🌸 <cel@bunny.garden>2024-11-10 22:28:55 +0000
committerLibravatar cel 🌸 <cel@bunny.garden>2024-11-10 22:28:55 +0000
commit6d4832480b1804652bb4faa33b361ffb43734270 (patch)
treec79d65f833283d43e75f1cb4518a682b7933a049
parent140af50536ebc32ae6461852daa2df0fc2d197ca (diff)
downloadpeanuts-6d4832480b1804652bb4faa33b361ffb43734270.tar.gz
peanuts-6d4832480b1804652bb4faa33b361ffb43734270.tar.bz2
peanuts-6d4832480b1804652bb4faa33b361ffb43734270.zip
WIP: impl Stream for Reader
-rw-r--r--src/error.rs1
-rw-r--r--src/reader.rs362
-rw-r--r--src/xml/composers.rs1
-rw-r--r--src/xml/mod.rs18
-rw-r--r--src/xml/parsers.rs17
5 files changed, 305 insertions, 94 deletions
diff --git a/src/error.rs b/src/error.rs
index 1f9c1e6..c84c7d0 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -14,6 +14,7 @@ pub enum Error {
DuplicateAttribute(String),
UnqualifiedNamespace(String),
MismatchedEndTag(String, String),
+ NotInElement(String),
}
impl From<std::io::Error> for Error {
diff --git a/src/reader.rs b/src/reader.rs
index bca8edd..dc16d31 100644
--- a/src/reader.rs
+++ b/src/reader.rs
@@ -1,9 +1,11 @@
use circular::Buffer;
-use futures::Stream;
+use futures::{FutureExt, Stream};
use nom::Err;
use std::{
collections::{BTreeMap, HashMap, HashSet},
+ future::Future,
path::Prefix,
+ pin::{pin, Pin},
str::{self, FromStr},
};
use tokio::io::{AsyncBufRead, AsyncBufReadExt, AsyncRead, AsyncReadExt};
@@ -42,14 +44,77 @@ impl<R> Reader<R>
where
R: AsyncRead + Unpin,
{
- async fn read_buf(&mut self) -> Result<usize> {
+ async fn read_buf<'s>(&mut self) -> Result<usize> {
Ok(self.inner.read_buf(&mut self.buffer).await?)
}
+ async fn read_prolog<'s>(&'s mut self) -> Result<()> {
+ loop {
+ self.read_buf().await?;
+ let input = str::from_utf8(self.buffer.data())?;
+ match xml::Prolog::parse(input) {
+ Ok((rest, _prolog)) => {
+ let len = self.buffer.available_data() - rest.as_bytes().len();
+ self.buffer.consume(len);
+ return Ok(());
+ }
+ std::result::Result::Err(e) => match e {
+ Err::Incomplete(_) => {}
+ // TODO: better error
+ Err::Error(e) => return Err(Error::ParseError(e.to_string())),
+ Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
+ },
+ }
+ }
+ }
+
+ async fn read_start_tag<'s>(&'s mut self) -> Result<Element> {
+ loop {
+ self.read_buf().await?;
+ let input = str::from_utf8(self.buffer.data())?;
+ match xml::STag::parse(input) {
+ Ok((rest, e)) => {
+ let len = self.buffer.available_data() - rest.as_bytes().len();
+ let element =
+ Reader::<R>::start_tag_from_xml(&mut self.depth, &mut self.namespaces, e)?;
+ self.buffer.consume(len);
+ return Ok(element);
+ }
+ std::result::Result::Err(e) => match e {
+ Err::Incomplete(_) => {}
+ // TODO: better error
+ Err::Error(e) => return Err(Error::ParseError(e.to_string())),
+ Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
+ },
+ }
+ }
+ }
+
+ async fn read_end_tag<'s>(&'s mut self) -> Result<()> {
+ loop {
+ self.read_buf().await?;
+ let input = str::from_utf8(self.buffer.data())?;
+ match xml::ETag::parse(input) {
+ Ok((rest, e)) => {
+ let len = self.buffer.available_data() - rest.as_bytes().len();
+ Reader::<R>::end_tag_from_xml(&mut self.depth, &mut self.namespaces, e)?;
+ self.buffer.consume(len);
+ return Ok(());
+ }
+ std::result::Result::Err(e) => match e {
+ Err::Incomplete(_) => {}
+ // TODO: better error
+ Err::Error(e) => return Err(Error::ParseError(e.to_string())),
+ Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
+ },
+ }
+ }
+ }
+
async fn read_element<'s>(&'s mut self) -> Result<Element> {
- self.read_buf().await?;
- let mut input = str::from_utf8(self.buffer.data())?;
loop {
+ self.read_buf().await?;
+ let input = str::from_utf8(self.buffer.data())?;
match xml::Element::parse(input) {
Ok((rest, e)) => {
let len = self.buffer.available_data() - rest.as_bytes().len();
@@ -58,10 +123,37 @@ where
return Ok(element);
}
std::result::Result::Err(e) => match e {
- Err::Incomplete(_) => {
- self.read_buf().await?;
- input = str::from_utf8(self.buffer.data())?;
+ Err::Incomplete(_) => {}
+ // TODO: better error
+ Err::Error(e) => return Err(Error::ParseError(e.to_string())),
+ Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
+ },
+ }
+ }
+ }
+
+ async fn read_content<'s>(&'s mut self) -> Result<Content> {
+ loop {
+ self.read_buf().await?;
+ let input = str::from_utf8(self.buffer.data())?;
+
+ match xml::ContentItem::parse(input) {
+ Ok((rest, c)) => {
+ match c {
+ xml::ContentItem::CharData(char_data) => todo!(),
+ xml::ContentItem::Element(element) => todo!(),
+ xml::ContentItem::Reference(reference) => todo!(),
+ xml::ContentItem::CDSect(cdsect) => todo!(),
+ xml::ContentItem::PI(pi) => todo!(),
+ xml::ContentItem::Comment(comment) => todo!(),
}
+ let len = self.buffer.available_data() - rest.as_bytes().len();
+ let content = Reader::<R>::content_item_from_xml(&mut self.namespaces, e)?;
+ self.buffer.consume(len);
+ return Ok(element);
+ }
+ std::result::Result::Err(e) => match e {
+ Err::Incomplete(_) => {}
// TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
@@ -72,6 +164,145 @@ where
}
impl<R> Reader<R> {
+ fn content_item_from_xml(
+ namespaces: &mut Vec<HashSet<Namespace>>,
+ item: xml::ContentItem,
+ ) -> Result<Content> {
+ todo!()
+ }
+
+ fn start_tag_from_xml(
+ depth: &mut Vec<Name>,
+ namespaces: &mut Vec<HashSet<Namespace>>,
+ s_tag: xml::STag,
+ ) -> Result<Element> {
+ let mut namespace_declarations = HashSet::new();
+ for (prefix, namespace) in s_tag.attributes.iter().filter_map(|attribute| {
+ if let xml::Attribute::NamespaceDeclaration { ns_name, value } = attribute {
+ Some((ns_name, value))
+ } else {
+ None
+ }
+ }) {
+ let prefix = match prefix {
+ xml::NSAttName::PrefixedAttName(prefixed_att_name) => {
+ Some(prefixed_att_name.to_string())
+ }
+ xml::NSAttName::DefaultAttName => None,
+ };
+ let namespace = Namespace {
+ prefix,
+ namespace: namespace.process()?,
+ };
+ if !namespace_declarations.insert(namespace.clone()) {
+ return Err(Error::DuplicateNameSpace(namespace));
+ }
+ }
+
+ // all namespaces available to the element (from both parent elements and element itself)
+ let namespace_stack: Vec<&Namespace> = namespaces
+ .iter()
+ .flatten()
+ .chain(namespace_declarations.iter())
+ .collect();
+
+ let mut attributes = HashMap::new();
+
+ for (q_name, value) in s_tag.attributes.iter().filter_map(|attribute| {
+ if let xml::Attribute::Attribute { name, value } = attribute {
+ Some((name, value))
+ } else {
+ None
+ }
+ }) {
+ let namespace;
+ let attribute_name;
+ match q_name {
+ xml::QName::PrefixedName(prefixed_name) => {
+ namespace = namespace_stack.iter().rfind(|namespace| {
+ namespace.prefix.as_deref() == Some(**prefixed_name.prefix)
+ });
+ attribute_name = prefixed_name.local_part.to_string();
+ }
+ xml::QName::UnprefixedName(unprefixed_name) => {
+ namespace = namespace_stack
+ .iter()
+ .rfind(|namespace| namespace.prefix == None);
+ attribute_name = unprefixed_name.to_string();
+ }
+ }
+ if let Some(namespace) = namespace {
+ let namespace = (*namespace).clone();
+ let name = Name {
+ namespace,
+ name: attribute_name,
+ };
+ let value = value.process()?;
+ if let Some(_value) = attributes.insert(name, value) {
+ return Err(Error::DuplicateAttribute(q_name.to_string()));
+ }
+ } else {
+ return Err(Error::UnqualifiedNamespace(q_name.to_string()));
+ }
+ }
+
+ let name;
+ let namespace;
+ match &s_tag.name {
+ xml::QName::PrefixedName(prefixed_name) => {
+ namespace = namespace_stack
+ .iter()
+ .rfind(|namespace| namespace.prefix.as_deref() == Some(**prefixed_name.prefix));
+ name = prefixed_name.local_part.to_string();
+ }
+ xml::QName::UnprefixedName(unprefixed_name) => {
+ namespace = namespace_stack
+ .iter()
+ .rfind(|namespace| namespace.prefix == None);
+ name = unprefixed_name.to_string();
+ }
+ }
+
+ let namespace = (*namespace
+ .ok_or_else(|| Error::UnqualifiedNamespace(s_tag.name.to_string()))?)
+ .clone();
+
+ let name = Name { namespace, name };
+
+ depth.push(name.clone());
+
+ namespaces.push(namespace_declarations.clone());
+
+ return Ok(Element {
+ name,
+ namespace_decl: namespace_declarations,
+ attributes,
+ content: Vec::new(),
+ });
+ }
+
+ fn end_tag_from_xml(
+ depth: &mut Vec<Name>,
+ namespaces: &mut Vec<HashSet<Namespace>>,
+ e_tag: xml::ETag,
+ ) -> Result<()> {
+ if let Some(s_tag_name) = depth.pop() {
+ if s_tag_name.namespace.prefix.as_deref() == e_tag.name.prefix()
+ && s_tag_name.name == e_tag.name.local_part()
+ {
+ namespaces.pop();
+ return Ok(());
+ } else {
+ return Err(Error::MismatchedEndTag(
+ s_tag_name.name,
+ e_tag.name.to_string(),
+ ));
+ }
+ } else {
+ return Err(Error::NotInElement(e_tag.name.to_string()));
+ }
+ }
+
fn element_from_xml(
namespaces: &mut Vec<HashSet<Namespace>>,
element: xml::Element,
@@ -343,88 +574,22 @@ impl<R> Reader<R> {
}
}
-// impl<R> Reader<R>
-// where
-// R: AsyncBufReadExt + Unpin,
-// {
-// /// could resursively read and include namespace tree with values to be shadowed within new local context
-// async fn read_recursive(&mut self, namespaces: BTreeMap<Option<String>, String>) -> Result<Element, Error> {
-// let element;
-// let len;
-// loop {
-// let buf = self.inner.fill_buf().await?;
-// let input = str::from_utf8(buf)?;
-// match crate::xml::element(input) {
-// Ok((rest, e)) => {
-// element = e;
-// len = buf.len() - rest.len();
-// break;
-// }
-// Err(e) => match e {
-// Err::Incomplete(_) => (),
-// e => return Err::<E, Error>(Error::ParseError(input.to_owned())),
-// },
-// }
-// }
-
-// let final;
-// match element {
-// crate::xml::Element::Empty(e) => {
-// let final = Element {
-
-// }
-// },
-// crate::xml::Element::NotEmpty(_, _, _) => todo!(),
-// }
-
-// self.inner.consume(len);
-// todo!()
-// }
-// /// reads entire next prolog, element, or misc
-// pub async fn read<E: From<Element>>(&mut self) -> Result<E, Error> {
-// let element;
-// let len;
-// loop {
-// let buf = self.inner.fill_buf().await?;
-// let input = str::from_utf8(buf)?;
-// match crate::xml::element(input) {
-// Ok((rest, e)) => {
-// element = e;
-// len = buf.len() - rest.len();
-// break;
-// }
-// Err(e) => match e {
-// Err::Incomplete(_) => (),
-// e => return Err::<E, Error>(Error::ParseError(input.to_owned())),
-// },
-// }
-// }
-// self.inner.consume(len);
-
-// // Ok(element)
-// todo!()
-// }
-// pub async fn read_start(&self) -> Result<impl From<Element>, Error> {
-// todo!()
-// }
-// pub async fn read_end(&self) -> Result<(), Error> {
-// todo!()
-// }
-// }
-
-// impl<R: AsyncBufRead> Stream for Reader<R> {
-// type Item = impl From<Element>;
-
-// async fn poll_next(
-// self: std::pin::Pin<&mut Self>,
-// cx: &mut std::task::Context<'_>,
-// ) -> std::task::Poll<Option<Self::Item>> {
-// todo!()
-// }
-// }
+impl<R: AsyncRead + Unpin> Stream for Reader<R> {
+ type Item = Result<Content>;
+
+ fn poll_next(
+ self: std::pin::Pin<&mut Self>,
+ cx: &mut std::task::Context<'_>,
+ ) -> std::task::Poll<Option<Self::Item>> {
+ let mut e = self;
+ let mut pinned = pin!(e.read_content());
+ pinned.as_mut().poll(cx).map(|result| Some(result))
+ }
+}
#[cfg(test)]
mod test {
+ use futures::{sink::Buffer, StreamExt};
use tokio::io::AsyncRead;
use super::Reader;
@@ -448,10 +613,7 @@ mod test {
}
}
- #[tokio::test]
- async fn test_element_read() {
- let mock = MockAsyncReader::new(
- "<xs:schema
+ const TEST_DOC: &'static str = "<xs:schema
xmlns:xs='http://www.w3.org/2001/XMLSchema'
targetNamespace='http://etherx.jabber.org/streams'
xmlns='http://etherx.jabber.org/streams'
@@ -529,11 +691,25 @@ mod test {
</xs:complexType>
</xs:element>
- </xs:schema>asdf
-",
- );
+ </xs:schema>asdf";
+
+ #[tokio::test]
+ async fn test_element_read() {
+ let mock = MockAsyncReader::new(TEST_DOC);
let mut reader = Reader::new(mock);
let element = reader.read_element().await.unwrap();
println!("{:#?}", element);
}
+
+ #[tokio::test]
+ async fn test_element_stream() {
+ let mock = MockAsyncReader::new(TEST_DOC);
+ let mut reader = Reader::new(mock);
+ let element = reader.read_start_tag().await.unwrap();
+ println!("{:#?}", element);
+ loop {
+ let element = reader.next().await.unwrap();
+ println!("{:#?}", element);
+ }
+ }
}
diff --git a/src/xml/composers.rs b/src/xml/composers.rs
index 949bb65..b8fbe13 100644
--- a/src/xml/composers.rs
+++ b/src/xml/composers.rs
@@ -817,6 +817,7 @@ impl<'s> Composer<'s> for Content<'s> {
ContentItem::CDSect(cd_sect) => cd_sect.write(writer).await?,
ContentItem::PI(pi) => pi.write(writer).await?,
ContentItem::Comment(comment) => comment.write(writer).await?,
+ _ => todo!("verify no split chardata"),
}
if let Some(char_data) = char_data {
char_data.write(writer).await?;
diff --git a/src/xml/mod.rs b/src/xml/mod.rs
index 221c334..9424d0b 100644
--- a/src/xml/mod.rs
+++ b/src/xml/mod.rs
@@ -47,6 +47,22 @@ pub enum QName<'s> {
UnprefixedName(UnprefixedName<'s>),
}
+impl<'s> QName<'s> {
+ pub fn prefix(&self) -> Option<&'s str> {
+ match self {
+ QName::PrefixedName(prefixed_name) => return Some(**prefixed_name.prefix),
+ QName::UnprefixedName(_) => return None,
+ }
+ }
+
+ pub fn local_part(&self) -> &str {
+ match self {
+ QName::PrefixedName(prefixed_name) => return **prefixed_name.local_part,
+ QName::UnprefixedName(unprefixed_name) => return ****unprefixed_name,
+ }
+ }
+}
+
impl<'s> ToString for QName<'s> {
fn to_string(&self) -> String {
match self {
@@ -473,7 +489,7 @@ pub struct ETag<'s> {
#[derive(Debug, Clone)]
pub enum ContentItem<'s> {
- // CharData(&'s str),
+ CharData(CharData<'s>),
Element(Element<'s>),
Reference(Reference<'s>),
CDSect(CDSect<'s>),
diff --git a/src/xml/parsers.rs b/src/xml/parsers.rs
index 3f67be7..3cbefd3 100644
--- a/src/xml/parsers.rs
+++ b/src/xml/parsers.rs
@@ -733,6 +733,23 @@ impl<'s> Parser<'s, ETag<'s>> for ETag<'s> {
}
}
+impl<'s> Parser<'s, ContentItem<'s>> for ContentItem<'s> {
+ fn parse(input: &'s str) -> IResult<&str, ContentItem<'s>> {
+ alt((
+ map(CharData::parse, |char_data| {
+ ContentItem::CharData(char_data)
+ }),
+ map(Element::parse, |element| ContentItem::Element(element)),
+ map(Reference::parse, |reference| {
+ ContentItem::Reference(reference)
+ }),
+ map(CDSect::parse, |cd_sect| ContentItem::CDSect(cd_sect)),
+ map(PI::parse, |pi| ContentItem::PI(pi)),
+ map(Comment::parse, |comment| ContentItem::Comment(comment)),
+ ))(input)
+ }
+}
+
/// [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
impl<'s> Parser<'s, Content<'s>> for Content<'s> {
fn parse(input: &'s str) -> IResult<&str, Content<'s>> {