aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLibravatar cel 🌸 <cel@bunny.garden>2024-11-19 14:52:14 +0000
committerLibravatar cel 🌸 <cel@bunny.garden>2024-11-19 14:52:14 +0000
commitc8ed16a2d1d1f1b5278936eeadeae87da7cb104e (patch)
treea05ee9522535faab24ad2917918f30177a1aa712
parent4c235b972e2400bd2bb92b74b12558b485dc7142 (diff)
downloadpeanuts-c8ed16a2d1d1f1b5278936eeadeae87da7cb104e.tar.gz
peanuts-c8ed16a2d1d1f1b5278936eeadeae87da7cb104e.tar.bz2
peanuts-c8ed16a2d1d1f1b5278936eeadeae87da7cb104e.zip
store only namespace name in qualified name, not namespace declaration
-rw-r--r--src/element.rs6
-rw-r--r--src/error.rs5
-rw-r--r--src/reader.rs121
-rw-r--r--src/writer.rs43
-rw-r--r--src/xml/parsers_complete.rs13
5 files changed, 136 insertions, 52 deletions
diff --git a/src/element.rs b/src/element.rs
index 9bdde35..4c39c6a 100644
--- a/src/element.rs
+++ b/src/element.rs
@@ -10,7 +10,7 @@ use crate::{
// when are namespaces names chosen then if they are automatically calculated
// namespaces are held by readers and writers.
#[derive(PartialEq, Eq, Hash, Clone, Debug)]
-pub struct Namespace {
+pub struct NamespaceDeclaration {
pub prefix: Option<String>,
pub namespace: String,
}
@@ -18,7 +18,7 @@ pub struct Namespace {
// names are qualified, they contain a reference to the namespace (held within the reader/writer)
#[derive(PartialEq, Eq, Hash, Clone, Debug)]
pub struct Name {
- pub namespace: Namespace,
+ pub namespace: String,
pub name: String,
}
@@ -40,7 +40,7 @@ pub struct Element {
// namespace: String,
// hashmap of explicit namespace declarations on the element itself only
// possibly not needed as can be calculated at write time depending on context and qualified namespace, and for reading, element validity and namespaces are kept track of by the reader.
- pub namespace_decl: HashSet<Namespace>,
+ // pub namespace_decl: HashSet<Namespace>,
// attributes can be in a different namespace than the element. how to make sure they are valid?
// maybe include the namespace instead of or with the prefix
// you can calculate the prefix from the namespaced name and the current writer context
diff --git a/src/error.rs b/src/error.rs
index c84c7d0..2d96666 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -1,6 +1,6 @@
use std::{num::ParseIntError, str::Utf8Error};
-use crate::element::{Name, Namespace};
+use crate::element::{Name, NamespaceDeclaration};
#[derive(Debug)]
pub enum Error {
@@ -10,11 +10,12 @@ pub enum Error {
EntityProcessError(String),
// TODO: better choice for failures than string
InvalidCharRef(String),
- DuplicateNameSpace(Namespace),
+ DuplicateNameSpaceDeclaration(NamespaceDeclaration),
DuplicateAttribute(String),
UnqualifiedNamespace(String),
MismatchedEndTag(String, String),
NotInElement(String),
+ ExtraData(String),
}
impl From<std::io::Error> for Error {
diff --git a/src/reader.rs b/src/reader.rs
index 64a0ed8..654ca2a 100644
--- a/src/reader.rs
+++ b/src/reader.rs
@@ -13,7 +13,7 @@ use tokio::io::{AsyncBufRead, AsyncBufReadExt, AsyncRead, AsyncReadExt};
static MAX_STANZA_SIZE: usize = 65536;
use crate::{
- element::{Content, Element, Name, Namespace},
+ element::{Content, Element, Name, NamespaceDeclaration},
error::Error,
xml::{self, parsers::Parser},
Result,
@@ -26,7 +26,7 @@ pub struct Reader<R> {
// holds which tags we are in atm over depth
// to have names reference namespaces could
depth: Vec<Name>,
- namespaces: Vec<HashSet<Namespace>>,
+ namespace_declarations: Vec<HashSet<NamespaceDeclaration>>,
}
impl<R> Reader<R> {
@@ -35,7 +35,7 @@ impl<R> Reader<R> {
inner: reader,
buffer: Buffer::with_capacity(MAX_STANZA_SIZE),
depth: Vec::new(),
- namespaces: Vec::new(),
+ namespace_declarations: Vec::new(),
}
}
}
@@ -75,8 +75,11 @@ where
match xml::STag::parse(input) {
Ok((rest, e)) => {
let len = self.buffer.available_data() - rest.as_bytes().len();
- let element =
- Reader::<R>::start_tag_from_xml(&mut self.depth, &mut self.namespaces, e)?;
+ let element = Reader::<R>::start_tag_from_xml(
+ &mut self.depth,
+ &mut self.namespace_declarations,
+ e,
+ )?;
self.buffer.consume(len);
return Ok(element);
}
@@ -97,7 +100,11 @@ where
match xml::ETag::parse(input) {
Ok((rest, e)) => {
let len = self.buffer.available_data() - rest.as_bytes().len();
- Reader::<R>::end_tag_from_xml(&mut self.depth, &mut self.namespaces, e)?;
+ Reader::<R>::end_tag_from_xml(
+ &mut self.depth,
+ &mut self.namespace_declarations,
+ e,
+ )?;
self.buffer.consume(len);
return Ok(());
}
@@ -118,7 +125,8 @@ where
match xml::Element::parse(input) {
Ok((rest, e)) => {
let len = self.buffer.available_data() - rest.as_bytes().len();
- let element = Reader::<R>::element_from_xml(&mut self.namespaces, e)?;
+ let element =
+ Reader::<R>::element_from_xml(&mut self.namespace_declarations, e)?;
self.buffer.consume(len);
return Ok(element);
}
@@ -156,8 +164,10 @@ where
} else {
let len =
self.buffer.available_data() - rest.as_bytes().len();
- let element =
- Self::element_from_xml(&mut self.namespaces, element)?;
+ let element = Self::element_from_xml(
+ &mut self.namespace_declarations,
+ element,
+ )?;
self.buffer.consume(len);
return Ok(Content::Element(element));
}
@@ -209,12 +219,15 @@ where
match xml::ContentItem::parse(input) {
Ok((rest, content_item)) => match content_item {
xml::ContentItem::Element(element) => {
+ // text can still be empty
if !text.is_empty() {
return Ok(Content::Text(text));
} else {
let len = self.buffer.available_data() - rest.as_bytes().len();
- let element =
- Self::element_from_xml(&mut self.namespaces, element)?;
+ let element = Self::element_from_xml(
+ &mut self.namespace_declarations,
+ element,
+ )?;
self.buffer.consume(len);
return Ok(Content::Element(element));
}
@@ -264,7 +277,7 @@ where
impl<R> Reader<R> {
fn start_tag_from_xml(
depth: &mut Vec<Name>,
- namespaces: &mut Vec<HashSet<Namespace>>,
+ namespaces: &mut Vec<HashSet<NamespaceDeclaration>>,
s_tag: xml::STag,
) -> Result<Element> {
let mut namespace_declarations = HashSet::new();
@@ -281,17 +294,17 @@ impl<R> Reader<R> {
}
xml::NSAttName::DefaultAttName => None,
};
- let namespace = Namespace {
+ let namespace = NamespaceDeclaration {
prefix,
namespace: namespace.process()?,
};
if !namespace_declarations.insert(namespace.clone()) {
- return Err(Error::DuplicateNameSpace(namespace));
+ return Err(Error::DuplicateNameSpaceDeclaration(namespace));
}
}
// all namespaces available to the element (from both parent elements and element itself)
- let namespace_stack: Vec<&Namespace> = namespaces
+ let namespace_stack: Vec<&NamespaceDeclaration> = namespaces
.iter()
.flatten()
.chain(namespace_declarations.iter())
@@ -322,10 +335,9 @@ impl<R> Reader<R> {
attribute_name = unprefixed_name.to_string();
}
}
- if let Some(namespace) = namespace {
- let namespace = (*namespace).clone();
+ if let Some(namespace_declaration) = namespace {
let name = Name {
- namespace,
+ namespace: namespace_declaration.namespace.clone(),
name: attribute_name,
};
let value = value.process()?;
@@ -354,11 +366,14 @@ impl<R> Reader<R> {
}
}
- let namespace = (*namespace
+ let namespace_declaration = (*namespace
.ok_or_else(|| Error::UnqualifiedNamespace(s_tag.name.to_string()))?)
.clone();
- let name = Name { namespace, name };
+ let name = Name {
+ namespace: namespace_declaration.namespace,
+ name,
+ };
depth.push(name.clone());
@@ -366,7 +381,6 @@ impl<R> Reader<R> {
return Ok(Element {
name,
- namespace_decl: namespace_declarations,
attributes,
content: Vec::new(),
});
@@ -374,13 +388,38 @@ impl<R> Reader<R> {
fn end_tag_from_xml(
depth: &mut Vec<Name>,
- namespaces: &mut Vec<HashSet<Namespace>>,
+ namespaces: &mut Vec<HashSet<NamespaceDeclaration>>,
e_tag: xml::ETag,
) -> Result<()> {
if let Some(s_tag_name) = depth.pop() {
- if s_tag_name.namespace.prefix.as_deref() == e_tag.name.prefix()
- && s_tag_name.name == e_tag.name.local_part()
- {
+ let (namespace, name);
+ let namespace_declarations: Vec<_> = namespaces.iter().flatten().collect();
+ match e_tag.name {
+ xml::QName::PrefixedName(ref prefixed_name) => {
+ namespace = namespace_declarations
+ .iter()
+ .rfind(|namespace| {
+ namespace.prefix.as_deref() == Some(**prefixed_name.prefix)
+ })
+ .map(|namespace_decl| namespace_decl.namespace.clone())
+ .ok_or_else(|| {
+ return Error::UnqualifiedNamespace((&e_tag.name).to_string());
+ })?;
+ name = prefixed_name.local_part.to_string();
+ }
+ xml::QName::UnprefixedName(ref unprefixed_name) => {
+ namespace = namespace_declarations
+ .iter()
+ .rfind(|namespace| namespace.prefix.as_deref() == None)
+ .map(|namespace_decl| namespace_decl.namespace.clone())
+ .ok_or_else(|| {
+ return Error::UnqualifiedNamespace(e_tag.name.to_string());
+ })?;
+ name = unprefixed_name.to_string();
+ }
+ }
+ let e_tag_name = Name { namespace, name };
+ if s_tag_name == e_tag_name {
namespaces.pop();
return Ok(());
} else {
@@ -395,7 +434,7 @@ impl<R> Reader<R> {
}
fn element_from_xml(
- namespaces: &mut Vec<HashSet<Namespace>>,
+ namespaces: &mut Vec<HashSet<NamespaceDeclaration>>,
element: xml::Element,
) -> Result<Element> {
match element {
@@ -416,17 +455,17 @@ impl<R> Reader<R> {
}
xml::NSAttName::DefaultAttName => None,
};
- let namespace = Namespace {
+ let namespace = NamespaceDeclaration {
prefix,
namespace: namespace.process()?,
};
if !namespace_declarations.insert(namespace.clone()) {
- return Err(Error::DuplicateNameSpace(namespace));
+ return Err(Error::DuplicateNameSpaceDeclaration(namespace));
}
}
// all namespaces available to the element (from both parent elements and element itself)
- let namespace_stack: Vec<&Namespace> = namespaces
+ let namespace_stack: Vec<&NamespaceDeclaration> = namespaces
.iter()
.flatten()
.chain(namespace_declarations.iter())
@@ -460,7 +499,7 @@ impl<R> Reader<R> {
if let Some(namespace) = namespace {
let namespace = (*namespace).clone();
let name = Name {
- namespace,
+ namespace: namespace.namespace,
name: attribute_name,
};
let value = value.process()?;
@@ -493,11 +532,13 @@ impl<R> Reader<R> {
.ok_or_else(|| Error::UnqualifiedNamespace(empty_elem_tag.name.to_string()))?)
.clone();
- let name = Name { namespace, name };
+ let name = Name {
+ namespace: namespace.namespace,
+ name,
+ };
return Ok(Element {
name,
- namespace_decl: namespace_declarations,
attributes,
content: Vec::new(),
});
@@ -523,17 +564,17 @@ impl<R> Reader<R> {
}
xml::NSAttName::DefaultAttName => None,
};
- let namespace = Namespace {
+ let namespace = NamespaceDeclaration {
prefix,
namespace: namespace.process()?,
};
if !namespace_declarations.insert(namespace.clone()) {
- return Err(Error::DuplicateNameSpace(namespace));
+ return Err(Error::DuplicateNameSpaceDeclaration(namespace));
}
}
// all namespaces available to the element (from both parent elements and element itself)
- let namespace_stack: Vec<&Namespace> = namespaces
+ let namespace_stack: Vec<&NamespaceDeclaration> = namespaces
.iter()
.flatten()
.chain(namespace_declarations.iter())
@@ -567,7 +608,7 @@ impl<R> Reader<R> {
if let Some(namespace) = namespace {
let namespace = (*namespace).clone();
let name = Name {
- namespace,
+ namespace: namespace.namespace,
name: attribute_name,
};
let value = value.process()?;
@@ -600,7 +641,10 @@ impl<R> Reader<R> {
.ok_or_else(|| Error::UnqualifiedNamespace(s_tag.name.to_string()))?)
.clone();
- let name = Name { namespace, name };
+ let name = Name {
+ namespace: namespace.namespace,
+ name,
+ };
namespaces.push(namespace_declarations.clone());
@@ -610,7 +654,6 @@ impl<R> Reader<R> {
return Ok(Element {
name,
- namespace_decl: namespace_declarations,
attributes,
content,
});
@@ -619,7 +662,7 @@ impl<R> Reader<R> {
}
fn content_from_xml(
- namespaces: &mut Vec<HashSet<Namespace>>,
+ namespaces: &mut Vec<HashSet<NamespaceDeclaration>>,
element: xml::Content,
) -> Result<Vec<Content>> {
let mut content = Vec::new();
diff --git a/src/writer.rs b/src/writer.rs
index 08be8c2..249ced5 100644
--- a/src/writer.rs
+++ b/src/writer.rs
@@ -1,26 +1,53 @@
-use futures::{AsyncWrite, Sink};
+use std::collections::HashSet;
+
+use futures::Sink;
+use tokio::io::AsyncWrite;
use crate::{
- element::{Element, Name, Namespace},
+ element::{Element, Name, NamespaceDeclaration},
error::Error,
+ xml::{self, composers::Composer, parsers_complete::Parser, ETag},
};
// pub struct Writer<W, C = Composer> {
pub struct Writer<W> {
- writer: W,
+ inner: W,
depth: Vec<Name>,
- namespaces: Vec<(usize, Namespace)>,
+ namespaces: Vec<HashSet<NamespaceDeclaration>>,
}
-impl<W: AsyncWrite> Writer<W> {
- pub async fn write(&self, element: impl Into<Element>) -> Result<(), Error> {
+impl<W: AsyncWrite + Unpin> Writer<W> {
+ pub async fn write(&mut self, element: Element) -> Result<(), Error> {
todo!()
}
- pub async fn write_start(&self, element: impl Into<Element>) -> Result<(), Error> {
+
+ pub async fn write_start(&mut self, element: Element) -> Result<(), Error> {
todo!()
}
- pub async fn write_end(&self) -> Result<(), Error> {
+
+ pub async fn write_end(&mut self) -> Result<(), Error> {
todo!()
+ // let e_tag;
+ // if let Some(name) = self.depth.pop() {
+ // if let Some(prefix) = name.namespace.prefix {
+ // e_tag = xml::ETag {
+ // name: xml::QName::PrefixedName(xml::PrefixedName {
+ // prefix: xml::Prefix::parse_full(&prefix)?,
+ // local_part: xml::LocalPart::parse_full(&name.name)?,
+ // }),
+ // };
+ // e_tag.write(&mut self.inner).await?;
+ // Ok(())
+ // } else {
+ // e_tag = xml::ETag {
+ // name: xml::QName::UnprefixedName(xml::UnprefixedName::parse_full(&name.name)?),
+ // };
+ // e_tag.write(&mut self.inner).await?;
+ // Ok(())
+ // }
+ // } else {
+ // return Err(Error::NotInElement("".to_string()));
+ // }
}
}
diff --git a/src/xml/parsers_complete.rs b/src/xml/parsers_complete.rs
index b057f62..900a3dd 100644
--- a/src/xml/parsers_complete.rs
+++ b/src/xml/parsers_complete.rs
@@ -30,6 +30,19 @@ pub trait Parser<'s> {
type Output;
fn parse(input: &'s str) -> IResult<&str, Self::Output>;
+
+ fn parse_full(input: &'s str) -> crate::Result<Self::Output> {
+ match <Self as Parser>::parse(input) {
+ Ok((rest, output)) => {
+ if rest.is_empty() {
+ return Ok(output);
+ } else {
+ return Err(crate::error::Error::ExtraData(rest.to_string()));
+ }
+ }
+ Result::Err(e) => return Err(crate::error::Error::ParseError(e.to_string())),
+ }
+ }
}
/// [1] NSAttName ::= PrefixedAttName | DefaultAttName