about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: cel 🌸 <cel@bunny.garden> 2024-11-24 02:05:41 +0000
committer: cel 🌸 <cel@bunny.garden> 2024-11-24 02:05:41 +0000
commit: 87e6ff405b0d687ed341f304fba7c5b391a49359 (patch)
tree: f56ddd5271fb2bb104f641c035e58a744038f5cf
parent: c1e6f7e918eacaad9c8b1a4b27fcd4d6245aaf68 (diff)
download: peanuts-87e6ff405b0d687ed341f304fba7c5b391a49359.tar.gz
peanuts-87e6ff405b0d687ed341f304fba7c5b391a49359.tar.bz2
peanuts-87e6ff405b0d687ed341f304fba7c5b391a49359.zip
misc
-rw-r--r-- src/declaration.rs 21
-rw-r--r-- src/element.rs 19
-rw-r--r-- src/error.rs 8
-rw-r--r-- src/lib.rs 11
-rw-r--r-- src/reader.rs 108
-rw-r--r-- src/writer.rs 75
-rw-r--r-- src/xml/mod.rs 44
7 files changed, 240 insertions, 46 deletions
diff --git a/src/declaration.rs b/src/declaration.rs
new file mode 100644
index 0000000..2c0855f
--- /dev/null
+++ b/src/declaration.rs
@@ -0,0 +1,21 @@
+pub struct Declaration { // owned form of an XML declaration (version, optional encoding, optional SDDecl value)
+ pub version_info: VersionInfo, // XML version named by the declaration
+ pub encoding_decl: Option<String>, // encoding name as an owned string; None when no encoding is declared
+ pub sd_decl: Option<bool>, // boolean carried by the SDDecl production; None when absent
+}
+
+#[derive(Clone, Copy)]
+pub enum VersionInfo { // XML version stored in a Declaration
+ One, // the reader maps xml::VersionNum::One to this variant
+ OneDotOne, // the reader maps xml::VersionNum::OneDotOne to this variant
+}
+
+impl Declaration {
+ pub fn version(version: VersionInfo) -> Self { // build a Declaration that sets only the version
+ Self {
+ version_info: version,
+ encoding_decl: None, // no encoding declared
+ sd_decl: None, // no SDDecl declared
+ }
+ }
+}
diff --git a/src/element.rs b/src/element.rs
index 04f2e5e..2b149a8 100644
--- a/src/element.rs
+++ b/src/element.rs
@@ -9,8 +9,22 @@ use std::{
use crate::{
error::Error,
xml::{self, parsers_complete::Parser, Attribute},
+ Result,
};
+pub trait FromElement: Sized { // types that can be built from an Element
+ fn from_element(element: Element) -> Result<Self>; // takes the element by value; failure is reported through the crate-level Result
+}
+
+pub trait IntoElement { // types that can produce an Element
+ fn into_element(&self) -> Element; // builds an Element from a borrowed self (self is not consumed)
+
+ fn get_content(&self) -> Vec<Content> { // default: the content list of the element built by into_element
+ let element = self.into_element(); // the whole element is built just to take its content
+ element.content
+ }
+}
+
// when are namespace names chosen, then, if they are automatically calculated?
// namespaces are held by readers and writers.
#[derive(PartialEq, Eq, Hash, Clone, Debug)]
@@ -26,7 +40,7 @@ pub struct Name {
pub local_name: String,
}
-#[derive(Debug)]
+#[derive(Debug, Clone)]
pub enum Content {
Element(Element),
Text(String),
@@ -35,7 +49,7 @@ pub enum Content {
}
// should this be a trait?
-#[derive(Debug)]
+#[derive(Debug, Clone)]
pub struct Element {
pub name: Name,
// namespace: Name,
@@ -51,6 +65,7 @@ pub struct Element {
// you can validate the prefix and calculate the namespace from the current reader context
// this results in readers and writers being able to return qualification errors as they aren't able to create elements until every part is qualified.
pub attributes: HashMap<Name, String>,
+ // TODO: make a hashmap maybe? to be able to address parts of the content individually
pub content: Vec<Content>,
}
diff --git a/src/error.rs b/src/error.rs
index 69993ed..eda527e 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -1,6 +1,6 @@
use std::{num::ParseIntError, str::Utf8Error};
-use crate::element::{Name, NamespaceDeclaration};
+use crate::element::{Content, Name, NamespaceDeclaration};
#[derive(Debug)]
pub enum Error {
@@ -17,6 +17,12 @@ pub enum Error {
NotInElement(String),
ExtraData(String),
UndeclaredNamespace(String),
+ IncorrectName(Name),
+ UnexpectedAttribute(Name),
+ DeserializeError(String),
+ UnexpectedNumberOfContents(usize),
+ UnexpectedContent(Content),
+ UnexpectedElement(Name),
}
impl From<std::io::Error> for Error {
diff --git a/src/lib.rs b/src/lib.rs
index e8486c4..2e38d4e 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,7 +1,16 @@
-mod element;
+pub mod declaration;
+pub mod element;
mod error;
pub mod reader;
mod writer;
pub mod xml;
pub type Result<T> = std::result::Result<T, error::Error>;
+
+pub const XML_NS: &str = "http://www.w3.org/XML/1998/namespace";
+pub const XMLNS_NS: &str = "http://www.w3.org/2000/xmlns/";
+
+pub use element::Element;
+pub use error::Error;
+pub use reader::Reader;
+pub use writer::Writer;
diff --git a/src/reader.rs b/src/reader.rs
index f1f3744..ee8d491 100644
--- a/src/reader.rs
+++ b/src/reader.rs
@@ -2,7 +2,7 @@ use circular::Buffer;
use futures::{FutureExt, Stream};
use nom::Err;
use std::{
- collections::{BTreeMap, HashMap, HashSet},
+ collections::{hash_set, BTreeMap, HashMap, HashSet},
future::Future,
path::Prefix,
pin::{pin, Pin},
@@ -10,19 +10,20 @@ use std::{
};
use tokio::io::{AsyncBufRead, AsyncBufReadExt, AsyncRead, AsyncReadExt};
-static MAX_STANZA_SIZE: usize = 65536;
-
use crate::{
- element::{Content, Element, Name, NamespaceDeclaration},
+ declaration::{Declaration, VersionInfo},
+ element::{Content, Element, FromElement, Name, NamespaceDeclaration},
error::Error,
xml::{self, parsers::Parser},
- Result,
+ Result, XMLNS_NS, XML_NS,
};
+static MAX_STANZA_SIZE: usize = 65536;
+
/// streaming reader that tracks depth and available namespaces at current depth
pub struct Reader<R> {
inner: R,
- buffer: Buffer,
+ pub buffer: Buffer,
// holds which tags we are in atm over depth
// to have names reference namespaces could
depth: Vec<Name>,
@@ -31,13 +32,27 @@ pub struct Reader<R> {
impl<R> Reader<R> {
pub fn new(reader: R) -> Self {
+ let mut default_declarations = HashSet::new(); // declarations available before any element is read
+ default_declarations.insert(NamespaceDeclaration { // binds prefix `xml` to XML_NS
+ prefix: Some("xml".to_string()),
+ namespace: XML_NS.to_string(),
+ });
+ default_declarations.insert(NamespaceDeclaration { // binds prefix `xmlns` to XMLNS_NS
+ prefix: Some("xmlns".to_string()),
+ namespace: XMLNS_NS.to_string(),
+ });
Self {
inner: reader,
buffer: Buffer::with_capacity(MAX_STANZA_SIZE),
depth: Vec::new(),
- namespace_declarations: Vec::new(),
+ // TODO: make sure reserved namespaces are never overwritten
+ namespace_declarations: vec![default_declarations], // the defaults form the first entry of the declaration stack
}
}
+
+ pub fn into_inner(self) -> R { // consume the Reader and hand back the wrapped reader
+ self.inner // only `inner` is returned; the rest of the Reader, including `buffer`, is dropped
+ }
}
impl<R> Reader<R>
@@ -48,18 +63,35 @@ where
Ok(self.inner.read_buf(&mut self.buffer).await?)
}
- pub async fn read_prolog<'s>(&'s mut self) -> Result<()> {
+ pub async fn read_prolog<'s>(&'s mut self) -> Result<Option<Declaration>> {
loop {
- self.read_buf().await?;
let input = str::from_utf8(self.buffer.data())?;
match xml::Prolog::parse(input) {
- Ok((rest, _prolog)) => {
+ Ok((rest, (decl, _misc, _doctype_decl))) => {
let len = self.buffer.available_data() - rest.as_bytes().len();
- self.buffer.consume(len);
- return Ok(());
+ // TODO: return error if there is a doctype decl
+ if let Some(decl) = decl {
+ let declaration = Declaration {
+ version_info: match *decl.version_info {
+ xml::VersionNum::One => VersionInfo::One,
+ xml::VersionNum::OneDotOne => VersionInfo::OneDotOne,
+ },
+ encoding_decl: decl
+ .encoding_decl
+ .map(|encoding_decl| (**encoding_decl).to_string()),
+ sd_decl: decl.sd_decl.map(|sd_decl| *sd_decl),
+ };
+ self.buffer.consume(len);
+ return Ok(Some(declaration));
+ } else {
+ self.buffer.consume(len);
+ return Ok(None);
+ }
}
std::result::Result::Err(e) => match e {
- Err::Incomplete(_) => {}
+ Err::Incomplete(_) => {
+ self.read_buf().await?;
+ }
// TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
@@ -68,9 +100,18 @@ where
}
}
+ pub async fn read_start<'s, T: FromElement>(&'s mut self) -> Result<T> { // read a start tag and convert it into T
+ let element = self.read_start_tag().await?; // errors from read_start_tag are propagated unchanged
+ FromElement::from_element(element) // conversion errors come from T's FromElement impl
+ }
+
+ pub async fn read<'s, T: FromElement>(&'s mut self) -> Result<T> { // read an element via read_element and convert it into T
+ let element = self.read_element().await?; // errors from read_element are propagated unchanged
+ FromElement::from_element(element) // conversion errors come from T's FromElement impl
+ }
+
pub async fn read_start_tag<'s>(&'s mut self) -> Result<Element> {
loop {
- self.read_buf().await?;
let input = str::from_utf8(self.buffer.data())?;
match xml::STag::parse(input) {
Ok((rest, e)) => {
@@ -84,7 +125,9 @@ where
return Ok(element);
}
std::result::Result::Err(e) => match e {
- Err::Incomplete(_) => {}
+ Err::Incomplete(_) => {
+ self.read_buf().await?;
+ }
// TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
@@ -95,7 +138,6 @@ where
pub async fn read_end_tag<'s>(&'s mut self) -> Result<()> {
loop {
- self.read_buf().await?;
let input = str::from_utf8(self.buffer.data())?;
match xml::ETag::parse(input) {
Ok((rest, e)) => {
@@ -109,7 +151,9 @@ where
return Ok(());
}
std::result::Result::Err(e) => match e {
- Err::Incomplete(_) => {}
+ Err::Incomplete(_) => {
+ self.read_buf().await?;
+ }
// TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
@@ -120,7 +164,6 @@ where
pub async fn read_element<'s>(&'s mut self) -> Result<Element> {
loop {
- self.read_buf().await?;
let input = str::from_utf8(self.buffer.data())?;
match xml::Element::parse(input) {
Ok((rest, e)) => {
@@ -131,7 +174,9 @@ where
return Ok(element);
}
std::result::Result::Err(e) => match e {
- Err::Incomplete(_) => {}
+ Err::Incomplete(_) => {
+ self.read_buf().await?;
+ }
// TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
@@ -144,7 +189,6 @@ where
let mut last_char = false;
let mut text = String::new();
loop {
- self.read_buf().await?;
let input = str::from_utf8(self.buffer.data())?;
if last_char == false {
match xml::CharData::parse(input) {
@@ -155,7 +199,9 @@ where
last_char = true;
}
std::result::Result::Err(e) => match e {
- Err::Incomplete(_needed) => continue,
+ Err::Incomplete(_) => {
+ self.read_buf().await?;
+ }
_ => match xml::ContentItem::parse(input) {
Ok((rest, content_item)) => match content_item {
xml::ContentItem::Element(element) => {
@@ -207,7 +253,9 @@ where
}
},
std::result::Result::Err(e) => match e {
- Err::Incomplete(_) => continue,
+ Err::Incomplete(_) => {
+ self.read_buf().await?;
+ }
// TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
@@ -263,7 +311,9 @@ where
}
},
std::result::Result::Err(e) => match e {
- Err::Incomplete(_) => continue,
+ Err::Incomplete(_) => {
+ self.read_buf().await?;
+ }
// TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
@@ -622,7 +672,11 @@ impl<R> Reader<R> {
for (content_item, char_data) in xml_content.content {
match content_item {
xml::ContentItem::Element(element) => {
- text.map(|text| content.push(Content::Text(text)));
+ text.map(|text| {
+ if !text.is_empty() {
+ content.push(Content::Text(text))
+ }
+ });
content.push(Content::Element(Self::element_from_xml(
namespaces, element,
)?));
@@ -655,7 +709,11 @@ impl<R> Reader<R> {
}
}
}
- text.map(|text| content.push(Content::Text(text)));
+ text.map(|text| {
+ if !text.is_empty() {
+ content.push(Content::Text(text))
+ }
+ });
Ok(content)
}
}
diff --git a/src/writer.rs b/src/writer.rs
index dc5b48a..e319fdc 100644
--- a/src/writer.rs
+++ b/src/writer.rs
@@ -5,10 +5,11 @@ use futures::Sink;
use tokio::io::{AsyncWrite, AsyncWriteExt};
use crate::{
- element::{escape_str, Content, Element, Name, NamespaceDeclaration},
+ declaration::{Declaration, VersionInfo},
+ element::{escape_str, Content, Element, IntoElement, Name, NamespaceDeclaration},
error::Error,
- xml::{self, composers::Composer, parsers_complete::Parser, ETag},
- Result,
+ xml::{self, composers::Composer, parsers_complete::Parser, ETag, XMLDecl},
+ Result, XMLNS_NS, XML_NS,
};
// pub struct Writer<W, C = Composer> {
@@ -20,21 +21,69 @@ pub struct Writer<W> {
impl<W> Writer<W> {
pub fn new(writer: W) -> Self {
+ let mut default_declarations = HashSet::new(); // declarations available before any element is written
+ default_declarations.insert(NamespaceDeclaration { // binds prefix `xml` to XML_NS
+ prefix: Some("xml".to_string()),
+ namespace: XML_NS.to_string(),
+ });
+ default_declarations.insert(NamespaceDeclaration { // binds prefix `xmlns` to XMLNS_NS
+ prefix: Some("xmlns".to_string()),
+ namespace: XMLNS_NS.to_string(),
+ });
Self {
inner: writer,
depth: Vec::new(),
- namespace_declarations: Vec::new(),
+ namespace_declarations: vec![default_declarations], // the defaults form the first entry of the declaration stack
}
}
+
+ pub fn into_inner(self) -> W { // consume the Writer and hand back the wrapped writer
+ self.inner // only `inner` is returned; depth and namespace state are dropped
+ }
}
impl<W: AsyncWrite + Unpin + Send> Writer<W> {
+ pub async fn write_declaration(&mut self, version: VersionInfo) -> Result<()> { // write an XML declaration that carries only the version
+ let declaration = Declaration::version(version);
+ let version_info;
+ match declaration.version_info { // translate the public VersionInfo into the xml-level one; always the SingleQuoted variant
+ VersionInfo::One => version_info = xml::VersionInfo::SingleQuoted(xml::VersionNum::One),
+ VersionInfo::OneDotOne => {
+ version_info = xml::VersionInfo::SingleQuoted(xml::VersionNum::OneDotOne)
+ }
+ }
+ let declaration = xml::XMLDecl { // shadows the Declaration built above
+ version_info,
+ encoding_decl: None, // no encoding is emitted
+ sd_decl: None, // no SDDecl is emitted
+ };
+ declaration.write(&mut self.inner).await?; // written directly to the wrapped writer
+ Ok(())
+ }
+
+ pub async fn write_full(&mut self, into_element: &impl IntoElement) -> Result<()> { // convert the value to an Element and pass it to write_element
+ let element = into_element.into_element();
+ Ok(self.write_element(&element).await?)
+ }
+
+ pub async fn write_start(&mut self, into_element: &impl IntoElement) -> Result<()> { // convert the value to an Element and pass it to write_element_start
+ let element = into_element.into_element(); // the full element is built even though only write_element_start is called
+ Ok(self.write_element_start(&element).await?)
+ }
+
+ pub async fn write_all_content(&mut self, into_element: &impl IntoElement) -> Result<()> { // write each content item of the value's element, in order
+ for content in &into_element.get_content() { // get_content returns the content list of the built element
+ self.write_content(content).await?; // stops at the first error
+ }
+ Ok(())
+ }
+
#[async_recursion]
pub async fn write_element(&mut self, element: &Element) -> Result<()> {
if element.content.is_empty() {
self.write_empty(element).await?;
} else {
- self.write_start(element).await?;
+ self.write_element_start(element).await?;
for content in &element.content {
self.write_content(content).await?;
}
@@ -107,12 +156,11 @@ impl<W: AsyncWrite + Unpin + Send> Writer<W> {
if let Some(prefix) = &prefix {
att_name = xml::QName::PrefixedName(xml::PrefixedName {
prefix: xml::Prefix::parse_full(prefix)?,
- local_part: xml::LocalPart::parse_full(&element.name.local_name)?,
+ local_part: xml::LocalPart::parse_full(&name.local_name)?,
})
} else {
- att_name = xml::QName::UnprefixedName(xml::UnprefixedName::parse_full(
- &element.name.local_name,
- )?)
+ att_name =
+ xml::QName::UnprefixedName(xml::UnprefixedName::parse_full(&name.local_name)?)
}
let value = xml::AttValue::from(value.as_str());
@@ -131,7 +179,7 @@ impl<W: AsyncWrite + Unpin + Send> Writer<W> {
Ok(())
}
- pub async fn write_start(&mut self, element: &Element) -> Result<()> {
+ pub async fn write_element_start(&mut self, element: &Element) -> Result<()> {
let namespace_declarations_stack: Vec<_> = self
.namespace_declarations
.iter()
@@ -195,12 +243,11 @@ impl<W: AsyncWrite + Unpin + Send> Writer<W> {
if let Some(prefix) = &prefix {
att_name = xml::QName::PrefixedName(xml::PrefixedName {
prefix: xml::Prefix::parse_full(prefix)?,
- local_part: xml::LocalPart::parse_full(&element.name.local_name)?,
+ local_part: xml::LocalPart::parse_full(&name.local_name)?,
})
} else {
- att_name = xml::QName::UnprefixedName(xml::UnprefixedName::parse_full(
- &element.name.local_name,
- )?)
+ att_name =
+ xml::QName::UnprefixedName(xml::UnprefixedName::parse_full(&name.local_name)?)
}
let value = xml::AttValue::from(value.as_str());
diff --git a/src/xml/mod.rs b/src/xml/mod.rs
index 3150df0..43f3027 100644
--- a/src/xml/mod.rs
+++ b/src/xml/mod.rs
@@ -389,9 +389,9 @@ pub type Prolog<'s> = (
/// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
#[derive(Debug)]
pub struct XMLDecl<'s> {
- version_info: VersionInfo,
- encoding_decl: Option<EncodingDecl<'s>>,
- sd_decl: Option<SDDecl>,
+ pub(crate) version_info: VersionInfo,
+ pub(crate) encoding_decl: Option<EncodingDecl<'s>>,
+ pub(crate) sd_decl: Option<SDDecl>,
}
/// [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
@@ -401,6 +401,17 @@ pub enum VersionInfo {
DoubleQuoted(VersionNum),
}
+impl Deref for VersionInfo { // lets `*version_info` yield the inner VersionNum regardless of quoting
+ type Target = VersionNum;
+
+ fn deref(&self) -> &Self::Target {
+ match self { // both variants wrap a VersionNum
+ VersionInfo::SingleQuoted(version_num) => version_num,
+ VersionInfo::DoubleQuoted(version_num) => version_num,
+ }
+ }
+}
+
/// [25] Eq ::= S? '=' S?
#[derive(Clone)]
pub struct Eq;
@@ -479,6 +490,17 @@ pub enum SDDecl {
DoubleQuoted(bool),
}
+impl Deref for SDDecl { // lets `*sd_decl` yield the inner bool regardless of quoting
+ type Target = bool;
+
+ fn deref(&self) -> &Self::Target {
+ match self { // both variants wrap a bool
+ SDDecl::SingleQuoted(b) => b,
+ SDDecl::DoubleQuoted(b) => b,
+ }
+ }
+}
+
// (Productions 33 through 38 have been removed.)
/// [39] element ::= EmptyElemTag | STag content ETag
@@ -846,10 +868,26 @@ pub struct ExtParsedEnt<'s> {
// TODO?: select quote version
pub struct EncodingDecl<'s>(EncName<'s>);
+impl<'s> Deref for EncodingDecl<'s> { // exposes the wrapped EncName
+ type Target = EncName<'s>;
+
+ fn deref(&self) -> &Self::Target {
+ &self.0 // field 0 is the EncName
+ }
+}
+
/// [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
#[derive(Debug)]
pub struct EncName<'s>(&'s str);
+impl<'s> Deref for EncName<'s> { // exposes the wrapped string slice
+ type Target = &'s str;
+
+ fn deref(&self) -> &Self::Target {
+ &self.0 // field 0 is the borrowed encoding name
+ }
+}
+
#[derive(Debug)]
pub enum NotationDeclID<'s> {
External(ExternalID<'s>),