aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLibravatar cel 🌸 <cel@bunny.garden>2024-11-10 14:31:43 +0000
committerLibravatar cel 🌸 <cel@bunny.garden>2024-11-10 14:31:43 +0000
commitbe50ab4890993ae97bc79138364cd5e316566e46 (patch)
treecedf5db695f1a35a37e0c1df1efe1346d03477ea
parent593cad573baf239337c5869c92ea9e7aed61e847 (diff)
downloadpeanuts-be50ab4890993ae97bc79138364cd5e316566e46.tar.gz
peanuts-be50ab4890993ae97bc79138364cd5e316566e46.tar.bz2
peanuts-be50ab4890993ae97bc79138364cd5e316566e46.zip
implement element reading
-rw-r--r--Cargo.lock12
-rw-r--r--Cargo.toml1
-rw-r--r--src/element.rs44
-rw-r--r--src/error.rs17
-rw-r--r--src/lib.rs2
-rw-r--r--src/main.rs87
-rw-r--r--src/reader.rs325
-rw-r--r--src/xml/mod.rs241
8 files changed, 692 insertions, 37 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 6f2e8d8..215071a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -46,9 +46,9 @@ checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
[[package]]
name = "bytes"
-version = "1.6.0"
+version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9"
+checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da"
[[package]]
name = "cc"
@@ -63,6 +63,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
+name = "circular"
+version = "0.3.0"
+dependencies = [
+ "bytes",
+]
+
+[[package]]
name = "futures"
version = "0.3.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -265,6 +272,7 @@ dependencies = [
name = "peanuts"
version = "0.1.0"
dependencies = [
+ "circular",
"futures",
"nom",
"tokio",
diff --git a/Cargo.toml b/Cargo.toml
index 76552ac..5586a6e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -6,6 +6,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
+circular = { version = "0.3.0", path = "../circular" }
futures = "0.3.30"
nom = "7.1.3"
tokio = { version = "1.36.0", features = ["io-util", "net", "io-std", "full"] }
diff --git a/src/element.rs b/src/element.rs
index 35d73a3..0e0b8f1 100644
--- a/src/element.rs
+++ b/src/element.rs
@@ -1,23 +1,32 @@
// elements resemble a final tree, including inherited namespace information
-use std::collections::HashMap;
+use std::collections::{HashMap, HashSet};
+
+use crate::{
+ error::Error,
+ xml::{self, Attribute},
+};
// when are namespaces names chosen then if they are automatically calculated
// namespaces are held by readers and writers.
+#[derive(PartialEq, Eq, Hash, Clone)]
pub struct Namespace {
- prefix: Option<String>,
- namespace: String,
+ pub prefix: Option<String>,
+ pub namespace: String,
}
// names are qualified, they contain a reference to the namespace (held within the reader/writer)
+#[derive(PartialEq, Eq, Hash, Clone)]
pub struct Name {
- namespace: String,
- name: String,
+ pub namespace: Namespace,
+ pub name: String,
}
-pub enum Node {
+pub enum Content {
Element(Element),
Text(String),
+ PI(String),
+ Comment(String),
}
// should this be a trait?
@@ -29,16 +38,35 @@ pub struct Element {
// namespace: String,
// hashmap of explicit namespace declarations on the element itself only
// possibly not needed as can be calculated at write time depending on context and qualified namespace, and for reading, element validity and namespaces are kept track of by the reader.
- pub namespace_decl: HashMap<Option<String>, String>,
+ pub namespace_decl: HashSet<Namespace>,
// attributes can be in a different namespace than the element. how to make sure they are valid?
// maybe include the namespace instead of or with the prefix
// you can calculate the prefix from the namespaced name and the current writer context
// you can validate the prefix and calculate the namespace from the current reader context
// this results in readers and writers being able to return qualification errors as they aren't able to create elements until every part is qualified.
pub attributes: HashMap<Name, String>,
- pub children: Option<Vec<Node>>,
+ pub content: Vec<Content>,
}
+// impl<'s> TryFrom<xml::Element<'s>> for Element<'s> {
+// type Error = Error;
+
+// fn try_from(xml_element: xml::Element) -> Result<Self, Self::Error> {
+// match &xml_element {
+// xml::Element::Empty(empty_elem_tag) => {
+// let namespace_decl;
+// let attributes;
+// empty_elem_tag
+// .attributes
+// .into_iter()
+// .filter(|attribute| matches!(attribute, Attribute::NamespaceDeclaration(_)));
+// todo!()
+// }
+// xml::Element::NotEmpty(stag, content, etag) => todo!(),
+// }
+// }
+// }
+
// example of deriving an element:
// #[derive(XMLWrite, XMLRead)]
diff --git a/src/error.rs b/src/error.rs
index 78508ae..96c709c 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -1,9 +1,18 @@
-use std::str::Utf8Error;
+use std::{num::ParseIntError, str::Utf8Error};
+
+use crate::element::{Name, Namespace};
pub enum Error {
ReadError(std::io::Error),
Utf8Error(Utf8Error),
ParseError(String),
+ EntityProcessError(String),
+ // TODO: better choice for failures than string
+ InvalidCharRef(String),
+ DuplicateNameSpace(Namespace),
+ DuplicateAttribute(String),
+ UnqualifiedNamespace(String),
+ MismatchedEndTag(String, String),
}
impl From<std::io::Error> for Error {
@@ -17,3 +26,9 @@ impl From<Utf8Error> for Error {
Self::Utf8Error(e)
}
}
+
+impl From<ParseIntError> for Error {
+    /// Converts an integer-parsing failure into [`Error::InvalidCharRef`], keeping only the
+    /// failure's display message.
+    fn from(parse_error: ParseIntError) -> Self {
+        let message = parse_error.to_string();
+        Error::InvalidCharRef(message)
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index dcf14fe..329c092 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -3,3 +3,5 @@ mod error;
mod reader;
mod writer;
pub mod xml;
+
+/// Crate-wide result alias: a `std::result::Result` whose error type is `error::Error`.
+pub type Result<T> = std::result::Result<T, error::Error>;
diff --git a/src/main.rs b/src/main.rs
index ea86e07..580652e 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -5,12 +5,87 @@ use peanuts::xml::Document;
#[tokio::main]
async fn main() {
let (rest, document) = Document::parse(
- "<?xml version=\"1.0\"?>
-<TEST>
- <block1>Background Mark 1</block1>
- <block2>Background Mark 2</block2>
- <block3>Background Mark 3</block3>
-</TEST>ahsdkjlfhasdlkjfhkljh
+ "<?xml version='1.0' encoding='UTF-8'?>
+
+ <xs:schema
+ xmlns:xs='http://www.w3.org/2001/XMLSchema'
+ targetNamespace='http://etherx.jabber.org/streams'
+ xmlns='http://etherx.jabber.org/streams'
+ elementFormDefault='unqualified'>
+
+ <xs:import namespace='jabber:client'/>
+ <xs:import namespace='jabber:server'/>
+ <xs:import namespace='urn:ietf:params:xml:ns:xmpp-sasl'/>
+ <xs:import namespace='urn:ietf:params:xml:ns:xmpp-streams'/>
+ <xs:import namespace='urn:ietf:params:xml:ns:xmpp-tls'/>
+
+ <xs:element name='stream'>
+ <xs:complexType>
+ <xs:sequence xmlns:client='jabber:client'
+ xmlns:server='jabber:server'>
+ <xs:element ref='features'
+ minOccurs='0'
+ maxOccurs='1'/>
+ <xs:any namespace='urn:ietf:params:xml:ns:xmpp-tls'
+ minOccurs='0'
+ maxOccurs='1'/>
+ <xs:any namespace='urn:ietf:params:xml:ns:xmpp-sasl'
+ minOccurs='0'
+ maxOccurs='1'/>
+ <xs:any namespace='##other'
+ minOccurs='0'
+ maxOccurs='unbounded'
+ processContents='lax'/>
+ <xs:choice minOccurs='0' maxOccurs='1'>
+ <xs:choice minOccurs='0' maxOccurs='unbounded'>
+ <xs:element ref='client:message'/>
+ <xs:element ref='client:presence'/>
+ <xs:element ref='client:iq'/>
+ </xs:choice>
+ <xs:choice minOccurs='0' maxOccurs='unbounded'>
+ <xs:element ref='server:message'/>
+ <xs:element ref='server:presence'/>
+ <xs:element ref='server:iq'/>
+ </xs:choice>
+ </xs:choice>
+ <xs:element ref='error' minOccurs='0' maxOccurs='1'/>
+ </xs:sequence>
+ <xs:attribute name='from' type='xs:string' use='optional'/>
+ <xs:attribute name='id' type='xs:string' use='optional'/>
+ <xs:attribute name='to' type='xs:string' use='optional'/>
+ <xs:attribute name='version' type='xs:decimal' use='optional'/>
+ <xs:attribute ref='xml:lang' use='optional'/>
+ <xs:anyAttribute namespace='##other' processContents='lax'/>
+ </xs:complexType>
+ </xs:element>
+
+ <xs:element name='features'>
+ <xs:complexType>
+ <xs:sequence>
+ <xs:any namespace='##other'
+ minOccurs='0'
+ maxOccurs='unbounded'
+ processContents='lax'/>
+ </xs:sequence>
+ </xs:complexType>
+ </xs:element>
+
+ <xs:element name='error'>
+ <xs:complexType>
+ <xs:sequence xmlns:err='urn:ietf:params:xml:ns:xmpp-streams'>
+ <xs:group ref='err:streamErrorGroup'/>
+ <xs:element ref='err:text'
+ minOccurs='0'
+ maxOccurs='1'/>
+ <xs:any namespace='##other'
+ minOccurs='0'
+ maxOccurs='1'
+ processContents='lax'/>
+ </xs:sequence>
+ </xs:complexType>
+ </xs:element>
+
+ </xs:schema>asdf
",
)
.unwrap();
diff --git a/src/reader.rs b/src/reader.rs
index 313de4c..b51489f 100644
--- a/src/reader.rs
+++ b/src/reader.rs
@@ -1,31 +1,348 @@
+use circular::Buffer;
use futures::Stream;
use nom::Err;
-use std::{collections::BTreeMap, str};
-use tokio::io::AsyncBufReadExt;
+use std::{
+ collections::{BTreeMap, HashMap, HashSet},
+ path::Prefix,
+ str::{self, FromStr},
+};
+use tokio::io::{AsyncBufRead, AsyncBufReadExt, AsyncRead, AsyncReadExt};
+
+static MAX_STANZA_SIZE: usize = 65536;
use crate::{
- element::{Element, Name, Namespace},
+ element::{Content, Element, Name, Namespace},
error::Error,
+ xml::{self, parsers::Parser},
+ Result,
};
/// streaming reader that tracks depth and available namespaces at current depth
pub struct Reader<R> {
inner: R,
+ buffer: Buffer,
// holds which tags we are in atm over depth
+ // to have names reference namespaces could
depth: Vec<Name>,
- namespaces: Vec<(usize, Namespace)>,
+ namespaces: Vec<HashSet<Namespace>>,
}
impl<R> Reader<R> {
pub fn new(reader: R) -> Self {
Self {
inner: reader,
+ buffer: Buffer::with_capacity(MAX_STANZA_SIZE),
depth: Vec::new(),
namespaces: Vec::new(),
}
}
}
+impl<R> Reader<R>
+where
+    R: AsyncRead + Unpin,
+{
+    /// Pulls more bytes from the inner reader into the internal buffer.
+    ///
+    /// Returns the byte count reported by the inner reader's `read_buf` call.
+    async fn read_buf(&mut self) -> Result<usize> {
+        Ok(self.inner.read_buf(&mut self.buffer).await?)
+    }
+
+    /// Views `bytes` as UTF-8 text, tolerating a character that is cut off at the very end.
+    ///
+    /// Data arrives in arbitrary chunks, so the buffer may end in the middle of a multi-byte
+    /// character. In that case only the valid prefix is returned and the trailing bytes stay in
+    /// the buffer until the rest of the character arrives.
+    ///
+    /// # Errors
+    /// Returns the UTF-8 error when the bytes contain a sequence that can never become valid.
+    fn buffered_str(bytes: &[u8]) -> Result<&str> {
+        match str::from_utf8(bytes) {
+            Ok(text) => Ok(text),
+            // `error_len() == None` means the input ended before the character was complete.
+            std::result::Result::Err(e) if e.error_len().is_none() => {
+                Ok(str::from_utf8(&bytes[..e.valid_up_to()])?)
+            }
+            std::result::Result::Err(e) => Err(e.into()),
+        }
+    }
+
+    /// Reads one complete element from the stream and qualifies it against the namespaces
+    /// currently in scope.
+    ///
+    /// Already-buffered data is parsed first; the inner reader is only consulted when the buffer
+    /// is empty or the parser reports that it needs more input. On success the bytes making up
+    /// the element are consumed from the buffer.
+    ///
+    /// # Errors
+    /// - `Error::ReadError` when reading fails, or when no further bytes can be read while the
+    ///   element is still incomplete (a zero-length read; presumably end of stream or a full
+    ///   buffer).
+    /// - `Error::Utf8Error` when the buffered bytes are not valid UTF-8.
+    /// - `Error::ParseError` when the parser rejects the input.
+    /// - Any error produced while converting the parsed element.
+    async fn read_element<'s>(&'s mut self) -> Result<Element> {
+        // Do not wait on the stream when earlier reads already left data behind.
+        if self.buffer.available_data() == 0 {
+            self.read_buf().await?;
+        }
+        let mut input = Self::buffered_str(self.buffer.data())?;
+        loop {
+            match xml::Element::parse(input) {
+                Ok((rest, e)) => {
+                    // Measured against `input` rather than the raw buffer, because `input` may
+                    // exclude a trailing partial character.
+                    let len = input.len() - rest.len();
+                    let element = Reader::<R>::element_from_xml(&mut self.namespaces, e)?;
+                    self.buffer.consume(len);
+                    return Ok(element);
+                }
+                std::result::Result::Err(e) => match e {
+                    Err::Incomplete(_) => {
+                        // A zero-length read can never complete the element; stop instead of
+                        // spinning forever.
+                        if self.read_buf().await? == 0 {
+                            return Err(Error::ReadError(std::io::Error::new(
+                                std::io::ErrorKind::UnexpectedEof,
+                                "no more data could be read before the element was complete",
+                            )));
+                        }
+                        input = Self::buffered_str(self.buffer.data())?;
+                    }
+                    // TODO: better error
+                    Err::Error(e) => return Err(Error::ParseError(e.to_string())),
+                    Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
+                },
+            }
+        }
+    }
+}
+
+impl<R> Reader<R> {
+    /// Converts a parsed `xml::Element` into a namespace-qualified `Element`.
+    ///
+    /// `namespaces` is the stack of declarations made by the enclosing elements, outermost first.
+    /// For a non-empty element its own declarations are pushed while the children are converted
+    /// and popped again afterwards, including when converting the children fails.
+    ///
+    /// # Errors
+    /// - `Error::MismatchedEndTag` when the start and end tag names differ.
+    /// - `Error::DuplicateNameSpace` when one tag declares the same prefix (or the default
+    ///   namespace) more than once.
+    /// - `Error::UnqualifiedNamespace` when a prefix, or the default namespace for an unprefixed
+    ///   name, is not declared in scope.
+    /// - `Error::DuplicateAttribute` when two attributes resolve to the same qualified name.
+    /// - Any error from processing attribute values or child content.
+    fn element_from_xml(
+        namespaces: &mut Vec<HashSet<Namespace>>,
+        element: xml::Element,
+    ) -> Result<Element> {
+        match element {
+            xml::Element::Empty(empty_elem_tag) => {
+                let namespace_decl = Self::namespace_declarations(&empty_elem_tag.attributes)?;
+                let (name, attributes) = Self::qualify_tag(
+                    namespaces.as_slice(),
+                    &namespace_decl,
+                    &empty_elem_tag.name,
+                    &empty_elem_tag.attributes,
+                )?;
+
+                Ok(Element {
+                    name,
+                    namespace_decl,
+                    attributes,
+                    content: Vec::new(),
+                })
+            }
+            xml::Element::NotEmpty(s_tag, content, e_tag) => {
+                if s_tag.name != e_tag.name {
+                    return Err(Error::MismatchedEndTag(
+                        s_tag.name.to_string(),
+                        e_tag.name.to_string(),
+                    ));
+                }
+                let namespace_decl = Self::namespace_declarations(&s_tag.attributes)?;
+                let (name, attributes) = Self::qualify_tag(
+                    namespaces.as_slice(),
+                    &namespace_decl,
+                    &s_tag.name,
+                    &s_tag.attributes,
+                )?;
+
+                namespaces.push(namespace_decl.clone());
+                let content = Self::content_from_xml(namespaces, content);
+                // Pop before propagating any error so that a failed child never leaves this
+                // element's scope behind on the stack.
+                namespaces.pop();
+
+                Ok(Element {
+                    name,
+                    namespace_decl,
+                    attributes,
+                    content: content?,
+                })
+            }
+        }
+    }
+
+    /// Collects the namespace declarations written on a single tag.
+    ///
+    /// A prefix (or the default namespace) may be declared at most once per tag; a repeat is
+    /// reported as `Error::DuplicateNameSpace` even when the two declarations bind different
+    /// namespace names.
+    fn namespace_declarations(attributes: &[xml::Attribute<'_>]) -> Result<HashSet<Namespace>> {
+        let mut declarations: HashSet<Namespace> = HashSet::new();
+        for attribute in attributes {
+            if let xml::Attribute::NamespaceDeclaration { ns_name, value } = attribute {
+                let prefix = match ns_name {
+                    xml::NSAttName::PrefixedAttName(prefixed_att_name) => {
+                        Some(prefixed_att_name.to_string())
+                    }
+                    xml::NSAttName::DefaultAttName => None,
+                };
+                let namespace = Namespace {
+                    prefix,
+                    namespace: value.process()?,
+                };
+                if declarations
+                    .iter()
+                    .any(|declared| declared.prefix == namespace.prefix)
+                {
+                    return Err(Error::DuplicateNameSpace(namespace));
+                }
+                declarations.insert(namespace);
+            }
+        }
+        Ok(declarations)
+    }
+
+    /// Resolves a tag's attributes and then its own name against every namespace in scope:
+    /// the inherited `namespaces` followed by the tag's own `namespace_declarations`.
+    fn qualify_tag(
+        namespaces: &[HashSet<Namespace>],
+        namespace_declarations: &HashSet<Namespace>,
+        tag_name: &xml::QName<'_>,
+        tag_attributes: &[xml::Attribute<'_>],
+    ) -> Result<(Name, HashMap<Name, String>)> {
+        // Outermost declarations first, so searching from the back finds the innermost binding.
+        let namespace_stack: Vec<&Namespace> = namespaces
+            .iter()
+            .flatten()
+            .chain(namespace_declarations.iter())
+            .collect();
+
+        let mut attributes = HashMap::new();
+        for attribute in tag_attributes {
+            if let xml::Attribute::Attribute { name, value } = attribute {
+                let qualified = Self::resolve_name(&namespace_stack, name)?;
+                if attributes.insert(qualified, value.process()?).is_some() {
+                    return Err(Error::DuplicateAttribute(name.to_string()));
+                }
+            }
+        }
+
+        let name = Self::resolve_name(&namespace_stack, tag_name)?;
+        Ok((name, attributes))
+    }
+
+    /// Looks up the innermost namespace bound to the name's prefix and pairs it with the local
+    /// part. Unprefixed names are resolved against the default (prefix-less) namespace.
+    ///
+    /// NOTE(review): unprefixed attributes are also resolved against the default namespace here,
+    /// and the predefined `xml` prefix is not bound implicitly; confirm both against the
+    /// Namespaces in XML specification.
+    fn resolve_name(namespace_stack: &[&Namespace], q_name: &xml::QName<'_>) -> Result<Name> {
+        let (prefix, local_name) = match q_name {
+            xml::QName::PrefixedName(prefixed_name) => (
+                Some(**prefixed_name.prefix),
+                prefixed_name.local_part.to_string(),
+            ),
+            xml::QName::UnprefixedName(unprefixed_name) => (None, unprefixed_name.to_string()),
+        };
+        let namespace = namespace_stack
+            .iter()
+            .rfind(|namespace| namespace.prefix.as_deref() == prefix)
+            .ok_or_else(|| Error::UnqualifiedNamespace(q_name.to_string()))?;
+
+        Ok(Name {
+            namespace: (**namespace).clone(),
+            name: local_name,
+        })
+    }
+
+    /// Converts parsed element content into a list of `Content` items.
+    ///
+    /// Character data, resolved references and CDATA sections that are adjacent to each other are
+    /// merged into a single `Content::Text`; a child element ends the current text run. Processing
+    /// instructions and comments are currently discarded, but the character data that follows
+    /// them is kept and joined onto the surrounding text.
+    ///
+    /// # Errors
+    /// Propagates errors from converting child elements and from processing references.
+    fn content_from_xml(
+        namespaces: &mut Vec<HashSet<Namespace>>,
+        element: xml::Content,
+    ) -> Result<Vec<Content>> {
+        let mut content = Vec::new();
+        // Text run being accumulated; flushed whenever a child element is reached.
+        let mut text = element.char_data.map(|str| String::from(*str));
+        for (content_item, char_data) in element.content {
+            match content_item {
+                xml::ContentItem::Element(element) => {
+                    if let Some(text) = text.take() {
+                        content.push(Content::Text(text));
+                    }
+                    content.push(Content::Element(Self::element_from_xml(
+                        namespaces, element,
+                    )?));
+                    text = char_data.map(|str| String::from(*str));
+                }
+                xml::ContentItem::Reference(reference) => {
+                    let data = reference.process()?;
+                    let run = text.get_or_insert_with(String::new);
+                    run.push(data);
+                    if let Some(char_data) = char_data {
+                        run.push_str(*char_data);
+                    }
+                }
+                xml::ContentItem::CDSect(cd_sect) => {
+                    let run = text.get_or_insert_with(String::new);
+                    run.push_str(**cd_sect);
+                    if let Some(char_data) = char_data {
+                        run.push_str(*char_data);
+                    }
+                }
+                // TODO: decide whether processing instructions and comments should be surfaced.
+                xml::ContentItem::PI(_) | xml::ContentItem::Comment(_) => {
+                    // Start a text run if none exists, so that the trailing character data is
+                    // not lost.
+                    if let Some(char_data) = char_data {
+                        text.get_or_insert_with(String::new).push_str(*char_data);
+                    }
+                }
+            }
+        }
+        if let Some(text) = text {
+            content.push(Content::Text(text));
+        }
+        Ok(content)
+    }
+}
+
// impl<R> Reader<R>
// where
// R: AsyncBufReadExt + Unpin,
diff --git a/src/xml/mod.rs b/src/xml/mod.rs
index f072fde..221c334 100644
--- a/src/xml/mod.rs
+++ b/src/xml/mod.rs
@@ -1,4 +1,6 @@
-use std::char;
+use std::{char, ops::Deref};
+
+use crate::error::Error;
pub mod composers;
pub mod parsers;
@@ -14,40 +16,91 @@ pub enum NSAttName<'s> {
#[derive(Clone, Debug)]
pub struct PrefixedAttName<'s>(NCName<'s>);
+impl<'s> Deref for PrefixedAttName<'s> {
+    type Target = NCName<'s>;
+
+    /// Exposes the wrapped `NCName`.
+    fn deref(&self) -> &Self::Target {
+        let Self(inner) = self;
+        inner
+    }
+}
+
/// [3] DefaultAttName ::= 'xmlns';
#[derive(Clone, Debug)]
pub struct DefaultAttName;
/// [4] NCName ::= Name - (Char* ':' Char*)
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, PartialEq, Eq)]
pub struct NCName<'s>(&'s str);
+impl<'s> Deref for NCName<'s> {
+    type Target = &'s str;
+
+    /// Exposes the wrapped string slice.
+    fn deref(&self) -> &Self::Target {
+        let Self(inner) = self;
+        inner
+    }
+}
+
/// [7] QName ::= PrefixedName | UnprefixedName
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, PartialEq, Eq)]
pub enum QName<'s> {
PrefixedName(PrefixedName<'s>),
UnprefixedName(UnprefixedName<'s>),
}
+/// Formats the name as written in the document: `prefix:local` for a prefixed name, or just
+/// the local part otherwise.
+///
+/// Implemented as `Display` rather than `ToString`; the standard library's blanket
+/// implementation still supplies `to_string()` for existing callers.
+impl<'s> std::fmt::Display for QName<'s> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            QName::PrefixedName(prefixed_name) => {
+                write!(
+                    f,
+                    "{}:{}",
+                    **prefixed_name.prefix, **prefixed_name.local_part
+                )
+            }
+            // Deref coercion unwraps UnprefixedName -> LocalPart -> NCName -> &str.
+            QName::UnprefixedName(unprefixed_name) => f.write_str(unprefixed_name),
+        }
+    }
+}
+
/// [8] PrefixedName ::= Prefix ':' LocalPart
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, PartialEq, Eq)]
pub struct PrefixedName<'s> {
- prefix: Prefix<'s>,
- local_part: LocalPart<'s>,
+ pub(crate) prefix: Prefix<'s>,
+ pub(crate) local_part: LocalPart<'s>,
}
/// [9] UnprefixedName ::= LocalPart
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, PartialEq, Eq)]
pub struct UnprefixedName<'s>(LocalPart<'s>);
+impl<'s> Deref for UnprefixedName<'s> {
+    type Target = LocalPart<'s>;
+
+    /// Exposes the wrapped `LocalPart`.
+    fn deref(&self) -> &Self::Target {
+        let Self(inner) = self;
+        inner
+    }
+}
+
/// [10] Prefix ::= NCName
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Prefix<'s>(NCName<'s>);
+impl<'s> Deref for Prefix<'s> {
+    type Target = NCName<'s>;
+
+    /// Exposes the wrapped `NCName`.
+    fn deref(&self) -> &Self::Target {
+        let Self(inner) = self;
+        inner
+    }
+}
+
/// [11] LocalPart ::= NCName
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, PartialEq, Eq)]
pub struct LocalPart<'s>(NCName<'s>);
+impl<'s> Deref for LocalPart<'s> {
+    type Target = NCName<'s>;
+
+    /// Exposes the wrapped `NCName`.
+    fn deref(&self) -> &Self::Target {
+        let Self(inner) = self;
+        inner
+    }
+}
+
// xml spec
/// [1] document ::= prolog element Misc*
@@ -57,6 +110,14 @@ pub type Document<'s> = (Prolog<'s>, Element<'s>, Vec<Misc<'s>>);
#[repr(transparent)]
pub struct Char(char);
+impl Deref for Char {
+    type Target = char;
+
+    /// Exposes the wrapped `char`.
+    fn deref(&self) -> &Self::Target {
+        let Self(inner) = self;
+        inner
+    }
+}
+
/// [3] S ::= (#x20 | #x9 | #xD | #xA)+
#[derive(Clone)]
#[repr(transparent)]
@@ -66,28 +127,76 @@ pub struct S;
#[repr(transparent)]
pub struct NameStartChar(char);
+impl Deref for NameStartChar {
+    type Target = char;
+
+    /// Exposes the wrapped `char`.
+    fn deref(&self) -> &Self::Target {
+        let Self(inner) = self;
+        inner
+    }
+}
+
/// [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
#[repr(transparent)]
pub struct NameChar(char);
+impl Deref for NameChar {
+    type Target = char;
+
+    /// Exposes the wrapped `char`.
+    fn deref(&self) -> &Self::Target {
+        let Self(inner) = self;
+        inner
+    }
+}
+
/// [5] Name ::= NameStartChar (NameChar)*
#[derive(Debug, Clone, PartialEq, Eq)]
#[repr(transparent)]
pub struct Name<'s>(&'s str);
+impl<'s> Deref for Name<'s> {
+    type Target = &'s str;
+
+    /// Exposes the wrapped string slice.
+    fn deref(&self) -> &Self::Target {
+        let Self(inner) = self;
+        inner
+    }
+}
+
/// [6] Names ::= Name (#x20 Name)*
#[repr(transparent)]
pub struct Names<'s>(Vec<Name<'s>>);
+impl<'s> Deref for Names<'s> {
+    type Target = Vec<Name<'s>>;
+
+    /// Exposes the wrapped list of names.
+    fn deref(&self) -> &Self::Target {
+        let Self(inner) = self;
+        inner
+    }
+}
+
/// [7] Nmtoken ::= (NameChar)+
#[derive(Debug, Clone)]
#[repr(transparent)]
pub struct Nmtoken<'s>(&'s str);
+impl<'s> Deref for Nmtoken<'s> {
+    type Target = &'s str;
+
+    /// Exposes the wrapped string slice.
+    fn deref(&self) -> &Self::Target {
+        let Self(inner) = self;
+        inner
+    }
+}
+
/// [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
#[repr(transparent)]
pub struct Nmtokens<'s>(Vec<Nmtoken<'s>>);
+impl<'s> Deref for Nmtokens<'s> {
+    type Target = Vec<Nmtoken<'s>>;
+
+    /// Exposes the wrapped list of name tokens.
+    fn deref(&self) -> &Self::Target {
+        let Self(inner) = self;
+        inner
+    }
+}
+
#[derive(Clone, Debug)]
pub enum EntityValueData<'s> {
String(&'s str),
@@ -115,6 +224,24 @@ pub enum AttValue<'s> {
SingleQuoted(Vec<AttValueData<'s>>),
}
+impl<'s> AttValue<'s> {
+    /// Flattens the attribute value into an owned `String`.
+    ///
+    /// Literal pieces are copied verbatim and every reference is replaced by the character its
+    /// own `process` call yields. Both quoting styles are handled the same way.
+    ///
+    /// # Errors
+    /// Returns the first error produced while processing a reference.
+    pub fn process(&self) -> crate::Result<String> {
+        let pieces = match self {
+            AttValue::DoubleQuoted(pieces) | AttValue::SingleQuoted(pieces) => pieces,
+        };
+        let mut flattened = String::new();
+        for piece in pieces {
+            match piece {
+                AttValueData::String(literal) => flattened.push_str(literal),
+                AttValueData::Reference(reference) => flattened.push(reference.process()?),
+            }
+        }
+        Ok(flattened)
+    }
+}
+
/// [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
#[derive(Debug)]
pub enum SystemLiteral<'s> {
@@ -138,11 +265,27 @@ pub struct PubidChar(char);
#[repr(transparent)]
pub struct CharData<'s>(&'s str);
+impl<'s> Deref for CharData<'s> {
+    type Target = &'s str;
+
+    /// Exposes the wrapped string slice.
+    fn deref(&self) -> &Self::Target {
+        let Self(inner) = self;
+        inner
+    }
+}
+
/// [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
#[derive(Clone, Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct Comment<'s>(&'s str);
+impl<'s> Deref for Comment<'s> {
+    type Target = &'s str;
+
+    /// Exposes the wrapped string slice.
+    fn deref(&self) -> &Self::Target {
+        let Self(inner) = self;
+        inner
+    }
+}
+
/// [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
#[derive(Clone, Debug)]
pub struct PI<'s> {
@@ -160,6 +303,14 @@ pub struct PITarget<'s>(Name<'s>);
#[repr(transparent)]
pub struct CDSect<'s>(CData<'s>);
+impl<'s> Deref for CDSect<'s> {
+    type Target = CData<'s>;
+
+    /// Exposes the wrapped `CData`.
+    fn deref(&self) -> &Self::Target {
+        let Self(inner) = self;
+        inner
+    }
+}
+
/// [19] CDStart ::= '<![CDATA['
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CDStart;
@@ -169,6 +320,14 @@ pub struct CDStart;
#[repr(transparent)]
pub struct CData<'s>(&'s str);
+impl<'s> Deref for CData<'s> {
+    type Target = &'s str;
+
+    /// Exposes the wrapped string slice.
+    fn deref(&self) -> &Self::Target {
+        let Self(inner) = self;
+        inner
+    }
+}
+
/// [21] CDEnd ::= ']]>'
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CDEnd;
@@ -286,8 +445,8 @@ pub enum Element<'s> {
/// [40] STag ::= '<' Name (S Attribute)* S? '>'
#[derive(Debug, Clone)]
pub struct STag<'s> {
- name: QName<'s>,
- attributes: Vec<Attribute<'s>>,
+ pub(crate) name: QName<'s>,
+ pub(crate) attributes: Vec<Attribute<'s>>,
}
/// [15] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue
@@ -309,7 +468,7 @@ pub enum Attribute<'s> {
/// [42] ETag ::= '</' Name S? '>'
#[derive(Debug, Clone)]
pub struct ETag<'s> {
- name: QName<'s>,
+ pub(crate) name: QName<'s>,
}
#[derive(Debug, Clone)]
@@ -324,16 +483,16 @@ pub enum ContentItem<'s> {
/// [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
#[derive(Debug, Clone)]
pub struct Content<'s> {
- char_data: Option<CharData<'s>>,
- content: Vec<(ContentItem<'s>, Option<CharData<'s>>)>,
+ pub(crate) char_data: Option<CharData<'s>>,
+ pub(crate) content: Vec<(ContentItem<'s>, Option<CharData<'s>>)>,
}
/// [14] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>'
/// [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' [WFC: Unique Att Spec]
#[derive(Debug, Clone)]
pub struct EmptyElemTag<'s> {
- name: QName<'s>,
- attributes: Vec<Attribute<'s>>,
+ pub(crate) name: QName<'s>,
+ pub(crate) attributes: Vec<Attribute<'s>>,
}
/// [17] elementdecl ::= '<!ELEMENT' S QName S contentspec S? '>'
@@ -503,6 +662,32 @@ pub enum CharRef<'s> {
Hexadecimal(&'s str),
}
+impl<'s> CharRef<'s> {
+    /// Resolves the character reference to the character it denotes.
+    ///
+    /// The digits are read as base 10 for `Decimal` and base 16 for `Hexadecimal`, and the
+    /// resulting character must fall within the ranges accepted by the check below.
+    ///
+    /// # Errors
+    /// Returns `Error::InvalidCharRef` when the digits do not parse as a `u32`, when the number
+    /// is not a Unicode scalar value, or when the character is outside the accepted ranges.
+    pub fn process(&self) -> crate::Result<char> {
+        let code_point: u32 = match self {
+            CharRef::Decimal(digits) => digits.parse()?,
+            CharRef::Hexadecimal(digits) => <u32>::from_str_radix(digits, 16)?,
+        };
+
+        let c = std::char::from_u32(code_point)
+            .ok_or_else(|| Error::InvalidCharRef(code_point.to_string()))?;
+
+        match c {
+            '\u{9}'
+            | '\u{A}'
+            | '\u{D}'
+            | '\u{20}'..='\u{D7FF}'
+            | '\u{E000}'..='\u{FFFD}'
+            | '\u{10000}'..='\u{10FFFF}' => Ok(c),
+            _ => Err(Error::InvalidCharRef(format!(
+                "{} is not a valid xml char",
+                c
+            ))),
+        }
+    }
+}
+
/// [67] Reference ::= EntityRef | CharRef
#[derive(Clone, Debug)]
pub enum Reference<'s> {
@@ -510,10 +695,34 @@ pub enum Reference<'s> {
CharRef(CharRef<'s>),
}
+impl<'s> Reference<'s> {
+    /// Resolves the reference to a single character.
+    ///
+    /// Character references are delegated to `CharRef::process`. Entity references are limited
+    /// to the five names matched below (`amp`, `lt`, `gt`, `apos`, `quot`).
+    ///
+    /// # Errors
+    /// Returns `Error::EntityProcessError` carrying the entity name for any other entity, and
+    /// propagates errors from resolving a character reference.
+    pub fn process(&self) -> crate::Result<char> {
+        let entity_ref = match self {
+            Reference::CharRef(char_ref) => return char_ref.process(),
+            Reference::EntityRef(entity_ref) => entity_ref,
+        };
+        // &EntityRef -> EntityRef -> Name -> &str
+        let entity_name: &str = ***entity_ref;
+        match entity_name {
+            "amp" => Ok('&'),
+            "lt" => Ok('<'),
+            "gt" => Ok('>'),
+            "apos" => Ok('\''),
+            "quot" => Ok('"'),
+            unknown => Err(Error::EntityProcessError(unknown.to_string())),
+        }
+    }
+}
+
/// [68] EntityRef ::= '&' Name ';'
#[derive(Clone, Debug)]
pub struct EntityRef<'s>(Name<'s>);
+impl<'s> Deref for EntityRef<'s> {
+    type Target = Name<'s>;
+
+    /// Exposes the wrapped `Name`.
+    fn deref(&self) -> &Self::Target {
+        let Self(inner) = self;
+        inner
+    }
+}
+
/// [69] PEReference ::= '%' Name ';'
#[derive(Clone, Debug)]
#[repr(transparent)]