aboutsummaryrefslogtreecommitdiffstats
path: root/src/xml/mod.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/xml/mod.rs')
-rw-r--r--src/xml/mod.rs241
1 files changed, 225 insertions, 16 deletions
diff --git a/src/xml/mod.rs b/src/xml/mod.rs
index f072fde..221c334 100644
--- a/src/xml/mod.rs
+++ b/src/xml/mod.rs
@@ -1,4 +1,6 @@
-use std::char;
+use std::{char, ops::Deref};
+
+use crate::error::Error;
pub mod composers;
pub mod parsers;
@@ -14,40 +16,91 @@ pub enum NSAttName<'s> {
#[derive(Clone, Debug)]
pub struct PrefixedAttName<'s>(NCName<'s>);
+/// Exposes the [`NCName`] wrapped by a `PrefixedAttName` through `*`/auto-deref.
+impl<'s> Deref for PrefixedAttName<'s> {
+    type Target = NCName<'s>;
+
+    /// Borrows the single tuple field of this wrapper.
+    fn deref(&self) -> &Self::Target {
+        let Self(nc_name) = self;
+        nc_name
+    }
+}
+
/// [3] DefaultAttName ::= 'xmlns';
#[derive(Clone, Debug)]
pub struct DefaultAttName;
/// [4] NCName ::= Name - (Char* ':' Char*)
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, PartialEq, Eq)]
pub struct NCName<'s>(&'s str);
+/// Exposes the borrowed string slice held by an `NCName`.
+///
+/// The target is the `&'s str` itself (not `str`), so `*name` yields the slice reference.
+impl<'s> Deref for NCName<'s> {
+    type Target = &'s str;
+
+    /// Borrows the wrapped `&'s str`.
+    fn deref(&self) -> &Self::Target {
+        let Self(text) = self;
+        text
+    }
+}
+
/// [7] QName ::= PrefixedName | UnprefixedName
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, PartialEq, Eq)]
pub enum QName<'s> {
PrefixedName(PrefixedName<'s>),
UnprefixedName(UnprefixedName<'s>),
}
+/// Renders a [`QName`] as text: `prefix:local` for a prefixed name, or just the local part for an
+/// unprefixed one.
+///
+/// This is implemented as `Display` rather than as a hand-written `ToString`. The standard
+/// library's blanket `impl<T: Display> ToString for T` still provides `.to_string()` for existing
+/// callers, and the name can additionally be used directly in `format!`/`write!`.
+impl<'s> std::fmt::Display for QName<'s> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            QName::PrefixedName(prefixed_name) => {
+                // `Prefix`/`LocalPart` deref to `NCName`, which in turn derefs to the `&str`.
+                write!(
+                    f,
+                    "{}:{}",
+                    **prefixed_name.prefix, **prefixed_name.local_part
+                )
+            }
+            QName::UnprefixedName(unprefixed_name) => {
+                // Deref coercion walks UnprefixedName -> LocalPart -> NCName -> &str -> str.
+                let local_part: &str = unprefixed_name;
+                f.write_str(local_part)
+            }
+        }
+    }
+}
+
/// [8] PrefixedName ::= Prefix ':' LocalPart
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, PartialEq, Eq)]
pub struct PrefixedName<'s> {
- prefix: Prefix<'s>,
- local_part: LocalPart<'s>,
+ pub(crate) prefix: Prefix<'s>,
+ pub(crate) local_part: LocalPart<'s>,
}
/// [9] UnprefixedName ::= LocalPart
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, PartialEq, Eq)]
pub struct UnprefixedName<'s>(LocalPart<'s>);
+/// Exposes the [`LocalPart`] wrapped by an `UnprefixedName`.
+impl<'s> Deref for UnprefixedName<'s> {
+    type Target = LocalPart<'s>;
+
+    /// Borrows the single tuple field of this wrapper.
+    fn deref(&self) -> &Self::Target {
+        let Self(local_part) = self;
+        local_part
+    }
+}
+
/// [10] Prefix ::= NCName
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Prefix<'s>(NCName<'s>);
+/// Exposes the [`NCName`] wrapped by a `Prefix`.
+impl<'s> Deref for Prefix<'s> {
+    type Target = NCName<'s>;
+
+    /// Borrows the single tuple field of this wrapper.
+    fn deref(&self) -> &Self::Target {
+        let Self(nc_name) = self;
+        nc_name
+    }
+}
+
/// [11] LocalPart ::= NCName
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, PartialEq, Eq)]
pub struct LocalPart<'s>(NCName<'s>);
+/// Exposes the [`NCName`] wrapped by a `LocalPart`.
+impl<'s> Deref for LocalPart<'s> {
+    type Target = NCName<'s>;
+
+    /// Borrows the single tuple field of this wrapper.
+    fn deref(&self) -> &Self::Target {
+        let Self(nc_name) = self;
+        nc_name
+    }
+}
+
// xml spec
/// [1] document ::= prolog element Misc*
@@ -57,6 +110,14 @@ pub type Document<'s> = (Prolog<'s>, Element<'s>, Vec<Misc<'s>>);
#[repr(transparent)]
pub struct Char(char);
+/// Exposes the primitive `char` wrapped by the `Char` newtype.
+impl Deref for Char {
+    type Target = char;
+
+    /// Borrows the wrapped `char`.
+    fn deref(&self) -> &Self::Target {
+        let Self(ch) = self;
+        ch
+    }
+}
+
/// [3] S ::= (#x20 | #x9 | #xD | #xA)+
#[derive(Clone)]
#[repr(transparent)]
@@ -66,28 +127,76 @@ pub struct S;
#[repr(transparent)]
pub struct NameStartChar(char);
+/// Exposes the primitive `char` wrapped by the `NameStartChar` newtype.
+impl Deref for NameStartChar {
+    type Target = char;
+
+    /// Borrows the wrapped `char`.
+    fn deref(&self) -> &Self::Target {
+        let Self(ch) = self;
+        ch
+    }
+}
+
/// [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
#[repr(transparent)]
pub struct NameChar(char);
+/// Exposes the primitive `char` wrapped by the `NameChar` newtype.
+impl Deref for NameChar {
+    type Target = char;
+
+    /// Borrows the wrapped `char`.
+    fn deref(&self) -> &Self::Target {
+        let Self(ch) = self;
+        ch
+    }
+}
+
/// [5] Name ::= NameStartChar (NameChar)*
#[derive(Debug, Clone, PartialEq, Eq)]
#[repr(transparent)]
pub struct Name<'s>(&'s str);
+/// Exposes the borrowed string slice held by a `Name`.
+///
+/// The target is the `&'s str` itself (not `str`), so `*name` yields the slice reference.
+impl<'s> Deref for Name<'s> {
+    type Target = &'s str;
+
+    /// Borrows the wrapped `&'s str`.
+    fn deref(&self) -> &Self::Target {
+        let Self(text) = self;
+        text
+    }
+}
+
/// [6] Names ::= Name (#x20 Name)*
#[repr(transparent)]
pub struct Names<'s>(Vec<Name<'s>>);
+/// Exposes the `Vec<Name>` wrapped by `Names`, giving read-only access to the vector API.
+impl<'s> Deref for Names<'s> {
+    type Target = Vec<Name<'s>>;
+
+    /// Borrows the wrapped vector of names.
+    fn deref(&self) -> &Self::Target {
+        let Self(names) = self;
+        names
+    }
+}
+
/// [7] Nmtoken ::= (NameChar)+
#[derive(Debug, Clone)]
#[repr(transparent)]
pub struct Nmtoken<'s>(&'s str);
+/// Exposes the borrowed string slice held by an `Nmtoken`.
+///
+/// The target is the `&'s str` itself (not `str`), so `*token` yields the slice reference.
+impl<'s> Deref for Nmtoken<'s> {
+    type Target = &'s str;
+
+    /// Borrows the wrapped `&'s str`.
+    fn deref(&self) -> &Self::Target {
+        let Self(text) = self;
+        text
+    }
+}
+
/// [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
#[repr(transparent)]
pub struct Nmtokens<'s>(Vec<Nmtoken<'s>>);
+/// Exposes the `Vec<Nmtoken>` wrapped by `Nmtokens`, giving read-only access to the vector API.
+impl<'s> Deref for Nmtokens<'s> {
+    type Target = Vec<Nmtoken<'s>>;
+
+    /// Borrows the wrapped vector of tokens.
+    fn deref(&self) -> &Self::Target {
+        let Self(tokens) = self;
+        tokens
+    }
+}
+
#[derive(Clone, Debug)]
pub enum EntityValueData<'s> {
String(&'s str),
@@ -115,6 +224,24 @@ pub enum AttValue<'s> {
SingleQuoted(Vec<AttValueData<'s>>),
}
+impl<'s> AttValue<'s> {
+    /// Builds the attribute's text by concatenating its parts in order.
+    ///
+    /// Literal string parts are appended unchanged; each reference part is resolved with
+    /// `Reference::process` and the resulting character appended. The quoting style
+    /// (single vs. double) does not affect the result.
+    ///
+    /// # Errors
+    ///
+    /// Returns the first error produced while resolving a reference.
+    pub fn process(&self) -> crate::Result<String> {
+        // Both variants carry the same payload, so one arm extracts it for either.
+        let parts = match self {
+            AttValue::DoubleQuoted(parts) | AttValue::SingleQuoted(parts) => parts,
+        };
+
+        let mut value = String::new();
+        for part in parts {
+            match part {
+                AttValueData::String(text) => value.push_str(text),
+                AttValueData::Reference(reference) => value.push(reference.process()?),
+            }
+        }
+        Ok(value)
+    }
+}
+
/// [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
#[derive(Debug)]
pub enum SystemLiteral<'s> {
@@ -138,11 +265,27 @@ pub struct PubidChar(char);
#[repr(transparent)]
pub struct CharData<'s>(&'s str);
+/// Exposes the borrowed string slice held by `CharData`.
+///
+/// The target is the `&'s str` itself (not `str`), so `*data` yields the slice reference.
+impl<'s> Deref for CharData<'s> {
+    type Target = &'s str;
+
+    /// Borrows the wrapped `&'s str`.
+    fn deref(&self) -> &Self::Target {
+        let Self(text) = self;
+        text
+    }
+}
+
/// [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
#[derive(Clone, Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct Comment<'s>(&'s str);
+/// Exposes the borrowed string slice held by a `Comment`.
+///
+/// The target is the `&'s str` itself (not `str`), so `*comment` yields the slice reference.
+impl<'s> Deref for Comment<'s> {
+    type Target = &'s str;
+
+    /// Borrows the wrapped `&'s str`.
+    fn deref(&self) -> &Self::Target {
+        let Self(text) = self;
+        text
+    }
+}
+
/// [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
#[derive(Clone, Debug)]
pub struct PI<'s> {
@@ -160,6 +303,14 @@ pub struct PITarget<'s>(Name<'s>);
#[repr(transparent)]
pub struct CDSect<'s>(CData<'s>);
+/// Exposes the [`CData`] wrapped by a `CDSect`.
+impl<'s> Deref for CDSect<'s> {
+    type Target = CData<'s>;
+
+    /// Borrows the single tuple field of this wrapper.
+    fn deref(&self) -> &Self::Target {
+        let Self(cdata) = self;
+        cdata
+    }
+}
+
/// [19] CDStart ::= '<![CDATA['
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CDStart;
@@ -169,6 +320,14 @@ pub struct CDStart;
#[repr(transparent)]
pub struct CData<'s>(&'s str);
+/// Exposes the borrowed string slice held by `CData`.
+///
+/// The target is the `&'s str` itself (not `str`), so `*cdata` yields the slice reference.
+impl<'s> Deref for CData<'s> {
+    type Target = &'s str;
+
+    /// Borrows the wrapped `&'s str`.
+    fn deref(&self) -> &Self::Target {
+        let Self(text) = self;
+        text
+    }
+}
+
/// [21] CDEnd ::= ']]>'
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CDEnd;
@@ -286,8 +445,8 @@ pub enum Element<'s> {
/// [40] STag ::= '<' Name (S Attribute)* S? '>'
#[derive(Debug, Clone)]
pub struct STag<'s> {
- name: QName<'s>,
- attributes: Vec<Attribute<'s>>,
+ pub(crate) name: QName<'s>,
+ pub(crate) attributes: Vec<Attribute<'s>>,
}
/// [15] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue
@@ -309,7 +468,7 @@ pub enum Attribute<'s> {
/// [42] ETag ::= '</' Name S? '>'
#[derive(Debug, Clone)]
pub struct ETag<'s> {
- name: QName<'s>,
+ pub(crate) name: QName<'s>,
}
#[derive(Debug, Clone)]
@@ -324,16 +483,16 @@ pub enum ContentItem<'s> {
/// [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
#[derive(Debug, Clone)]
pub struct Content<'s> {
- char_data: Option<CharData<'s>>,
- content: Vec<(ContentItem<'s>, Option<CharData<'s>>)>,
+ pub(crate) char_data: Option<CharData<'s>>,
+ pub(crate) content: Vec<(ContentItem<'s>, Option<CharData<'s>>)>,
}
/// [14] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>'
/// [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' [WFC: Unique Att Spec]
#[derive(Debug, Clone)]
pub struct EmptyElemTag<'s> {
- name: QName<'s>,
- attributes: Vec<Attribute<'s>>,
+ pub(crate) name: QName<'s>,
+ pub(crate) attributes: Vec<Attribute<'s>>,
}
/// [17] elementdecl ::= '<!ELEMENT' S QName S contentspec S? '>'
@@ -503,6 +662,32 @@ pub enum CharRef<'s> {
Hexadecimal(&'s str),
}
+impl<'s> CharRef<'s> {
+    /// Converts this character reference into the character it denotes.
+    ///
+    /// The digits are parsed as base 10 (`Decimal`) or base 16 (`Hexadecimal`) into a `u32`,
+    /// turned into a `char`, and then checked against the ranges listed in the `match` below.
+    ///
+    /// # Errors
+    ///
+    /// * Propagates the integer-parsing error if the digits do not form a `u32`.
+    /// * Returns `Error::InvalidCharRef` carrying the number if it is not a Unicode scalar value.
+    /// * Returns `Error::InvalidCharRef` with an explanatory message if the character falls
+    ///   outside the accepted ranges.
+    pub fn process(&self) -> crate::Result<char> {
+        let code_point = match self {
+            CharRef::Decimal(digits) => digits.parse::<u32>()?,
+            CharRef::Hexadecimal(digits) => u32::from_str_radix(digits, 16)?,
+        };
+
+        let c = std::char::from_u32(code_point)
+            .ok_or_else(|| Error::InvalidCharRef(code_point.to_string()))?;
+
+        match c {
+            '\u{9}'
+            | '\u{A}'
+            | '\u{D}'
+            | '\u{20}'..='\u{D7FF}'
+            | '\u{E000}'..='\u{FFFD}'
+            | '\u{10000}'..='\u{10FFFF}' => Ok(c),
+            _ => Err(Error::InvalidCharRef(format!(
+                "{} is not a valid xml char",
+                c
+            ))),
+        }
+    }
+}
+
/// [67] Reference ::= EntityRef | CharRef
#[derive(Clone, Debug)]
pub enum Reference<'s> {
@@ -510,10 +695,34 @@ pub enum Reference<'s> {
CharRef(CharRef<'s>),
}
+impl<'s> Reference<'s> {
+    /// Resolves this reference to the single character it stands for.
+    ///
+    /// Character references are delegated to `CharRef::process`. Entity references are resolved
+    /// only for the five names matched below (`amp`, `lt`, `gt`, `apos`, `quot`).
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::EntityProcessError` carrying the entity name when it is not one of those
+    /// five, or whatever error `CharRef::process` reports.
+    pub fn process(&self) -> crate::Result<char> {
+        let entity_ref = match self {
+            Reference::CharRef(char_ref) => return char_ref.process(),
+            Reference::EntityRef(entity_ref) => entity_ref,
+        };
+
+        // EntityRef -> Name -> &str: three derefs reach the entity's name text.
+        let name: &str = ***entity_ref;
+        let resolved = match name {
+            "amp" => '&',
+            "lt" => '<',
+            "gt" => '>',
+            "apos" => '\'',
+            "quot" => '"',
+            unknown => return Err(Error::EntityProcessError(unknown.to_string())),
+        };
+        Ok(resolved)
+    }
+}
+
/// [68] EntityRef ::= '&' Name ';'
#[derive(Clone, Debug)]
pub struct EntityRef<'s>(Name<'s>);
+/// Exposes the [`Name`] wrapped by an `EntityRef`.
+impl<'s> Deref for EntityRef<'s> {
+    type Target = Name<'s>;
+
+    /// Borrows the single tuple field of this wrapper.
+    fn deref(&self) -> &Self::Target {
+        let Self(name) = self;
+        name
+    }
+}
+
/// [69] PEReference ::= '%' Name ';'
#[derive(Clone, Debug)]
#[repr(transparent)]