diff options
Diffstat (limited to 'src/xml/mod.rs')
-rw-r--r-- | src/xml/mod.rs | 241 |
1 files changed, 225 insertions, 16 deletions
diff --git a/src/xml/mod.rs b/src/xml/mod.rs index f072fde..221c334 100644 --- a/src/xml/mod.rs +++ b/src/xml/mod.rs @@ -1,4 +1,6 @@ -use std::char; +use std::{char, ops::Deref}; + +use crate::error::Error; pub mod composers; pub mod parsers; @@ -14,40 +16,91 @@ pub enum NSAttName<'s> { #[derive(Clone, Debug)] pub struct PrefixedAttName<'s>(NCName<'s>); +impl<'s> Deref for PrefixedAttName<'s> { + type Target = NCName<'s>; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + /// [3] DefaultAttName ::= 'xmlns'; #[derive(Clone, Debug)] pub struct DefaultAttName; /// [4] NCName ::= Name - (Char* ':' Char*) -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] pub struct NCName<'s>(&'s str); +impl<'s> Deref for NCName<'s> { + type Target = &'s str; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + /// [7] QName ::= PrefixedName | UnprefixedName -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] pub enum QName<'s> { PrefixedName(PrefixedName<'s>), UnprefixedName(UnprefixedName<'s>), } +impl<'s> ToString for QName<'s> { + fn to_string(&self) -> String { + match self { + QName::PrefixedName(prefixed_name) => { + format!("{}:{}", **prefixed_name.prefix, **prefixed_name.local_part) + } + QName::UnprefixedName(unprefixed_name) => unprefixed_name.to_string(), + } + } +} + /// [8] PrefixedName ::= Prefix ':' LocalPart -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] pub struct PrefixedName<'s> { - prefix: Prefix<'s>, - local_part: LocalPart<'s>, + pub(crate) prefix: Prefix<'s>, + pub(crate) local_part: LocalPart<'s>, } /// [9] UnprefixedName ::= LocalPart -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] pub struct UnprefixedName<'s>(LocalPart<'s>); +impl<'s> Deref for UnprefixedName<'s> { + type Target = LocalPart<'s>; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + /// [10] Prefix ::= NCName -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] pub struct Prefix<'s>(NCName<'s>); +impl<'s> Deref for Prefix<'s> { + type Target = NCName<'s>; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + /// [11] LocalPart ::= NCName -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] pub struct LocalPart<'s>(NCName<'s>); +impl<'s> Deref for LocalPart<'s> { + type Target = NCName<'s>; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + // xml spec /// [1] document ::= prolog element Misc* @@ -57,6 +110,14 @@ pub type Document<'s> = (Prolog<'s>, Element<'s>, Vec<Misc<'s>>); #[repr(transparent)] pub struct Char(char); +impl Deref for Char { + type Target = char; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + /// [3] S ::= (#x20 | #x9 | #xD | #xA)+ #[derive(Clone)] #[repr(transparent)] @@ -66,28 +127,76 @@ pub struct S; #[repr(transparent)] pub struct NameStartChar(char); +impl Deref for NameStartChar { + type Target = char; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + /// [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040] #[repr(transparent)] pub struct NameChar(char); +impl Deref for NameChar { + type Target = char; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + /// [5] Name ::= NameStartChar (NameChar)* #[derive(Debug, Clone, PartialEq, Eq)] #[repr(transparent)] pub struct Name<'s>(&'s str); +impl<'s> Deref for Name<'s> { + type Target = &'s str; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + /// [6] Names ::= Name (#x20 Name)* #[repr(transparent)] pub struct Names<'s>(Vec<Name<'s>>); +impl<'s> Deref for Names<'s> { + type Target = Vec<Name<'s>>; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + /// [7] Nmtoken ::= (NameChar)+ #[derive(Debug, Clone)] #[repr(transparent)] pub struct Nmtoken<'s>(&'s str); +impl<'s> Deref for Nmtoken<'s> { + type Target = &'s str; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + /// [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* #[repr(transparent)] pub struct Nmtokens<'s>(Vec<Nmtoken<'s>>); +impl<'s> Deref for Nmtokens<'s> { + type Target = Vec<Nmtoken<'s>>; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + #[derive(Clone, Debug)] pub enum EntityValueData<'s> { String(&'s str), @@ -115,6 +224,24 @@ pub enum AttValue<'s> { SingleQuoted(Vec<AttValueData<'s>>), } +impl<'s> AttValue<'s> { + pub fn process(&self) -> crate::Result<String> { + let mut output = String::new(); + let data; + match self { + AttValue::DoubleQuoted(vec) => data = vec, + AttValue::SingleQuoted(vec) => data = vec, + } + for data in data { + match data { + AttValueData::String(s) => output.push_str(s), + AttValueData::Reference(reference) => output.push(reference.process()?), + } + } + Ok(output) + } +} + /// [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") #[derive(Debug)] pub enum SystemLiteral<'s> { @@ -138,11 +265,27 @@ pub struct PubidChar(char); #[repr(transparent)] pub struct CharData<'s>(&'s str); +impl<'s> Deref for CharData<'s> { + type Target = &'s str; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + /// [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' #[derive(Clone, Debug, PartialEq, Eq)] #[repr(transparent)] pub struct Comment<'s>(&'s str); +impl<'s> Deref for Comment<'s> { + type Target = &'s str; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + /// [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' #[derive(Clone, Debug)] pub struct PI<'s> { @@ -160,6 +303,14 @@ pub struct PITarget<'s>(Name<'s>); #[repr(transparent)] pub struct CDSect<'s>(CData<'s>); +impl<'s> Deref for CDSect<'s> { + type Target = CData<'s>; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + /// [19] CDStart ::= '<![CDATA[' #[derive(Clone, Debug, PartialEq, Eq)] pub struct CDStart; @@ -169,6 +320,14 @@ pub struct CDStart; #[repr(transparent)] pub struct CData<'s>(&'s str); +impl<'s> Deref for CData<'s> { + type Target = &'s str; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + /// [21] CDEnd ::= ']]>' #[derive(Clone, Debug, PartialEq, Eq)] pub struct CDEnd; @@ -286,8 +445,8 @@ pub enum Element<'s> { /// [40] STag ::= '<' Name (S Attribute)* S? '>' #[derive(Debug, Clone)] pub struct STag<'s> { - name: QName<'s>, - attributes: Vec<Attribute<'s>>, + pub(crate) name: QName<'s>, + pub(crate) attributes: Vec<Attribute<'s>>, } /// [15] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue @@ -309,7 +468,7 @@ pub enum Attribute<'s> { /// [42] ETag ::= '</' Name S? '>' #[derive(Debug, Clone)] pub struct ETag<'s> { - name: QName<'s>, + pub(crate) name: QName<'s>, } #[derive(Debug, Clone)] @@ -324,16 +483,16 @@ pub enum ContentItem<'s> { /// [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* #[derive(Debug, Clone)] pub struct Content<'s> { - char_data: Option<CharData<'s>>, - content: Vec<(ContentItem<'s>, Option<CharData<'s>>)>, + pub(crate) char_data: Option<CharData<'s>>, + pub(crate) content: Vec<(ContentItem<'s>, Option<CharData<'s>>)>, } /// [14] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' /// [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' [WFC: Unique Att Spec] #[derive(Debug, Clone)] pub struct EmptyElemTag<'s> { - name: QName<'s>, - attributes: Vec<Attribute<'s>>, + pub(crate) name: QName<'s>, + pub(crate) attributes: Vec<Attribute<'s>>, } /// [17] elementdecl ::= '<!ELEMENT' S QName S contentspec S? '>' @@ -503,6 +662,32 @@ pub enum CharRef<'s> { Hexadecimal(&'s str), } +impl<'s> CharRef<'s> { + pub fn process(&self) -> crate::Result<char> { + let int: u32; + match self { + CharRef::Decimal(dec) => { + int = dec.parse()?; + } + CharRef::Hexadecimal(hex) => { + int = <u32>::from_str_radix(hex, 16)?; + } + } + let c = std::char::from_u32(int); + + let c = c.ok_or_else(|| Error::InvalidCharRef(int.to_string()))?; + if matches!(c, '\u{9}' | '\u{A}' | '\u{D}' | '\u{20}'..='\u{D7FF}' | '\u{E000}'..='\u{FFFD}' | '\u{10000}'..='\u{10FFFF}') + { + return Ok(c); + } else { + return Err(Error::InvalidCharRef(format!( + "{} is not a valid xml char", + c + ))); + }; + } +} + /// [67] Reference ::= EntityRef | CharRef #[derive(Clone, Debug)] pub enum Reference<'s> { @@ -510,10 +695,34 @@ pub enum Reference<'s> { CharRef(CharRef<'s>), } +impl<'s> Reference<'s> { + pub fn process(&self) -> crate::Result<char> { + match self { + Reference::EntityRef(entity_ref) => match *entity_ref.deref().deref() { + "amp" => Ok('&'), + "lt" => Ok('<'), + "gt" => Ok('>'), + "apos" => Ok('\''), + "quot" => Ok('"'), + e => return Err(Error::EntityProcessError(e.to_string())), + }, + Reference::CharRef(char_ref) => Ok(char_ref.process()?), + } + } +} + /// [68] EntityRef ::= '&' Name ';' #[derive(Clone, Debug)] pub struct EntityRef<'s>(Name<'s>); +impl<'s> Deref for EntityRef<'s> { + type Target = Name<'s>; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + /// [69] PEReference ::= '%' Name ';' #[derive(Clone, Debug)] #[repr(transparent)] |