diff options
Diffstat (limited to '')
-rw-r--r-- | examples/example.rs (renamed from src/main.rs) | 0 | ||||
-rw-r--r-- | src/declaration.rs | 3 | ||||
-rw-r--r-- | src/element.rs | 66 | ||||
-rw-r--r-- | src/error.rs | 54 | ||||
-rw-r--r-- | src/lib.rs | 35 | ||||
-rw-r--r-- | src/reader.rs | 274 | ||||
-rw-r--r-- | src/writer.rs | 558 | ||||
-rw-r--r-- | src/writer/endable.rs (renamed from src/endable.rs) | 0 | ||||
-rw-r--r-- | src/writer/loggable.rs (renamed from src/loggable.rs) | 5 | ||||
-rw-r--r-- | src/xml/mod.rs | 2 | ||||
-rw-r--r-- | src/xml/parsers_complete.rs | 7 |
11 files changed, 923 insertions, 81 deletions
diff --git a/src/main.rs b/examples/example.rs index 580652e..580652e 100644 --- a/src/main.rs +++ b/examples/example.rs diff --git a/src/declaration.rs b/src/declaration.rs index 2c0855f..ecd877a 100644 --- a/src/declaration.rs +++ b/src/declaration.rs @@ -1,9 +1,11 @@ +/// An XML declaration. pub struct Declaration { pub version_info: VersionInfo, pub encoding_decl: Option<String>, pub sd_decl: Option<bool>, } +/// An XML version. #[derive(Clone, Copy)] pub enum VersionInfo { One, @@ -11,6 +13,7 @@ pub enum VersionInfo { } impl Declaration { + /// Create an XML declaration from a version. pub fn version(version: VersionInfo) -> Self { Self { version_info: version, diff --git a/src/element.rs b/src/element.rs index 1c1366a..b6b3c15 100644 --- a/src/element.rs +++ b/src/element.rs @@ -11,12 +11,15 @@ use crate::{ Result, }; +/// Result type for the `FromElement` trait. pub type DeserializeResult<T> = std::result::Result<T, DeserializeError>; +/// Trait for conversion from an `Element` into another type, for deserialisation from a `Reader`. pub trait FromElement: Sized { fn from_element(element: Element) -> DeserializeResult<Self>; } +/// Trait for conversion from a type into an `Element`, for serialisation into a `Writer`. pub trait IntoElement { fn builder(&self) -> ElementBuilder; @@ -45,18 +48,24 @@ pub struct Name { pub local_name: String, } +/// `Content` represents anything that can be the content of an XML element. #[derive(Debug, Clone)] pub enum Content { + /// A child element. Element(Element), + /// A text value. Text(String), + /// A processing instruction. PI, + /// A comment. Comment(String), } // should this be a trait? +/// `Element` represents an XML element that can be written to a `Writer` or read from a `Reader`. 
#[derive(Debug, Clone)] pub struct Element { - pub name: Name, + pub(crate) name: Name, // namespace: Name, // each element once created contains the qualified namespace information for that element // the name contains the qualified namespace so this is unnecessary @@ -64,15 +73,15 @@ pub struct Element { // hashmap of explicit namespace declarations on the element itself only // possibly not needed as can be calculated at write time depending on context and qualified namespace, and for reading, element validity and namespaces are kept track of by the reader. // change this to custom namespace declarations only, so you can override the definition of namespaces if you wish - pub namespace_declaration_overrides: HashSet<NamespaceDeclaration>, + pub(crate) namespace_declaration_overrides: HashSet<NamespaceDeclaration>, // attributes can be in a different namespace than the element. how to make sure they are valid? // maybe include the namespace instead of or with the prefix // you can calculate the prefix from the namespaced name and the current writer context // you can validate the prefix and calculate the namespace from the current reader context // this results in readers and writers being able to return qualification errors as they aren't able to create elements until every part is qualified. - pub attributes: HashMap<Name, String>, + pub(crate) attributes: HashMap<Name, String>, // TODO: make a hashmap maybe? to be able to address parts of the content individually - pub content: VecDeque<Content>, + pub(crate) content: VecDeque<Content>, } impl FromElement for Element { @@ -82,10 +91,12 @@ impl FromElement for Element { } impl Element { + /// Return the namespace the xml element is qualified by, and the localname, for matching on the element when you don't know which kind of element to expect. pub fn identify(&self) -> (Option<&str>, &str) { (self.name.namespace.as_deref(), &self.name.local_name) } + /// Check the localname of the element. 
pub fn check_name(&self, name: &str) -> DeserializeResult<()> { if self.name.local_name == name { Ok(()) @@ -97,6 +108,7 @@ impl Element { } } + /// Check the element is qualified by a namespace. pub fn check_namespace(&self, namespace: &str) -> DeserializeResult<()> { if self.name.namespace.as_deref() == Some(namespace) { return Ok(()); @@ -114,6 +126,7 @@ impl Element { } } + /// Optionally extract an attribute from the element. pub fn attribute_opt<V: FromStr>(&mut self, att_name: &str) -> DeserializeResult<Option<V>> { if let Some(att_value) = self.attributes.remove(&Name { namespace: None, @@ -127,6 +140,7 @@ impl Element { } } + /// Optionally extract a namespaced attribute from the element. pub fn attribute_opt_namespaced<V: FromStr>( &mut self, att_name: &str, @@ -144,6 +158,7 @@ impl Element { } } + /// Extract an attribute from the element. pub fn attribute<V: FromStr>(&mut self, att_name: &str) -> DeserializeResult<V> { let name = Name { namespace: None, @@ -158,6 +173,7 @@ impl Element { } } + /// Extract a namespaced attribute from the element. pub fn attribute_namespaced<V: FromStr>( &mut self, att_name: &str, @@ -176,6 +192,7 @@ impl Element { } } + /// Ensure there are no more attributes on the element. pub fn no_more_attributes(self) -> DeserializeResult<Self> { if self.attributes.is_empty() { Ok(self) @@ -186,6 +203,8 @@ impl Element { // for xs:any + /// Extract a child of type `T` from the element. + /// E.g. when there is an xs:any. pub fn child_one<T: FromElement>(&mut self) -> DeserializeResult<T> { if let Some(position) = self.content.iter().position(|content| match content { Content::Element(element) => <T as FromElement>::from_element(element.clone()).is_ok(), @@ -204,6 +223,8 @@ impl Element { } } + /// Optionally extract a child of type `T` from the element. + /// E.g. when there is an xs:any. 
pub fn child_opt<T: FromElement>(&mut self) -> DeserializeResult<Option<T>> { if let Some(position) = self.content.iter().position(|content| match content { Content::Element(element) => <T as FromElement>::from_element(element.clone()).is_ok(), @@ -222,6 +243,7 @@ impl Element { } } + /// Extract several children of type `T` from the element. pub fn children<T: FromElement>(&mut self) -> DeserializeResult<Vec<T>> { let (children, rest): (VecDeque<_>, VecDeque<_>) = self .content @@ -252,6 +274,7 @@ impl Element { Ok(children) } + /// Extract a text value from the element. pub fn value<V: FromStr>(&mut self) -> DeserializeResult<V> { if let Some(position) = self.content.iter().position(|content| match content { Content::Element(_) => false, @@ -270,6 +293,7 @@ impl Element { } } + /// Optionally extract a text value from the element. pub fn value_opt<V: FromStr>(&mut self) -> DeserializeResult<Option<V>> { if let Some(position) = self.content.iter().position(|content| match content { Content::Element(_) => false, @@ -290,6 +314,8 @@ impl Element { // for xs:sequence + /// Pop a child element of type `T` from the element. + /// E.g. when there is an xs:sequence. pub fn pop_child_one<T: FromElement>(&mut self) -> DeserializeResult<T> { loop { let child = self @@ -307,6 +333,8 @@ impl Element { } } + /// Optionally pop a child element of type `T` from the element. + /// E.g. when there is an xs:sequence. pub fn pop_child_opt<T: FromElement>(&mut self) -> DeserializeResult<Option<T>> { loop { let child = self.content.pop_front(); @@ -327,6 +355,8 @@ impl Element { } } + /// Pop several children of type `T` from the element. + /// E.g. when there is an xs:sequence. pub fn pop_children<T: FromElement>(&mut self) -> DeserializeResult<Vec<T>> { let mut children = Vec::new(); loop { @@ -360,6 +390,8 @@ impl Element { } } + /// Pop a text value from the element. + /// E.g. when there is an xs:sequence. 
pub fn pop_value<V: FromStr>(&mut self) -> DeserializeResult<V> { loop { let child = self @@ -381,6 +413,8 @@ impl Element { } } + /// Optionally pop a text value from the element. + /// E.g. when there is an xs:sequence. pub fn pop_value_opt<V: FromStr>(&mut self) -> DeserializeResult<Option<V>> { loop { let child = self.content.pop_front(); @@ -404,6 +438,7 @@ impl Element { } } + /// Ensure there is no more element content left. pub fn no_more_content(self) -> DeserializeResult<Self> { if self .content @@ -423,11 +458,13 @@ impl Element { } } + /// Create a new `ElementBuilder`. pub fn builder(name: impl ToString, namespace: Option<impl ToString>) -> ElementBuilder { ElementBuilder::new(name, namespace) } } +/// Builder for the `Element` type. pub struct ElementBuilder { name: Name, namespace_declaration_overrides: Vec<NamespaceDeclaration>, @@ -436,6 +473,7 @@ pub struct ElementBuilder { } impl ElementBuilder { + /// Create a new `ElementBuilder`. pub fn new(name: impl ToString, namespace: Option<impl ToString>) -> Self { Self { name: Name { @@ -448,6 +486,7 @@ impl ElementBuilder { } } + /// Push a namespace declaration override onto the element builder. pub fn push_namespace_declaration_override( mut self, prefix: Option<impl ToString>, @@ -461,6 +500,7 @@ impl ElementBuilder { self } + /// Push an attribute onto the element builder. pub fn push_attribute<N: ToString, V: ToString>(mut self, name: N, value: V) -> Self { self.attributes.push(( // TODO: make sure name is a valid name, same for prefixes @@ -473,6 +513,7 @@ impl ElementBuilder { self } + /// Push a namespaced attribute onto the element builder. pub fn push_attribute_namespaced( mut self, namespace: impl ToString, @@ -490,17 +531,20 @@ impl ElementBuilder { } // TODO: use references for everything to avoid cloning + /// Push a child element onto the element builder. 
pub fn push_child(mut self, child: impl IntoElement) -> Self { self.content.push(ContentBuilder::Element(child.builder())); self } // TODO: better way for push_text to work, empty string should be empty element no matter what + /// Push a text value onto the element builder. pub fn push_text(mut self, text: impl ToString) -> Self { self.content.push(ContentBuilder::Text(text.to_string())); self } + /// Optionally push an attribute onto the element builder. pub fn push_attribute_opt(self, name: impl ToString, value: Option<impl ToString>) -> Self { if let Some(value) = value { self.push_attribute(name, value) @@ -509,6 +553,7 @@ impl ElementBuilder { } } + /// Optionally push a namespaced attribute onto the element builder. pub fn push_attribute_opt_namespaced( self, namespace: impl ToString, @@ -522,6 +567,7 @@ impl ElementBuilder { } } + /// Optionally push a child onto the element builder. pub fn push_child_opt(self, child: Option<impl IntoElement>) -> Self { if let Some(child) = child { self.push_child(child) @@ -530,6 +576,7 @@ impl ElementBuilder { } } + /// Optionally push a text value onto the element builder. pub fn push_text_opt(self, text: Option<impl ToString>) -> Self { if let Some(text) = text { self.push_text(text) @@ -538,11 +585,13 @@ impl ElementBuilder { } } + /// Push a content item onto the element builder. pub fn push_content(mut self, content: ContentBuilder) -> Self { self.content.push(content); self } + /// Push several content items onto the element builder. pub fn push_children(self, children: Vec<impl IntoContent>) -> Self { let mut element_builder = self; for child in children { @@ -551,6 +600,7 @@ impl ElementBuilder { element_builder } + /// Build an `Element` from the `ElementBuilder`. 
pub fn build(&self) -> Result<Element> { let mut namespace_declaration_overrides = HashSet::new(); for namespace_declaration in &self.namespace_declaration_overrides { @@ -588,6 +638,7 @@ impl ElementBuilder { } } +/// Trait for conversion from a type into an (`Element`) `Content` item. pub trait IntoContent { fn into_content(&self) -> Content { self.builder().build().unwrap() @@ -605,17 +656,23 @@ where } } +/// Trait for conversion from some `Element` `Content` into another type. pub trait FromContent: Sized { fn from_content(content: Content) -> DeserializeResult<Self>; } +/// Builder for `Content`. pub enum ContentBuilder { + /// A child element. Element(ElementBuilder), + /// A text value. Text(String), + /// A comment. Comment(String), } impl ContentBuilder { + /// Build a `Content` item from the builder. pub fn build(&self) -> Result<Content> { match self { ContentBuilder::Element(element_builder) => { @@ -627,6 +684,7 @@ impl ContentBuilder { } } +/// Escape a str into an XML escaped string. pub fn escape_str(s: &str) -> String { let mut string = String::new(); for str in s.split_inclusive(|c| c == '<' || c == '&' || c == '>') { diff --git a/src/error.rs b/src/error.rs index ae4aa26..26b7766 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,57 +1,75 @@ use std::{ collections::{HashMap, VecDeque}, - fmt, num::ParseIntError, str::Utf8Error, sync::Arc, }; use thiserror::Error; +#[cfg(target_arch = "wasm32")] +use wasm_bindgen::JsValue; use crate::{ element::{Content, Name, NamespaceDeclaration}, Element, }; +/// Error type for the `FromElement` trait. Used when deserialising from an `Element`. #[derive(Error, Debug, Clone)] pub enum DeserializeError { + /// Could not parse string. #[error("could not parse string {0:?} to requested value")] FromStr(String), + /// Unexpected attributes. #[error("unexpected attributes {0:?}")] UnexpectedAttributes(HashMap<Name, String>), + /// Unexpected element content. 
#[error("unexpected element content: {0:?}")] UnexpectedContent(VecDeque<Content>), + /// Missing attribute. #[error("attribute `{0:?}` missing")] MissingAttribute(Name), + /// Incorrect localname encountered. #[error("incorrect localname: expected `{expected:?}`, found `{found:?}`")] IncorrectName { expected: String, found: String }, + /// Incorrect namespace encountered. #[error("incorrect namespace: expected `{expected:?}`, found `{found:?}`")] IncorrectNamespace { expected: String, found: String }, + /// Unqualified namespace when expecting qualified namespace. #[error("unqualified namespace: expected `{expected:?}`")] Unqualified { expected: String }, + /// Element missing expected child. #[error("element missing expected child")] MissingChild, + /// Element missing expected text value. #[error("element missing expected text value")] MissingValue, // not used by crate (yet), but may be used by consumers implementing FromElement + /// Unexpected element. #[error("unexpected element: {0:?}")] UnexpectedElement(Element), + /// Attribute is an empty string. #[error("attribute `{0}` is an empty string")] AttributeEmptyString(String), + /// Empty string. #[error("empty string")] EmptyString, } +/// General error type for functions in the crate. // TODO: add error context (usually the stanza) #[derive(Error, Debug, Clone)] pub enum Error { + #[cfg(target_arch = "wasm32")] + #[error("websocket closed")] + WebSocketClosed, #[error("io: {0}")] // TODO: is this okay? 
ReadError(Arc<std::io::Error>), #[error("utf8 conversion: {0}")] Utf8Error(#[from] Utf8Error), #[error("nom parsing: {0}")] - ParseError(String), + ParseError(String, String), #[error("unknown xml entity reference `&{0};`")] EntityProcessError(String), #[error("invalid character reference: {0}")] @@ -72,6 +90,34 @@ pub enum Error { Deserialize(#[from] DeserializeError), #[error("root element has already been fully processed")] RootElementEnded, + #[cfg(target_arch = "wasm32")] + #[error("websocket error: {0}")] + Websocket(#[from] WebsocketError), +} + +/// Websocket-related errors. +#[cfg(target_arch = "wasm32")] +#[derive(Error, Debug, Clone)] +pub enum WebsocketError { + /// Websocket write error. + #[error("write")] + Write, + /// Invalid encoding. + #[error("invalid encoding")] + InvalidEncoding, + /// Can't decode blob. + #[error("can't decode blob")] + CantDecodeBlob, + /// Unknown data type. + #[error("unknown data type")] + UnknownDataType, +} + +#[cfg(target_arch = "wasm32")] +impl From<JsValue> for Error { + fn from(_e: JsValue) -> Self { + Self::Websocket(WebsocketError::Write) + } } impl From<std::io::Error> for Error { @@ -80,12 +126,16 @@ impl From<std::io::Error> for Error { } } +/// Character reference decode error. #[derive(Error, Debug, Clone)] pub enum CharRefError { + /// Int parsing. #[error("int parsing: {0}")] ParseInt(#[from] ParseIntError), + /// Integer is not a valid char. #[error("u32 `{0}` does not represent a valid char")] IntegerNotAChar(u32), + /// Character is an invalid XML char. #[error("`{0}` is not a valid xml char")] InvalidXMLChar(char), } @@ -1,19 +1,44 @@ +//! # peanuts: An ergonomic (async) xml reader/writer library. +//! +//! Features: +//! +//! - Serialisation +//! - Deserialisation +//! - DOM navigation +//! - Namespacing +//! - Websocket framing + +/// XML prolog declaration types. 
pub mod declaration; -pub mod element; -mod endable; +mod element; mod error; -mod loggable; -pub mod reader; +mod reader; mod writer; -pub mod xml; +// TODO: alternative raw xml API +mod xml; +/// Result type for the crate. pub type Result<T> = std::result::Result<T, error::Error>; +/// XML namespace URI for the `xml:` namespace prefix. pub const XML_NS: &str = "http://www.w3.org/XML/1998/namespace"; +/// XML namespace URI for the `xmlns:` namespace prefix. pub const XMLNS_NS: &str = "http://www.w3.org/2000/xmlns/"; +pub use element::Content; +pub use element::ContentBuilder; +pub use element::DeserializeResult; pub use element::Element; +pub use element::ElementBuilder; +pub use element::FromContent; +pub use element::FromElement; +pub use element::IntoContent; +pub use element::IntoElement; pub use error::DeserializeError; pub use error::Error; +pub use reader::ReadableString; pub use reader::Reader; +#[cfg(target_arch = "wasm32")] +pub use reader::WebSocketOnMessageRead; +pub use writer::Loggable; pub use writer::Writer; diff --git a/src/reader.rs b/src/reader.rs index 9eb7c91..a403171 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -1,12 +1,24 @@ +use std::io::Write; + use circular::Buffer; +#[cfg(target_arch = "wasm32")] +use js_sys::{ArrayBuffer, Uint8Array}; use nom::Err; use std::{ collections::{HashMap, HashSet, VecDeque}, str, }; -use tokio::io::{AsyncRead, AsyncReadExt}; -use tracing::{debug, info, trace}; - +use tokio::io::AsyncRead; +#[cfg(target_arch = "wasm32")] +use tokio::sync::mpsc; +use tracing::{info, trace}; +#[cfg(target_arch = "wasm32")] +use wasm_bindgen::{closure::Closure, JsCast}; +#[cfg(target_arch = "wasm32")] +use web_sys::{Blob, MessageEvent}; + +#[cfg(target_arch = "wasm32")] +use crate::error::WebsocketError; use crate::{ declaration::{Declaration, VersionInfo}, element::{Content, Element, FromElement, Name, NamespaceDeclaration}, @@ -17,19 +29,144 @@ use crate::{ static MAX_STANZA_SIZE: usize = 65536; -/// streaming reader that 
tracks depth and available namespaces at current depth +/// Reader that tracks depth and corresponding declared/available namespaces. #[derive(Debug)] pub struct Reader<R> { inner: R, - pub buffer: Buffer, + buffer: Buffer, // holds which tags we are in atm over depth // to have names reference namespaces could depth: Vec<Name>, namespace_declarations: Vec<HashSet<NamespaceDeclaration>>, + unendable: bool, root_ended: bool, } +/// Represents a WebSocket Message, after converting from JavaScript type. +/// from https://github.com/najamelan/ws_stream_wasm/blob/dev/src/ws_message.rs +#[cfg(target_arch = "wasm32")] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum WsMessage { + /// The data of the message is a string. + /// + Text(String), + + /// The message contains binary data. + /// + Binary(Vec<u8>), +} + +/// This will convert the JavaScript event into a WsMessage. Note that this +/// will only work if the connection is set to use the binary type ArrayBuffer. +/// On binary type Blob, this will panic. +/// from https://github.com/najamelan/ws_stream_wasm/blob/dev/src/ws_message.rs +#[cfg(target_arch = "wasm32")] +impl TryFrom<MessageEvent> for WsMessage { + type Error = WebsocketError; + + fn try_from(evt: MessageEvent) -> std::result::Result<Self, Self::Error> { + match evt.data() { + d if d.is_instance_of::<ArrayBuffer>() => { + let buffy = Uint8Array::new(d.unchecked_ref()); + let mut v = vec![0; buffy.length() as usize]; + + buffy.copy_to(&mut v); // FIXME: get rid of this copy + + Ok(WsMessage::Binary(v)) + } + + // We don't allow invalid encodings. In principle if needed, + // we could add a variant to WsMessage with a CString or an OsString + // to allow the user to access this data. However until there is a usecase, + // I'm not inclined, amongst other things because the conversion from Js isn't very + // clear and it would require a bunch of testing for something that's a rather bad + // idea to begin with. 
If you need data that is not a valid string, use a binary + // message. + // + d if d.is_string() => match d.as_string() { + Some(text) => Ok(WsMessage::Text(text)), + None => Err(WebsocketError::InvalidEncoding), + }, + + // We have set the binary mode to array buffer (WsMeta::connect), so normally this shouldn't happen. + // That is as long as this is used within the context of the WsMeta constructor. + // + d if d.is_instance_of::<Blob>() => Err(WebsocketError::CantDecodeBlob), + + // should never happen. + // + _ => Err(WebsocketError::UnknownDataType), + } + } +} + +#[cfg(target_arch = "wasm32")] +#[derive(Debug)] +/// Receiver for websocket frames. Implements `Readable` for asynchronous XML reading. +pub struct WebSocketOnMessageRead { + queue: mpsc::UnboundedReceiver<WsMessage>, +} + +#[cfg(target_arch = "wasm32")] +impl WebSocketOnMessageRead { + /// Create a new `WebsocketOnMessageRead` with corresponding `on_message` event closure. + pub fn new() -> (Closure<dyn FnMut(MessageEvent)>, Self) { + let (send, recv) = mpsc::unbounded_channel(); + let on_msg = Closure::wrap(Box::new(move |msg_evt: MessageEvent| { + let msg_evt = msg_evt.try_into(); + match msg_evt { + Ok(msg_evt) => match send.send(msg_evt) { + Ok(()) => {} + Err(e) => { + tracing::error!("message event send error: {:?}", e); + } + }, + Err(e) => { + tracing::error!("websocket receive error: {}", e); + } + } + }) as Box<dyn FnMut(MessageEvent)>); + + (on_msg, Self { queue: recv }) + } +} + +#[cfg(target_arch = "wasm32")] +impl Readable for WebSocketOnMessageRead { + async fn read_buf(&mut self, buffer: &mut Buffer) -> Result<usize> { + let msg = self.queue.recv().await; + let msg = match msg { + Some(msg) => msg, + None => return Err(Error::WebSocketClosed), + }; + match msg { + WsMessage::Text(s) => { + let text = s.as_bytes(); + Ok(buffer.write(text)?) + } + WsMessage::Binary(v) => Ok(buffer.write(&v)?), + } + } +} + +/// Trait for abstracting asynchronous read streams. 
+pub trait Readable { + fn read_buf(&mut self, buffer: &mut Buffer) + -> impl std::future::Future<Output = Result<usize>>; +} + +/// String wrapper which implements Readable, for string parsing. +pub struct ReadableString(pub String); + +impl Readable for ReadableString { + async fn read_buf(&mut self, buffer: &mut Buffer) -> Result<usize> { + let string = self.0.split_off(0); + Ok(buffer.write(string.as_bytes())?) + } +} + impl<R> Reader<R> { + /// Create a new `Reader` which is constrained to a single root element. pub fn new(reader: R) -> Self { let mut default_declarations = HashSet::new(); default_declarations.insert(NamespaceDeclaration { @@ -46,25 +183,52 @@ impl<R> Reader<R> { depth: Vec::new(), // TODO: make sure reserved namespaces are never overwritten namespace_declarations: vec![default_declarations], + unendable: false, + root_ended: false, + } + } + + /// Create a new `Reader` which is not constrained to a single root element. + pub fn new_unendable(reader: R) -> Self { + let mut default_declarations = HashSet::new(); + default_declarations.insert(NamespaceDeclaration { + prefix: Some("xml".to_string()), + namespace: XML_NS.to_string(), + }); + default_declarations.insert(NamespaceDeclaration { + prefix: Some("xmlns".to_string()), + namespace: XMLNS_NS.to_string(), + }); + Self { + inner: reader, + buffer: Buffer::with_capacity(MAX_STANZA_SIZE), + depth: Vec::new(), + // TODO: make sure reserved namespaces are never overwritten + namespace_declarations: vec![default_declarations], + unendable: true, root_ended: false, } } + /// Extract the inner type from the `Reader`. pub fn into_inner(self) -> R { self.inner } } -impl<R> Reader<R> +impl<R> Readable for R where R: AsyncRead + Unpin, { - pub async fn read_buf<'s>(&mut self) -> Result<usize> { - Ok(self.inner.read_buf(&mut self.buffer).await?) + async fn read_buf(&mut self, buffer: &mut Buffer) -> Result<usize> { + Ok(tokio::io::AsyncReadExt::read_buf(self, buffer).await?) 
} +} +impl<R: Readable> Reader<R> { + /// Attempt to read an XML prolog, which could include an XML declaration, miscellaneous items (e.g. comments, processing instructions), and/or a doctype declaration. pub async fn read_prolog<'s>(&'s mut self) -> Result<Option<Declaration>> { - if self.root_ended { + if !self.unendable && self.root_ended { return Err(Error::RootElementEnded); } loop { @@ -104,30 +268,37 @@ where } std::result::Result::Err(e) => match e { Err::Incomplete(_) => { - self.read_buf().await?; + self.inner.read_buf(&mut self.buffer).await?; } // TODO: better error - Err::Error(e) => return Err(Error::ParseError(e.to_string())), - Err::Failure(e) => return Err(Error::ParseError(e.to_string())), + Err::Error(e) => { + return Err(Error::ParseError(input.to_string(), e.to_string())) + } + Err::Failure(e) => { + return Err(Error::ParseError(input.to_string(), e.to_string())) + } }, } } } + /// Read a start tag, moving up in document depth, and convert it into a type which implements `FromElement`. pub async fn read_start<'s, T: FromElement>(&'s mut self) -> Result<T> { let element = self.read_start_tag().await?; trace!("read element start: {:?}", element); Ok(FromElement::from_element(element)?) } + /// Read a full element (start tag + content + end tag, or empty tag) and convert it into a type which implements `FromElement`. pub async fn read<'s, T: FromElement>(&'s mut self) -> Result<T> { let element = self.read_element().await?; trace!("read element: {:?}", element); Ok(FromElement::from_element(element)?) } + /// Read a start tag, moving up in document depth. 
pub async fn read_start_tag<'s>(&'s mut self) -> Result<Element> { - if self.root_ended { + if !self.unendable && self.root_ended { return Err(Error::RootElementEnded); } loop { @@ -147,18 +318,23 @@ where } std::result::Result::Err(e) => match e { Err::Incomplete(_) => { - self.read_buf().await?; + self.inner.read_buf(&mut self.buffer).await?; } // TODO: better error - Err::Error(e) => return Err(Error::ParseError(e.to_string())), - Err::Failure(e) => return Err(Error::ParseError(e.to_string())), + Err::Error(e) => { + return Err(Error::ParseError(input.to_string(), e.to_string())) + } + Err::Failure(e) => { + return Err(Error::ParseError(input.to_string(), e.to_string())) + } }, } } } + /// Read an end tag, moving down in document depth. pub async fn read_end_tag<'s>(&'s mut self) -> Result<()> { - if self.root_ended { + if !self.unendable && self.root_ended { return Err(Error::RootElementEnded); } loop { @@ -181,18 +357,23 @@ where } std::result::Result::Err(e) => match e { Err::Incomplete(_) => { - self.read_buf().await?; + self.inner.read_buf(&mut self.buffer).await?; } // TODO: better error - Err::Error(e) => return Err(Error::ParseError(e.to_string())), - Err::Failure(e) => return Err(Error::ParseError(e.to_string())), + Err::Error(e) => { + return Err(Error::ParseError(input.to_string(), e.to_string())) + } + Err::Failure(e) => { + return Err(Error::ParseError(input.to_string(), e.to_string())) + } }, } } } + /// Read a full element (start tag + content + end tag, or empty tag). 
pub async fn read_element<'s>(&'s mut self) -> Result<Element> { - if self.root_ended { + if !self.unendable && self.root_ended { return Err(Error::RootElementEnded); } loop { @@ -212,18 +393,23 @@ where } std::result::Result::Err(e) => match e { Err::Incomplete(_) => { - self.read_buf().await?; + self.inner.read_buf(&mut self.buffer).await?; } // TODO: better error - Err::Error(e) => return Err(Error::ParseError(e.to_string())), - Err::Failure(e) => return Err(Error::ParseError(e.to_string())), + Err::Error(e) => { + return Err(Error::ParseError(input.to_string(), e.to_string())) + } + Err::Failure(e) => { + return Err(Error::ParseError(input.to_string(), e.to_string())) + } }, } } } + /// Read element content (text, another full element, a comment, a PI). pub async fn read_content<'s>(&'s mut self) -> Result<Content> { - if self.root_ended { + if !self.unendable && self.root_ended { return Err(Error::RootElementEnded); } let mut last_char = false; @@ -240,7 +426,7 @@ where } std::result::Result::Err(e) => match e { Err::Incomplete(_) => { - self.read_buf().await?; + self.inner.read_buf(&mut self.buffer).await?; } _ => match xml::ContentItem::parse(input) { Ok((rest, content_item)) => match content_item { @@ -289,7 +475,7 @@ where } else { let len = self.buffer.available_data() - rest.as_bytes().len(); - let comment = comment.to_string(); + let comment = comment.0.to_string(); self.buffer.consume(len); return Ok(Content::Comment(comment)); } @@ -297,11 +483,15 @@ where }, std::result::Result::Err(e) => match e { Err::Incomplete(_) => { - self.read_buf().await?; + self.inner.read_buf(&mut self.buffer).await?; } // TODO: better error - Err::Error(e) => return Err(Error::ParseError(e.to_string())), - Err::Failure(e) => return Err(Error::ParseError(e.to_string())), + Err::Error(e) => { + return Err(Error::ParseError(input.to_string(), e.to_string())) + } + Err::Failure(e) => { + return Err(Error::ParseError(input.to_string(), e.to_string())) + } }, }, }, @@ -351,18 
+541,22 @@ where } xml::ContentItem::Comment(comment) => { let len = self.buffer.available_data() - rest.as_bytes().len(); - let comment = comment.to_string(); + let comment = comment.0.to_string(); self.buffer.consume(len); return Ok(Content::Comment(comment)); } }, std::result::Result::Err(e) => match e { Err::Incomplete(_) => { - self.read_buf().await?; + self.inner.read_buf(&mut self.buffer).await?; } // TODO: better error - Err::Error(e) => return Err(Error::ParseError(e.to_string())), - Err::Failure(e) => return Err(Error::ParseError(e.to_string())), + Err::Error(e) => { + return Err(Error::ParseError(input.to_string(), e.to_string())) + } + Err::Failure(e) => { + return Err(Error::ParseError(input.to_string(), e.to_string())) + } }, } } @@ -371,6 +565,7 @@ where } impl<R> Reader<R> { + /// Convert a start tag into an `Element` given a mutable document context. fn start_tag_from_xml( depth: &mut Vec<Name>, namespace_declarations: &mut Vec<HashSet<NamespaceDeclaration>>, @@ -388,7 +583,7 @@ impl<R> Reader<R> { }) { let prefix = match prefix { xml::NSAttName::PrefixedAttName(prefixed_att_name) => { - Some(prefixed_att_name.to_string()) + Some(prefixed_att_name.0.to_string()) } xml::NSAttName::DefaultAttName => None, }; @@ -408,6 +603,10 @@ impl<R> Reader<R> { .chain(element_namespace_declarations.iter()) .collect(); + trace!( + "namespace declaration stack: {:?}", + namespace_declarations_stack + ); // element name let element_namespace_declaration; @@ -488,6 +687,7 @@ impl<R> Reader<R> { }); } + /// Ensure an end tag is acceptable given a document context. fn end_tag_from_xml( depth: &mut Vec<Name>, namespace_declarations: &mut Vec<HashSet<NamespaceDeclaration>>, @@ -535,6 +735,7 @@ impl<R> Reader<R> { } } + /// Convert an xml element (empty or not) into an `Element` given a mutable document context. 
fn element_from_xml( namespace_declarations: &mut Vec<HashSet<NamespaceDeclaration>>, element: xml::Element, @@ -571,7 +772,7 @@ impl<R> Reader<R> { }) { let prefix = match prefix { xml::NSAttName::PrefixedAttName(prefixed_att_name) => { - Some(prefixed_att_name.to_string()) + Some(prefixed_att_name.0.to_string()) } xml::NSAttName::DefaultAttName => None, }; @@ -709,6 +910,7 @@ impl<R> Reader<R> { }); } + /// Convert xml content into a `VecDeque` of `Content` given a document context. fn content_from_xml( namespaces: &mut Vec<HashSet<NamespaceDeclaration>>, xml_content: xml::Content, diff --git a/src/writer.rs b/src/writer.rs index 7ed1775..1818fca 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -3,26 +3,33 @@ use std::collections::HashSet; use async_recursion::async_recursion; use tokio::io::{AsyncWrite, AsyncWriteExt}; use tracing::info; +#[cfg(target_arch = "wasm32")] +use web_sys::WebSocket; use crate::{ declaration::{Declaration, VersionInfo}, element::{escape_str, Content, Element, IntoContent, IntoElement, Name, NamespaceDeclaration}, - endable::Endable, error::Error, - loggable::Loggable, xml::{self, composers::Composer, parsers_complete::Parser}, Result, XMLNS_NS, XML_NS, }; +use endable::Endable; +pub use loggable::Loggable; -// pub struct Writer<W, C = Composer> { +mod endable; +mod loggable; + +/// Writer that tracks depth and corresponding declared/available namespaces. #[derive(Debug)] pub struct Writer<W> { - inner: Endable<Loggable<W>>, + inner: Endable<W>, + unendable: bool, depth: Vec<Name>, namespace_declarations: Vec<HashSet<NamespaceDeclaration>>, } -impl<W> Writer<W> { +impl<W> Writer<Loggable<W>> { + /// Create a new `Writer` which is constrained to a single root element. 
pub fn new(writer: W) -> Self { let mut default_declarations = HashSet::new(); default_declarations.insert(NamespaceDeclaration { @@ -35,19 +42,477 @@ impl<W> Writer<W> { }); Self { inner: Endable::new(Loggable::new(writer)), + unendable: false, depth: Vec::new(), namespace_declarations: vec![default_declarations], } } + /// Create a new `Writer` which is not constrained to a single root element. + pub fn new_unendable(writer: W) -> Self { + let mut default_declarations = HashSet::new(); + default_declarations.insert(NamespaceDeclaration { + prefix: Some("xml".to_string()), + namespace: XML_NS.to_string(), + }); + default_declarations.insert(NamespaceDeclaration { + prefix: Some("xmlns".to_string()), + namespace: XMLNS_NS.to_string(), + }); + Self { + inner: Endable::new(Loggable::new(writer)), + unendable: true, + depth: Vec::new(), + namespace_declarations: vec![default_declarations], + } + } + + /// Extract the inner type from the `Writer`. pub fn into_inner(self) -> W { self.inner.into_inner().into_inner() } } -impl<W: AsyncWrite + Unpin + Send> Writer<W> { +#[cfg(target_arch = "wasm32")] +impl Writer<WebSocket> { + /// Create a new `Writer` which is constrained to a single root element. + pub fn new(websocket: WebSocket) -> Self { + let mut default_declarations = HashSet::new(); + default_declarations.insert(NamespaceDeclaration { + prefix: Some("xml".to_string()), + namespace: XML_NS.to_string(), + }); + default_declarations.insert(NamespaceDeclaration { + prefix: Some("xmlns".to_string()), + namespace: XMLNS_NS.to_string(), + }); + Self { + inner: Endable::new(websocket), + unendable: false, + depth: Vec::new(), + namespace_declarations: vec![default_declarations], + } + } + + /// Extract the inner `WebSocket` from the `Writer`. + pub fn into_inner(self) -> WebSocket { + self.inner.into_inner() + } + + /// Create a new `Writer` which is not constrained to a single root element. 
+ pub fn new_unendable(websocket: WebSocket) -> Self { + let mut default_declarations = HashSet::new(); + default_declarations.insert(NamespaceDeclaration { + prefix: Some("xml".to_string()), + namespace: XML_NS.to_string(), + }); + default_declarations.insert(NamespaceDeclaration { + prefix: Some("xmlns".to_string()), + namespace: XMLNS_NS.to_string(), + }); + Self { + inner: Endable::new(websocket), + unendable: true, + depth: Vec::new(), + namespace_declarations: vec![default_declarations], + } + } + + /// Write an XML declaration with the provided `VersionInfo`. pub async fn write_declaration(&mut self, version: VersionInfo) -> Result<()> { - let writer = self.inner.try_as_mut()?; + let declaration = Declaration::version(version); + let version_info; + match declaration.version_info { + VersionInfo::One => version_info = xml::VersionInfo::SingleQuoted(xml::VersionNum::One), + VersionInfo::OneDotOne => { + version_info = xml::VersionInfo::SingleQuoted(xml::VersionNum::OneDotOne) + } + } + let declaration = xml::XMLDecl { + version_info, + encoding_decl: None, + sd_decl: None, + }; + let declaration = declaration.to_string(); + if self.unendable { + self.inner.ignore_end().send_with_str(&declaration)?; + } else { + self.inner.try_as_mut()?.send_with_str(&declaration)?; + } + Ok(()) + } + + /// Write a full element corresponding with the item implementing `IntoElement` (start tag + content + end tag). + pub async fn write_full(&mut self, into_element: &impl IntoElement) -> Result<()> { + let element = into_element.into_element(); + let mut frame = String::new(); + self.write_element_to_frame(&element, &mut frame)?; + self.inner.ignore_end().send_with_str(&frame)?; + info!("wrote element: {}", frame); + Ok(()) + } + + /// Write the start tag of an item that implements `IntoElement`. Navigates up the document. 
+ pub async fn write_start(&mut self, into_element: &impl IntoElement) -> Result<()> { + let element = into_element.into_element(); + let mut frame = String::new(); + self.write_element_start_to_frame(&element, &mut frame)?; + self.inner.ignore_end().send_with_str(&frame)?; + info!("wrote element: {}", frame); + Ok(()) + } + + /// Write all the inner content (everything within the start and end tag of an xml element) of an item that implements `IntoElement`. In the case of an empty element, write nothing. + pub async fn write_all_content(&mut self, into_element: &impl IntoElement) -> Result<()> { + let mut frame = String::new(); + for content in &into_element.get_content() { + self.write_content_to_frame(content, &mut frame)?; + } + self.inner.ignore_end().send_with_str(&frame)?; + info!("wrote element: {}", frame); + Ok(()) + } + + /// Write an item that implements `IntoContent`. Could be an element, some text, a comment, etc. Anything that could be included in an element body. + pub async fn write(&mut self, into_content: &impl IntoContent) -> Result<()> { + let content = into_content.into_content(); + let mut frame = String::new(); + self.write_content_to_frame(&content, &mut frame)?; + self.inner.ignore_end().send_with_str(&frame)?; + info!("wrote element: {}", frame); + Ok(()) + } + + /// Navigate down the document structure and write the end tag for the current element opened in the document context. 
+ pub async fn write_end(&mut self) -> Result<()> { + let mut frame = String::new(); + self.write_end_tag_to_frame(&mut frame)?; + self.inner.ignore_end().send_with_str(&frame)?; + info!("wrote element: {}", frame); + Ok(()) + } + + fn write_element_to_frame(&mut self, element: &Element, frame: &mut String) -> Result<()> { + if element.content.is_empty() { + self.write_empty_to_frame(element, frame)?; + } else { + self.write_element_start_to_frame(element, frame)?; + for content in &element.content { + self.write_content_to_frame(content, frame)?; + } + self.write_end_tag_to_frame(frame)?; + } + Ok(()) + } + + fn write_empty_to_frame(&mut self, element: &Element, frame: &mut String) -> Result<()> { + let _writer = if self.unendable { + self.inner.ignore_end() + } else { + self.inner.try_as_mut()? + }; + + let mut namespace_declarations_stack: Vec<_> = self + .namespace_declarations + .iter() + .flatten() + .chain(&element.namespace_declaration_overrides) + .collect(); + + let mut namespace_declarations = element.namespace_declaration_overrides.clone(); + + let default_namespace_declaration; + let prefix; + if let Some(namespace) = &element.name.namespace { + if let Some(name_namespace_declaration) = namespace_declarations_stack + .iter() + .rfind(|namespace_declaration| namespace_declaration.namespace == *namespace) + { + prefix = name_namespace_declaration.prefix.as_ref(); + } else { + default_namespace_declaration = NamespaceDeclaration { + prefix: None, + namespace: namespace.clone(), + }; + if namespace_declarations.insert(default_namespace_declaration.clone()) { + namespace_declarations_stack.push(&default_namespace_declaration); + prefix = None + } else { + return Err(Error::DuplicateNameSpaceDeclaration(NamespaceDeclaration { + prefix: None, + namespace: namespace.clone(), + })); + } + } + } else { + prefix = None + } + + let name; + if let Some(prefix) = &prefix { + name = xml::QName::PrefixedName(xml::PrefixedName { + prefix: 
xml::Prefix::parse_full(prefix)?, + local_part: xml::LocalPart::parse_full(&element.name.local_name)?, + }) + } else { + name = xml::QName::UnprefixedName(xml::UnprefixedName::parse_full( + &element.name.local_name, + )?) + } + + let mut attributes = Vec::new(); + + for namespace_declaration in namespace_declarations.iter() { + let ns_name = namespace_declaration + .prefix + .as_ref() + .map(|prefix| -> Result<_> { + Ok(xml::NSAttName::PrefixedAttName(xml::PrefixedAttName( + xml::NCName::parse_full(&prefix)?, + ))) + }) + .unwrap_or(Ok(xml::NSAttName::DefaultAttName))?; + let value = xml::AttValue::from(namespace_declaration.namespace.as_str()); + let xml_attribute = xml::Attribute::NamespaceDeclaration { ns_name, value }; + attributes.push(xml_attribute); + } + + for (name, value) in &element.attributes { + let prefix; + if let Some(namespace) = &name.namespace { + let name_namespace_declaration = namespace_declarations_stack + .iter() + .rfind(|namespace_declaration| namespace_declaration.namespace == *namespace) + .ok_or(Error::UndeclaredNamespace(namespace.clone()))?; + prefix = name_namespace_declaration.prefix.as_ref(); + } else { + prefix = None + } + + let att_name; + if let Some(prefix) = &prefix { + att_name = xml::QName::PrefixedName(xml::PrefixedName { + prefix: xml::Prefix::parse_full(prefix)?, + local_part: xml::LocalPart::parse_full(&name.local_name)?, + }) + } else { + att_name = + xml::QName::UnprefixedName(xml::UnprefixedName::parse_full(&name.local_name)?) 
+ } + + let value = xml::AttValue::from(value.as_str()); + + let xml_attribute = xml::Attribute::Attribute { + name: att_name, + value, + }; + attributes.push(xml_attribute); + } + + let tag = xml::EmptyElemTag { name, attributes }; + + frame.push_str(&tag.to_string()); + + // if self.depth.is_empty() { + // self.inner.end(); + // } + + Ok(()) + } + + fn write_element_start_to_frame( + &mut self, + element: &Element, + frame: &mut String, + ) -> Result<()> { + let _writer = if self.unendable { + self.inner.ignore_end() + } else { + self.inner.try_as_mut()? + }; + + let mut namespace_declarations_stack: Vec<_> = self + .namespace_declarations + .iter() + .flatten() + .chain(&element.namespace_declaration_overrides) + .collect(); + + let mut namespace_declarations = element.namespace_declaration_overrides.clone(); + + let default_namespace_declaration; + let prefix; + if let Some(namespace) = &element.name.namespace { + if let Some(name_namespace_declaration) = namespace_declarations_stack + .iter() + .rfind(|namespace_declaration| namespace_declaration.namespace == *namespace) + { + prefix = name_namespace_declaration.prefix.as_ref(); + } else { + default_namespace_declaration = NamespaceDeclaration { + prefix: None, + namespace: namespace.clone(), + }; + if namespace_declarations.insert(default_namespace_declaration.clone()) { + namespace_declarations_stack.push(&default_namespace_declaration); + prefix = None + } else { + return Err(Error::DuplicateNameSpaceDeclaration(NamespaceDeclaration { + prefix: None, + namespace: namespace.clone(), + })); + } + } + } else { + prefix = None + } + + let name; + if let Some(prefix) = &prefix { + name = xml::QName::PrefixedName(xml::PrefixedName { + prefix: xml::Prefix::parse_full(prefix)?, + local_part: xml::LocalPart::parse_full(&element.name.local_name)?, + }) + } else { + name = xml::QName::UnprefixedName(xml::UnprefixedName::parse_full( + &element.name.local_name, + )?) 
+ } + + let mut attributes = Vec::new(); + + for namespace_declaration in namespace_declarations.iter() { + let ns_name = namespace_declaration + .prefix + .as_ref() + .map(|prefix| -> Result<_> { + Ok(xml::NSAttName::PrefixedAttName(xml::PrefixedAttName( + xml::NCName::parse_full(&prefix)?, + ))) + }) + .unwrap_or(Ok(xml::NSAttName::DefaultAttName))?; + let value = xml::AttValue::from(namespace_declaration.namespace.as_str()); + let xml_attribute = xml::Attribute::NamespaceDeclaration { ns_name, value }; + attributes.push(xml_attribute); + } + + for (name, value) in &element.attributes { + let prefix; + if let Some(namespace) = &name.namespace { + let name_namespace_declaration = namespace_declarations_stack + .iter() + .rfind(|namespace_declaration| namespace_declaration.namespace == *namespace) + .ok_or(Error::UndeclaredNamespace(namespace.clone()))?; + prefix = name_namespace_declaration.prefix.as_ref(); + } else { + prefix = None + } + + let att_name; + if let Some(prefix) = &prefix { + att_name = xml::QName::PrefixedName(xml::PrefixedName { + prefix: xml::Prefix::parse_full(prefix)?, + local_part: xml::LocalPart::parse_full(&name.local_name)?, + }) + } else { + att_name = + xml::QName::UnprefixedName(xml::UnprefixedName::parse_full(&name.local_name)?) + } + + let value = xml::AttValue::from(value.as_str()); + + let xml_attribute = xml::Attribute::Attribute { + name: att_name, + value, + }; + attributes.push(xml_attribute); + } + + let s_tag = xml::STag { name, attributes }; + + frame.push_str(&s_tag.to_string()); + + self.depth.push(element.name.clone()); + self.namespace_declarations + .push(namespace_declarations.clone()); + Ok(()) + } + + fn write_content_to_frame(&mut self, content: &Content, frame: &mut String) -> Result<()> { + match content { + Content::Element(element) => self.write_element_to_frame(element, frame)?, + Content::Text(text) => { + let _writer = if self.unendable { + self.inner.ignore_end() + } else { + self.inner.try_as_mut()? 
+ }; + frame.push_str(&escape_str(text)) + } + // TODO: comments and PI + Content::PI => {} + Content::Comment(_) => {} + } + Ok(()) + } + + fn write_end_tag_to_frame(&mut self, frame: &mut String) -> Result<()> { + let _writer = if self.unendable { + self.inner.ignore_end() + } else { + self.inner.try_as_mut()? + }; + if let Some(name) = &self.depth.pop() { + let e_tag; + let namespace_declarations_stack: Vec<_> = + self.namespace_declarations.iter().flatten().collect(); + + let prefix; + if let Some(namespace) = &name.namespace { + let name_namespace_declaration = namespace_declarations_stack + .iter() + .rfind(|namespace_declaration| namespace_declaration.namespace == *namespace) + .ok_or(Error::UndeclaredNamespace(namespace.clone()))?; + prefix = name_namespace_declaration.prefix.as_ref(); + } else { + prefix = None + } + + if let Some(prefix) = &prefix { + e_tag = xml::ETag { + name: xml::QName::PrefixedName(xml::PrefixedName { + prefix: xml::Prefix::parse_full(prefix)?, + local_part: xml::LocalPart::parse_full(&name.local_name)?, + }), + }; + } else { + e_tag = xml::ETag { + name: xml::QName::UnprefixedName(xml::UnprefixedName::parse_full( + &name.local_name, + )?), + }; + } + frame.push_str(&e_tag.to_string()); + self.namespace_declarations.pop(); + + // if self.depth.is_empty() { + // self.inner.end(); + // } + Ok(()) + } else { + return Err(Error::NotInElement("".to_string())); + } + } +} + +impl<W: AsyncWrite + Unpin + Send> Writer<Loggable<W>> { + /// Write an XML declaration with the provided `VersionInfo`. + pub async fn write_declaration(&mut self, version: VersionInfo) -> Result<()> { + let writer = if self.unendable { + self.inner.ignore_end() + } else { + self.inner.try_as_mut()? 
+ }; let declaration = Declaration::version(version); let version_info; match declaration.version_info { @@ -65,53 +530,68 @@ impl<W: AsyncWrite + Unpin + Send> Writer<W> { Ok(()) } + /// Write a full element corresponding with the item implementing `IntoElement` (start tag + content + end tag). pub async fn write_full(&mut self, into_element: &impl IntoElement) -> Result<()> { let element = into_element.into_element(); self.write_element(&element).await?; - let bytes = &self.inner.ignore_end().take_log(); - let log = str::from_utf8(bytes).unwrap_or("failed to convert bytes written to str"); - info!("wrote element: {}", log); + + let bytes = self.inner.ignore_end().take_log(); + let log = String::from_utf8(bytes) + .map_err(|err| format!("failed to convert bytes written to str: {err}")); + info!("wrote element: {log:?}"); Ok(()) } + /// Write the start tag of an item that implements `IntoElement`. Navigates up the document. pub async fn write_start(&mut self, into_element: &impl IntoElement) -> Result<()> { let element = into_element.into_element(); self.write_element_start(&element).await?; - let bytes = &self.inner.ignore_end().take_log(); - let log = str::from_utf8(bytes).unwrap_or("failed to convert bytes written to str"); - info!("wrote element start: {}", log); + + let bytes = self.inner.ignore_end().take_log(); + let log = String::from_utf8(bytes) + .map_err(|err| format!("failed to convert bytes written to str: {err}")); + info!("wrote element start: {log:?}"); Ok(()) } + /// Write all the inner content (everything within the start and end tag of an xml element) of an item that implements `IntoElement`. In the case of an empty element, write nothing. 
pub async fn write_all_content(&mut self, into_element: &impl IntoElement) -> Result<()> { for content in &into_element.get_content() { self.write_content(content).await?; } - let bytes = &self.inner.ignore_end().take_log(); - let log = str::from_utf8(bytes).unwrap_or("failed to convert bytes written to str"); - info!("wrote element content: {}", log); + + let bytes = self.inner.ignore_end().take_log(); + let log = String::from_utf8(bytes) + .map_err(|err| format!("failed to convert bytes written to str: {err}")); + info!("wrote element content: {log:?}"); Ok(()) } + /// Write an item that implements `IntoContent`. Could be an element, some text, a comment, etc. Anything that could be included in an element body. pub async fn write(&mut self, into_content: &impl IntoContent) -> Result<()> { let content = into_content.into_content(); self.write_content(&content).await?; - let bytes = &self.inner.ignore_end().take_log(); - let log = str::from_utf8(bytes).unwrap_or("failed to convert bytes written to str"); - info!("wrote element: {}", log); + + let bytes = self.inner.ignore_end().take_log(); + let log = String::from_utf8(bytes) + .map_err(|err| format!("failed to convert bytes written to str: {err}")); + info!("wrote element: {log:?}"); Ok(()) } - // pub async fn write_end(&mut self) + /// Navigate down the document structure and write the end tag for the current element opened in the document context. pub async fn write_end(&mut self) -> Result<()> { self.write_end_tag().await?; - let bytes = &self.inner.ignore_end().take_log(); - let log = str::from_utf8(bytes).unwrap_or("failed to convert bytes written to str"); - info!("wrote element end: {}", log); + + let bytes = self.inner.ignore_end().take_log(); + let log = String::from_utf8(bytes) + .map_err(|err| format!("failed to convert bytes written to str: {err}")); + info!("wrote element end: {log:?}"); Ok(()) } #[async_recursion] + /// Write an `Element`. 
pub async fn write_element(&mut self, element: &Element) -> Result<()> { if element.content.is_empty() { self.write_empty(element).await?; @@ -125,8 +605,13 @@ impl<W: AsyncWrite + Unpin + Send> Writer<W> { Ok(()) } + /// Write an empty element tag from an `Element` (ignoring any content). pub async fn write_empty(&mut self, element: &Element) -> Result<()> { - let writer = self.inner.try_as_mut()?; + let writer = if self.unendable { + self.inner.ignore_end() + } else { + self.inner.try_as_mut()? + }; let mut namespace_declarations_stack: Vec<_> = self .namespace_declarations .iter() @@ -235,8 +720,13 @@ impl<W: AsyncWrite + Unpin + Send> Writer<W> { Ok(()) } + /// Write an element start tag from an `Element`, navigating up in document depth. pub async fn write_element_start(&mut self, element: &Element) -> Result<()> { - let writer = self.inner.try_as_mut()?; + let writer = if self.unendable { + self.inner.ignore_end() + } else { + self.inner.try_as_mut()? + }; let mut namespace_declarations_stack: Vec<_> = self .namespace_declarations .iter() @@ -344,14 +834,17 @@ impl<W: AsyncWrite + Unpin + Send> Writer<W> { Ok(()) } + /// Write some `Content`. pub async fn write_content(&mut self, content: &Content) -> Result<()> { match content { Content::Element(element) => self.write_element(element).await?, Content::Text(text) => { - self.inner - .try_as_mut()? - .write_all(escape_str(text).as_bytes()) - .await? + let writer = if self.unendable { + self.inner.ignore_end() + } else { + self.inner.try_as_mut()? + }; + writer.write_all(escape_str(text).as_bytes()).await? } // TODO: comments and PI Content::PI => {} @@ -360,8 +853,13 @@ impl<W: AsyncWrite + Unpin + Send> Writer<W> { Ok(()) } + /// Write an end tag (depending on the current document context), moving back down in the document. 
pub async fn write_end_tag(&mut self) -> Result<()> { - let writer = self.inner.try_as_mut()?; + let writer = if self.unendable { + self.inner.ignore_end() + } else { + self.inner.try_as_mut()? + }; if let Some(name) = &self.depth.pop() { let e_tag; let namespace_declarations_stack: Vec<_> = diff --git a/src/endable.rs b/src/writer/endable.rs index 6d842f3..6d842f3 100644 --- a/src/endable.rs +++ b/src/writer/endable.rs diff --git a/src/loggable.rs b/src/writer/loggable.rs index dd69668..1b9f64a 100644 --- a/src/loggable.rs +++ b/src/writer/loggable.rs @@ -5,6 +5,7 @@ use pin_project::pin_project; pub use tokio::io::AsyncWrite; #[pin_project] +/// Wrapper struct for logging writes to `AsyncWrite` implementors. #[derive(Debug)] pub struct Loggable<W> { log_buffer: Vec<u8>, @@ -32,8 +33,8 @@ impl<W> Loggable<W> { impl<W: AsyncWrite + Unpin + Send> Display for Loggable<W> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let str = str::from_utf8(&self.log_buffer).unwrap_or("buffer to string conversion failed"); - f.write_str(str) + let str = String::from_utf8_lossy(&self.log_buffer); + write!(f, "{str}") } } diff --git a/src/xml/mod.rs b/src/xml/mod.rs index b0d9056..005a122 100644 --- a/src/xml/mod.rs +++ b/src/xml/mod.rs @@ -328,7 +328,7 @@ impl<'s> Deref for CharData<'s> { /// [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' #[derive(Clone, Debug, PartialEq, Eq)] #[repr(transparent)] -pub struct Comment<'s>(&'s str); +pub struct Comment<'s>(pub(crate) &'s str); impl<'s> Deref for Comment<'s> { type Target = &'s str; diff --git a/src/xml/parsers_complete.rs b/src/xml/parsers_complete.rs index f18d0ff..1e2ac31 100644 --- a/src/xml/parsers_complete.rs +++ b/src/xml/parsers_complete.rs @@ -40,7 +40,12 @@ pub trait Parser<'s> { return Err(crate::error::Error::ExtraData(rest.to_string())); } } - Result::Err(e) => return Err(crate::error::Error::ParseError(e.to_string())), + Result::Err(e) => { + return 
Err(crate::error::Error::ParseError( + input.to_string(), + e.to_string(), + )) + } } } } |