diff options
author | cel 🌸 <cel@blos.sm> | 2024-03-04 16:14:28 +0000 |
---|---|---|
committer | cel 🌸 <cel@blos.sm> | 2024-03-04 16:14:28 +0000 |
commit | 844f3a5d11e4360e9d6bdb79cfed49287aa8b14d (patch) | |
tree | 5525c4f134ef3d7a6082935baa61e6097670c968 | |
download | peanuts-844f3a5d11e4360e9d6bdb79cfed49287aa8b14d.tar.gz peanuts-844f3a5d11e4360e9d6bdb79cfed49287aa8b14d.tar.bz2 peanuts-844f3a5d11e4360e9d6bdb79cfed49287aa8b14d.zip |
initial commit
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | Cargo.lock | 273 | ||||
-rw-r--r-- | Cargo.toml | 11 | ||||
-rw-r--r-- | README.md | 14 | ||||
-rw-r--r-- | src/element.rs | 55 | ||||
-rw-r--r-- | src/error.rs | 1 | ||||
-rw-r--r-- | src/event.rs | 1 | ||||
-rw-r--r-- | src/lexer.rs | 9 | ||||
-rw-r--r-- | src/lib.rs | 15 | ||||
-rw-r--r-- | src/parser.rs | 1 | ||||
-rw-r--r-- | src/reader.rs | 32 | ||||
-rw-r--r-- | src/writer.rs | 47 |
12 files changed, 460 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..431cbcc --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,273 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "addr2line" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "backtrace" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + +[[package]] +name = "cc" +version = "1.0.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0ba8f7aaa012f30d5b2861462f6708eccd49c3c39863fe083a308035f63d723" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "futures" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" + +[[package]] +name = "futures-executor" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" + +[[package]] +name = "futures-macro" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" + +[[package]] +name = "futures-task" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" + +[[package]] +name = "futures-util" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "gimli" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" + +[[package]] +name = "libc" +version = "0.2.153" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" + +[[package]] +name = "memchr" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "miniz_oxide" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" +dependencies = [ + "adler", +] + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "object" +version = "0.32.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +dependencies = [ + "memchr", +] + +[[package]] +name = "peanuts" +version = "0.1.0" +dependencies = [ + "futures", + "nom", + "tokio", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "proc-macro2" +version = "1.0.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" + +[[package]] +name = "slab" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +dependencies = [ + "autocfg", +] + +[[package]] +name = "syn" +version = "2.0.52" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b699d15b36d1f02c3e7c69f8ffef53de37aefae075d8488d4ba1a7788d574a07" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tokio" +version = "1.36.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61285f6515fa018fb2d1e46eb21223fff441ee8db5d0f1435e8ab4f5cdb80931" +dependencies = [ + "backtrace", + "pin-project-lite", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..ca9d389 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "peanuts" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +futures = "0.3.30" +nom = "7.1.3" +tokio = "1.36.0" diff --git a/README.md b/README.md new file mode 100644 index 0000000..b8a861f --- /dev/null +++ b/README.md @@ -0,0 +1,14 @@ +# peanuts + +an ergonomic async-first xml reader/writer library focusing on streams + +features: +[ ] CoW +[ ] namespace support +[ ] serialization +[ ] deserialization + +## reader: +read -> lexer -> parser -> events -> elements -> deserialize +## writer: +serialize -> elements -> events -> write diff --git a/src/element.rs b/src/element.rs new file mode 100644 index 0000000..3273ba0 --- /dev/null +++ b/src/element.rs @@ -0,0 +1,55 @@ +// elements resemble a final tree, including inherited namespace information + +use std::collections::HashMap; + +// when are namespaces names chosen then if they are automatically calculated +// namespaces are held by readers and writers. +pub struct Namespace { + prefix: Option<String>, + namespace: String, +} + +// names are qualified, they contain the namespace +pub struct Name { + namespace: String, + name: String, +} + +pub enum Node { + Element(Element), + Text(String), +} + +// should this be a trait? +pub struct Element { + name: Name, + // namespace: (Name, String), // can't have this, must be external method that is called within the context of a reader/writer + // each element once created contains the qualified namespace information for that element + // the name contains the qualified namespace so this is unnecessary + // namespace: String, + // hashmap of explicit namespace declarations on the element itself only + // possibly not needed as can be calculated at write time depending on context and qualified namespace, and for reading, element validity and namespaces are kept track of by the reader. + // namespaces: HashMap<Option<String>, String>, + // attributes can be in a different namespace than the element. how to make sure they are valid? + // maybe include the namespace instead of or with the prefix + // you can calculate the prefix from the namespaced name and the current writer context + // you can validate the prefix and calculate the namespace from the current reader context + // this results in readers and writers being able to return qualification errors as they aren't able to create elements until every part is qualified. + attributes: HashMap<Name, String>, + children: Option<Vec<Node>>, +} + +// example of deriving an element: + +// #[derive(XMLWrite, XMLRead)] +// #[peanuts(namespace = "jabber:client", namespace:stream = "http://etherx.jabber.org/streams", name = "stream:stream")] +// pub struct Stream { +// from: JID, +// id: String, +// to: JID, +// version: String, +// #[peanuts(namespace = "http://www.w3.org/XML/1998/namespace")] +// lang: Lang, +// } + +// note: if an element name has a prefix all unprefixed attributes are qualified by the namespace of the prefix, so in this example from's Name's namespace would be "http://etherx.jabber.org/streams" diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..12fcaf2 --- /dev/null +++ b/src/error.rs @@ -0,0 +1 @@ +pub enum Error {} diff --git a/src/event.rs b/src/event.rs new file mode 100644 index 0000000..1eab55b --- /dev/null +++ b/src/event.rs @@ -0,0 +1 @@ +// tags, declaration, comments, text. individual bits and what they contain, e.g. tag contains attributes and namespace declarations, lang, ONLY within the tag diff --git a/src/lexer.rs b/src/lexer.rs new file mode 100644 index 0000000..abb5ebd --- /dev/null +++ b/src/lexer.rs @@ -0,0 +1,9 @@ +// lexer: tokenizes to bits like '<', '<?', '"', etc. + +pub enum Token { + Whitespace, + OpenTag, + CloseTag, + Slash, + Text(String), +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..5d1046f --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,15 @@ +mod element; +mod error; +mod reader; +mod writer; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn it_works() { + let result = 2 + 2; + assert_eq!(result, 4); + } +} diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..b2a8579 --- /dev/null +++ b/src/parser.rs @@ -0,0 +1 @@ +// parser: parses tokens from lexer into events diff --git a/src/reader.rs b/src/reader.rs new file mode 100644 index 0000000..05afc73 --- /dev/null +++ b/src/reader.rs @@ -0,0 +1,32 @@ +use futures::Stream; +use tokio::io::AsyncRead; + +use crate::{ + element::{Element, Name, Namespace}, + error::Error, +}; + +/// streaming reader that tracks depth and available namespaces at current depth +pub struct Reader<R> { + stream: R, + // holds which tags we are in atm over depth + depth: Vec<Name>, + namespaces: Vec<(usize, Namespace)>, +} + +impl<R: AsyncRead> Reader<R> { + pub async fn read(&self) -> Result<impl From<Element>, Error> {} + pub async fn read_start(&self) -> Result<impl From<Element>, Error> {} + pub async fn read_end(&self) -> Result<(), Error> {} +} + +impl<R: AsyncRead> Stream for Reader<R> { + type Item = impl From<Element>; + + async fn poll_next( + self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> std::task::Poll<Option<Self::Item>> { + todo!() + } +} diff --git a/src/writer.rs b/src/writer.rs new file mode 100644 index 0000000..d7fc037 --- /dev/null +++ b/src/writer.rs @@ -0,0 +1,47 @@ +use futures::{AsyncWrite, Sink}; + +use crate::{ + element::{Element, Name, Namespace}, + error::Error, +}; + +pub struct Writer<W> { + stream: W, + depth: Vec<Name>, + namespaces: Vec<(usize, Namespace)>, +} + +impl<W: AsyncWrite> Writer<W> { + pub async fn write(&self, element: impl Into<Element>) -> Result<(), Error> {} + pub async fn write_start(&self, element: impl Into<Element>) -> Result<(), Error> {} + pub async fn write_end(&self) -> Result<(), Error> {} +} + +impl<W: AsyncWrite, E: Into<Element>> Sink<E> for Writer<W> { + type Error = Error; + + fn poll_ready( + self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> std::task::Poll<Result<(), Self::Error>> { + todo!() + } + + fn start_send(self: std::pin::Pin<&mut Self>, item: E) -> Result<(), Self::Error> { + todo!() + } + + fn poll_flush( + self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> std::task::Poll<Result<(), Self::Error>> { + todo!() + } + + fn poll_close( + self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> std::task::Poll<Result<(), Self::Error>> { + todo!() + } +} |