diff options
author | cel 🌸 <cel@blos.sm> | 2024-06-27 20:22:16 +0100 |
---|---|---|
committer | cel 🌸 <cel@blos.sm> | 2024-06-27 20:22:16 +0100 |
commit | 1f0103cbecc6a4dfe3f34fb6441d4d491b385142 (patch) | |
tree | 6551f950332e5c913f33720e92c88cdf67ed5832 /src | |
parent | c08b4504ab326203b2c11abe566e518b6466613a (diff) | |
download | peanuts-1f0103cbecc6a4dfe3f34fb6441d4d491b385142.tar.gz peanuts-1f0103cbecc6a4dfe3f34fb6441d4d491b385142.tar.bz2 peanuts-1f0103cbecc6a4dfe3f34fb6441d4d491b385142.zip |
WIP: stream parsing
Diffstat (limited to 'src')
-rw-r--r-- | src/element.rs | 4 | ||||
-rw-r--r-- | src/error.rs | 20 | ||||
-rw-r--r-- | src/main.rs | 2 | ||||
-rw-r--r-- | src/reader.rs | 46 |
4 files changed, 61 insertions, 11 deletions
diff --git a/src/element.rs b/src/element.rs index 3273ba0..4dcb616 100644 --- a/src/element.rs +++ b/src/element.rs @@ -23,13 +23,13 @@ pub enum Node { // should this be a trait? pub struct Element { name: Name, - // namespace: (Name, String), // can't have this, must be external method that is called within the context of a reader/writer + // namespace: Name, // each element once created contains the qualified namespace information for that element // the name contains the qualified namespace so this is unnecessary // namespace: String, // hashmap of explicit namespace declarations on the element itself only // possibly not needed as can be calculated at write time depending on context and qualified namespace, and for reading, element validity and namespaces are kept track of by the reader. - // namespaces: HashMap<Option<String>, String>, + namespaces: HashMap<Option<String>, String>, // attributes can be in a different namespace than the element. how to make sure they are valid? // maybe include the namespace instead of or with the prefix // you can calculate the prefix from the namespaced name and the current writer context diff --git a/src/error.rs b/src/error.rs index 12fcaf2..78508ae 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1 +1,19 @@ -pub enum Error {} +use std::str::Utf8Error; + +pub enum Error { + ReadError(std::io::Error), + Utf8Error(Utf8Error), + ParseError(String), +} + +impl From<std::io::Error> for Error { + fn from(e: std::io::Error) -> Self { + Self::ReadError(e) + } +} + +impl From<Utf8Error> for Error { + fn from(e: Utf8Error) -> Self { + Self::Utf8Error(e) + } +} diff --git a/src/main.rs b/src/main.rs index 424046e..b08c197 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,4 @@ -use peanuts::parser::document; +use peanuts::xml::document; fn main() { let document = document( diff --git a/src/reader.rs b/src/reader.rs index 6e622f4..2785c88 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -1,5 +1,7 @@ use futures::Stream; -use tokio::io::AsyncBufRead; +use nom::Err; +use std::str; +use tokio::io::AsyncBufReadExt; use crate::{ element::{Element, Name, Namespace}, @@ -8,20 +10,50 @@ use crate::{ /// streaming reader that tracks depth and available namespaces at current depth pub struct Reader<R> { - stream: R, + inner: R, // holds which tags we are in atm over depth depth: Vec<Name>, namespaces: Vec<(usize, Namespace)>, } +impl<R> Reader<R> { + pub fn new(reader: R) -> Self { + Self { + inner: reader, + depth: Vec::new(), + namespaces: Vec::new(), + } + } +} + impl<R> Reader<R> where - R: AsyncBufRead, + R: AsyncBufReadExt + Unpin, { - // pub async fn read(&self) -> Result<impl From<Element>, Error> { - // let buf = self.stream.poll_fill_buf().await?; - // todo!() - // } + /// reads entire next prolog, element, or misc + pub async fn read<'s>(&'s mut self) -> Result<crate::xml::Element<'s>, Error> { + let element; + let len; + loop { + let buf = self.inner.fill_buf().await?; + let input = str::from_utf8(buf)?; + match crate::xml::element(input) { + Ok((rest, e)) => { + element = e; + len = buf.len() - rest.len(); + break; + } + Err(e) => match e { + Err::Incomplete(_) => (), + e => return Err(Error::ParseError(input.to_owned())), + }, + } + } + self.inner.consume(len); + + // Ok(element) + todo!() + } // pub async fn read_start(&self) -> Result<impl From<Element>, Error> { // todo!() // } |