From bec1a204390f4f4ea60e419331a5903e5f88169e Mon Sep 17 00:00:00 2001
From: cel 🌸 <cel@blos.sm>
Date: Fri, 14 Jun 2024 13:11:32 +0100
Subject: actually get it to build for initial tests

---
 Cargo.lock    | 28 ++++++++++------------
 src/lib.rs    | 11 ---------
 src/parser.rs | 77 ++++++++++++++++++++++++++++++++++-------------------------
 src/reader.rs | 34 ++++++++++++++------------
 src/writer.rs | 12 +++++++---
 5 files changed, 85 insertions(+), 77 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 431cbcc..f9658ed 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4,9 +4,9 @@ version = 3
 
 [[package]]
 name = "addr2line"
-version = "0.21.0"
+version = "0.15.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb"
+checksum = "e7a2e47a1fbe209ee101dd6d61285226744c6c8d3c21c8dc878ba6cb9f467f3a"
 dependencies = [
  "gimli",
 ]
@@ -25,9 +25,9 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
 
 [[package]]
 name = "backtrace"
-version = "0.3.69"
+version = "0.3.59"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837"
+checksum = "4717cfcbfaa661a0fd48f8453951837ae7e8f81e481fbb136e3202d72805a744"
 dependencies = [
  "addr2line",
  "cc",
@@ -141,9 +141,9 @@ dependencies = [
 
 [[package]]
 name = "gimli"
-version = "0.28.1"
+version = "0.24.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253"
+checksum = "0e4075386626662786ddb0ec9081e7c7eeb1ba31951f447ca780ef9f5d568189"
 
 [[package]]
 name = "libc"
@@ -153,9 +153,9 @@ checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
 
 [[package]]
 name = "memchr"
-version = "2.7.1"
+version = "2.3.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149"
+checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525"
 
 [[package]]
 name = "minimal-lexical"
@@ -165,11 +165,12 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
 
 [[package]]
 name = "miniz_oxide"
-version = "0.7.2"
+version = "0.4.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7"
+checksum = "a92518e98c078586bc6c934028adcca4c92a53d6a958196de835170a01d84e4b"
 dependencies = [
  "adler",
+ "autocfg",
 ]
 
 [[package]]
@@ -184,12 +185,9 @@ dependencies = [
 
 [[package]]
 name = "object"
-version = "0.32.2"
+version = "0.24.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441"
-dependencies = [
- "memchr",
-]
+checksum = "1a5b3dd1c072ee7963717671d1ca129f1048fda25edea6b752bfc71ac8854170"
 
 [[package]]
 name = "peanuts"
diff --git a/src/lib.rs b/src/lib.rs
index 3d71373..5430fd5 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -3,14 +3,3 @@ mod error;
 mod parser;
 mod reader;
 mod writer;
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn it_works() {
-        let result = 2 + 2;
-        assert_eq!(result, 4);
-    }
-}
diff --git a/src/parser.rs b/src/parser.rs
index 518aad4..07d48c6 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -2,10 +2,7 @@ use std::char;
 
 use nom::{
     branch::alt,
-    bytes::{
-        complete::take_until,
-        streaming::{is_a, tag, take},
-    },
+    bytes::streaming::{is_a, tag, take, take_till, take_until},
     character::{
         complete::one_of,
         streaming::{char, digit1, none_of, satisfy},
@@ -19,11 +16,6 @@ use nom::{
 
 // parser: parses tokens from lexer into events
 
-enum Misc<'s> {
-    Comment(Comment<'s>),
-    PI(PI<'s>),
-}
-
 type Comment<'s> = &'s str;
 
 struct PI<'s> {
@@ -34,46 +26,44 @@ struct PI<'s> {
 enum ContentItem<'s> {
     CharData(&'s str),
     Element(Element<'s>),
-    Reference(Reference<'s>),
-    CDSect(CDSect<'s>),
+    // Reference(Reference<'s>),
+    // CDSect(CDSect<'s>),
 }
 
 type Content<'s> = Option<Vec<ContentItem<'s>>>;
 
-struct Element<'s> {
-    name: &'s str,
-    attributes: Vec<Attribute<'s>>,
-    content: Content<'s>,
-}
-
 struct Attribute<'s> {
     key: &'s str,
     value: &'s str,
 }
 
-// type VersionNum<'s> = &'s str;
 /// Contains only latin characters or dash after first char
 type EncName<'s> = &'s str;
 
-// struct XMLDecl<'s> {
-//     version_info: VersionNum<'s>,
-//     encoding_decl: Option<EncName<'s>>,
-//     sd_decl: Option<bool>,
-// }
-
 struct DoctypeDecl<'s> {
     name: &'s str,
-    // TODO
+    // TODO: doctype declaration parsing
 }
-
+/// [28]   doctypedecl   ::=   '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
 pub fn doctypedecl(input: &str) -> IResult<&str, DoctypeDecl> {
     todo!()
 }
 
+struct Element<'s> {
+    name: &'s str,
+    attributes: Vec<Attribute<'s>>,
+    content: Content<'s>,
+}
+/// [39]   element   ::=   EmptyElemTag | STag content ETag
 pub fn element(input: &str) -> IResult<&str, Element> {
     todo!()
 }
 
+enum Misc<'s> {
+    Comment(Comment<'s>),
+    PI(PI<'s>),
+}
+/// [27]   Misc   ::=   Comment | PI | S
 pub fn misc(input: &str) -> IResult<&str, Misc> {
     todo!()
 }
@@ -210,7 +200,7 @@ pub fn pubid_char(input: &str) -> IResult<&str, PubidChar> {
 type CharData<'s> = &'s str;
 /// [14]   	CharData	   ::=   	[^<&]* - ([^<&]* ']]>' [^<&]*)
 pub fn char_data(input: &str) -> IResult<&str, CharData> {
-    take_until()(input)
+    recognize(take_until("]]>").and_then(take_till(|c| c == '<' || c == '&')))(input)
 }
 
 type Prolog<'s> = (
@@ -229,23 +219,23 @@ pub fn prolog(input: &str) -> IResult<&str, Prolog> {
 
 struct XMLDecl {
     version_info: VersionInfo,
-    encoding_decl: Option<EncodingDecl>,
-    sd_decl: Option<SDDecl>,
+    // encoding_decl: Option<EncodingDecl>,
+    // sd_decl: Option<SDDecl>,
 }
 /// [23]   	XMLDecl	   ::=   	'<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
 pub fn xml_decl(input: &str) -> IResult<&str, XMLDecl> {
     // (VersionInfo, Option<EncodingDecl>, Option<SDDecl>)
-    let (leftover, (version_info, encoding_decl, sd_decl)) = delimited(
+    let (leftover, (version_info /* encoding_decl, sd_decl */,)) = delimited(
         tag("<?xml"),
-        tuple((version_info, opt(encoding_decl), opt(sd_decl))),
+        tuple((version_info /* opt(encoding_decl), opt(sd_decl) */,)),
         tag("?>"),
     )(input)?;
     Ok((
         leftover,
         XMLDecl {
             version_info,
-            encoding_decl,
-            sd_decl,
+            // encoding_decl,
+            // sd_decl,
         },
     ))
 }
@@ -282,3 +272,24 @@ pub fn version_num(input: &str) -> IResult<&str, VersionNum> {
         )),
     )(input)
 }
+
+pub fn reference(input: &str) -> IResult<&str, char> {
+    todo!()
+}
+
+pub fn pe_reference(input: &str) -> IResult<&str, char> {
+    todo!()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_char_data() {
+        assert_eq!(Ok(("&def]]>ghi", "abc")), char_data("abc&def]]>ghi"));
+        assert_eq!(Ok(("]]>ghi", "abcdef")), char_data("abcdef]]>ghi"));
+        assert_eq!(Ok(("&defghi", "abc")), char_data("abc&defghi"));
+        assert_eq!(Ok(("", "abcdefghi")), char_data("abcdefghi"));
+    }
+}
diff --git a/src/reader.rs b/src/reader.rs
index 26e540e..6e622f4 100644
--- a/src/reader.rs
+++ b/src/reader.rs
@@ -18,21 +18,25 @@ impl<R> Reader<R>
 where
     R: AsyncBufRead,
 {
-    pub async fn read(&self) -> Result<impl From<Element>, Error> {
-        let buf = self.stream.poll_fill_buf().await?;
-        todo!()
-    }
-    pub async fn read_start(&self) -> Result<impl From<Element>, Error> {}
-    pub async fn read_end(&self) -> Result<(), Error> {}
+    // pub async fn read(&self) -> Result<impl From<Element>, Error> {
+    // let buf = self.stream.poll_fill_buf().await?;
+    // todo!()
+    // }
+    // pub async fn read_start(&self) -> Result<impl From<Element>, Error> {
+    //     todo!()
+    // }
+    // pub async fn read_end(&self) -> Result<(), Error> {
+    //     todo!()
+    // }
 }
 
-impl<R: AsyncBufRead> Stream for Reader<R> {
-    type Item = impl From<Element>;
+// impl<R: AsyncBufRead> Stream for Reader<R> {
+//     type Item = impl From<Element>;
 
-    async fn poll_next(
-        self: std::pin::Pin<&mut Self>,
-        cx: &mut std::task::Context<'_>,
-    ) -> std::task::Poll<Option<Self::Item>> {
-        todo!()
-    }
-}
+//     async fn poll_next(
+//         self: std::pin::Pin<&mut Self>,
+//         cx: &mut std::task::Context<'_>,
+//     ) -> std::task::Poll<Option<Self::Item>> {
+//         todo!()
+//     }
+// }
diff --git a/src/writer.rs b/src/writer.rs
index d7fc037..456a5a1 100644
--- a/src/writer.rs
+++ b/src/writer.rs
@@ -12,9 +12,15 @@ pub struct Writer<W> {
 }
 
 impl<W: AsyncWrite> Writer<W> {
-    pub async fn write(&self, element: impl Into<Element>) -> Result<(), Error> {}
-    pub async fn write_start(&self, element: impl Into<Element>) -> Result<(), Error> {}
-    pub async fn write_end(&self) -> Result<(), Error> {}
+    pub async fn write(&self, element: impl Into<Element>) -> Result<(), Error> {
+        todo!()
+    }
+    pub async fn write_start(&self, element: impl Into<Element>) -> Result<(), Error> {
+        todo!()
+    }
+    pub async fn write_end(&self) -> Result<(), Error> {
+        todo!()
+    }
 }
 
 impl<W: AsyncWrite, E: Into<Element>> Sink<E> for Writer<W> {
-- 
cgit