aboutsummaryrefslogtreecommitdiffstats
path: root/src/parser.rs
diff options
context:
space:
mode:
authorLibravatar cel 🌸 <cel@blos.sm>2024-06-21 17:08:45 +0100
committerLibravatar cel 🌸 <cel@blos.sm>2024-06-21 17:08:45 +0100
commit3a875666a5a897d92a9c6d92a67867bcae662211 (patch)
tree6b8619b924dd30232a715acf9d5db35a60ca2262 /src/parser.rs
parent0a353135c04d1639cad7b0b881dd7b464a1989e1 (diff)
downloadpeanuts-3a875666a5a897d92a9c6d92a67867bcae662211.tar.gz
peanuts-3a875666a5a897d92a9c6d92a67867bcae662211.tar.bz2
peanuts-3a875666a5a897d92a9c6d92a67867bcae662211.zip
WIP: XMLDecl stuff
Diffstat (limited to 'src/parser.rs')
-rw-r--r--src/parser.rs90
1 files changed, 69 insertions, 21 deletions
diff --git a/src/parser.rs b/src/parser.rs
index f882064..2acd579 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -30,9 +30,6 @@ struct Attribute<'s> {
value: &'s str,
}
-/// Contains only latin characters or dash after first char
-type EncName<'s> = &'s str;
-
struct DoctypeDecl<'s> {
name: &'s str,
// TODO: doctype declaration parsing
@@ -52,15 +49,6 @@ pub fn element(input: &str) -> IResult<&str, Element> {
todo!()
}
-enum Misc<'s> {
- Comment(Comment<'s>),
- PI(PI<'s>),
-}
-/// Misc
-pub fn misc(input: &str) -> IResult<&str, Misc> {
- todo!()
-}
-
type Document<'s> = (Prolog<'s>, Element<'s>, Vec<Misc<'s>>);
/// [1] document ::= prolog element Misc*
pub fn document(input: &str) -> IResult<&str, Document> {
@@ -231,6 +219,7 @@ pub fn comment(input: &str) -> IResult<&str, Comment> {
)(input)
}
+#[derive(Clone)]
struct PI<'s> {
target: &'s str,
instruction: Option<&'s str>,
@@ -294,7 +283,7 @@ pub fn cd_end(input: &str) -> IResult<&str, CDEnd> {
}
type Prolog<'s> = (
- Option<XMLDecl>,
+ Option<XMLDecl<'s>>,
Vec<Misc<'s>>,
Option<(DoctypeDecl<'s>, Vec<Misc<'s>>)>,
);
@@ -307,25 +296,26 @@ pub fn prolog(input: &str) -> IResult<&str, Prolog> {
))(input)
}
-struct XMLDecl {
+struct XMLDecl<'s> {
version_info: VersionInfo,
- // encoding_decl: Option<EncodingDecl>,
- // sd_decl: Option<SDDecl>,
+ encoding_decl: Option<EncodingDecl<'s>>,
+ sd_decl: Option<SDDecl>,
}
/// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
pub fn xml_decl(input: &str) -> IResult<&str, XMLDecl> {
// (VersionInfo, Option<EncodingDecl>, Option<SDDecl>)
- let (leftover, (version_info /* encoding_decl, sd_decl */,)) = delimited(
+ let (leftover, (version_info, encoding_decl, sd_decl)) = delimited(
tag("<?xml"),
- tuple((version_info /* opt(encoding_decl), opt(sd_decl) */,)),
- tag("?>"),
+ tuple((version_info, opt(encoding_decl), opt(sd_decl))),
+ pair(opt(s), tag("?>")),
)(input)?;
+ // TODO: change to map
Ok((
leftover,
XMLDecl {
version_info,
- // encoding_decl,
- // sd_decl,
+ encoding_decl,
+ sd_decl,
},
))
}
@@ -363,6 +353,41 @@ pub fn version_num(input: &str) -> IResult<&str, VersionNum> {
)(input)
}
+#[derive(Clone)]
+enum Misc<'s> {
+ Comment(Comment<'s>),
+ PI(PI<'s>),
+ S,
+}
+/// [27] Misc ::= Comment | PI | S
+pub fn misc(input: &str) -> IResult<&str, Misc> {
+ alt((
+ map(comment, |comment| Misc::Comment(comment)),
+ map(pi, |pi| Misc::PI(pi)),
+ value(Misc::S, s),
+ ))(input)
+}
+
+type SDDecl = bool;
+/// [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
+pub fn sd_decl(input: &str) -> IResult<&str, SDDecl> {
+ preceded(
+ tuple((s, tag("standalone"), eq)),
+ alt((
+ delimited(
+ char('\''),
+ alt((value(true, tag("yes")), value(false, tag("no")))),
+ char('\''),
+ ),
+ delimited(
+ char('"'),
+ alt((value(true, tag("yes")), value(false, tag("no")))),
+ char('"'),
+ ),
+ )),
+ )(input)
+}
+
pub fn reference(input: &str) -> IResult<&str, char> {
todo!()
}
@@ -371,6 +396,29 @@ pub fn pe_reference(input: &str) -> IResult<&str, char> {
todo!()
}
+type EncodingDecl<'s> = EncName<'s>;
+/// [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
+pub fn encoding_decl(input: &str) -> IResult<&str, EncodingDecl> {
+ preceded(
+ tuple((s, tag("encoding"), eq)),
+ alt((
+ delimited(char('"'), enc_name, char('"')),
+ delimited(char('\''), enc_name, char('\'')),
+ )),
+ )(input)
+}
+
+type EncName<'s> = &'s str;
+/// [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
+pub fn enc_name(input: &str) -> IResult<&str, EncName> {
+ recognize(pair(
+ satisfy(|c| matches!(c, 'A'..='Z' | 'a'..='z' )),
+ many0(satisfy(
+ |c| matches!(c, 'A'..='Z' | 'a'..='z' | '0'..='9' | '.' | '_' | '-' ),
+ )),
+ ))(input)
+}
+
#[cfg(test)]
mod tests {
use std::num::NonZero;