diff options
author | cel 🌸 <cel@blos.sm> | 2024-06-21 17:08:45 +0100 |
---|---|---|
committer | cel 🌸 <cel@blos.sm> | 2024-06-21 17:08:45 +0100 |
commit | 3a875666a5a897d92a9c6d92a67867bcae662211 (patch) | |
tree | 6b8619b924dd30232a715acf9d5db35a60ca2262 /src/parser.rs | |
parent | 0a353135c04d1639cad7b0b881dd7b464a1989e1 (diff) | |
download | peanuts-3a875666a5a897d92a9c6d92a67867bcae662211.tar.gz peanuts-3a875666a5a897d92a9c6d92a67867bcae662211.tar.bz2 peanuts-3a875666a5a897d92a9c6d92a67867bcae662211.zip |
WIP: XMLDecl stuff
Diffstat (limited to 'src/parser.rs')
-rw-r--r-- | src/parser.rs | 90 |
1 files changed, 69 insertions, 21 deletions
diff --git a/src/parser.rs b/src/parser.rs index f882064..2acd579 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -30,9 +30,6 @@ struct Attribute<'s> { value: &'s str, } -/// Contains only latin characters or dash after first char -type EncName<'s> = &'s str; - struct DoctypeDecl<'s> { name: &'s str, // TODO: doctype declaration parsing @@ -52,15 +49,6 @@ pub fn element(input: &str) -> IResult<&str, Element> { todo!() } -enum Misc<'s> { - Comment(Comment<'s>), - PI(PI<'s>), -} -/// Misc -pub fn misc(input: &str) -> IResult<&str, Misc> { - todo!() -} - type Document<'s> = (Prolog<'s>, Element<'s>, Vec<Misc<'s>>); /// [1] document ::= prolog element Misc* pub fn document(input: &str) -> IResult<&str, Document> { @@ -231,6 +219,7 @@ pub fn comment(input: &str) -> IResult<&str, Comment> { )(input) } +#[derive(Clone)] struct PI<'s> { target: &'s str, instruction: Option<&'s str>, @@ -294,7 +283,7 @@ pub fn cd_end(input: &str) -> IResult<&str, CDEnd> { } type Prolog<'s> = ( - Option<XMLDecl>, + Option<XMLDecl<'s>>, Vec<Misc<'s>>, Option<(DoctypeDecl<'s>, Vec<Misc<'s>>)>, ); @@ -307,25 +296,26 @@ pub fn prolog(input: &str) -> IResult<&str, Prolog> { ))(input) } -struct XMLDecl { +struct XMLDecl<'s> { version_info: VersionInfo, - // encoding_decl: Option<EncodingDecl>, - // sd_decl: Option<SDDecl>, + encoding_decl: Option<EncodingDecl<'s>>, + sd_decl: Option<SDDecl>, } /// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? 
'?>' pub fn xml_decl(input: &str) -> IResult<&str, XMLDecl> { // (VersionInfo, Option<EncodingDecl>, Option<SDDecl>) - let (leftover, (version_info /* encoding_decl, sd_decl */,)) = delimited( + let (leftover, (version_info, encoding_decl, sd_decl)) = delimited( tag("<?xml"), - tuple((version_info /* opt(encoding_decl), opt(sd_decl) */,)), - tag("?>"), + tuple((version_info, opt(encoding_decl), opt(sd_decl))), + pair(opt(s), tag("?>")), )(input)?; + // TODO: change to map Ok(( leftover, XMLDecl { version_info, - // encoding_decl, - // sd_decl, + encoding_decl, + sd_decl, }, )) } @@ -363,6 +353,41 @@ pub fn version_num(input: &str) -> IResult<&str, VersionNum> { )(input) } +#[derive(Clone)] +enum Misc<'s> { + Comment(Comment<'s>), + PI(PI<'s>), + S, +} +/// [27] Misc ::= Comment | PI | S +pub fn misc(input: &str) -> IResult<&str, Misc> { + alt(( + map(comment, |comment| Misc::Comment(comment)), + map(pi, |pi| Misc::PI(pi)), + value(Misc::S, s), + ))(input) +} + +type SDDecl = bool; +/// [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) +pub fn sd_decl(input: &str) -> IResult<&str, SDDecl> { + preceded( + tuple((s, tag("standalone"), eq)), + alt(( + delimited( + char('\''), + alt((value(true, tag("yes")), value(false, tag("no")))), + char('\''), + ), + delimited( + char('"'), + alt((value(true, tag("yes")), value(false, tag("no")))), + char('"'), + ), + )), + )(input) +} + pub fn reference(input: &str) -> IResult<&str, char> { todo!() } @@ -371,6 +396,29 @@ pub fn pe_reference(input: &str) -> IResult<&str, char> { todo!() } +type EncodingDecl<'s> = EncName<'s>; +/// [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) +pub fn encoding_decl(input: &str) -> IResult<&str, EncodingDecl> { + preceded( + tuple((s, tag("encoding"), eq)), + alt(( + delimited(char('"'), enc_name, char('"')), + delimited(char('\''), enc_name, char('\'')), + )), + )(input) +} + +type EncName<'s> = &'s str; +/// [81] EncName ::= [A-Za-z] 
([A-Za-z0-9._] | '-')* +pub fn enc_name(input: &str) -> IResult<&str, EncName> { + recognize(pair( + satisfy(|c| matches!(c, 'A'..='Z' | 'a'..='z' )), + many0(satisfy( + |c| matches!(c, 'A'..='Z' | 'a'..='z' | '0'..='9' | '.' | '_' | '-' ), + )), + ))(input) +} + #[cfg(test)] mod tests { use std::num::NonZero; |