diff options
Diffstat (limited to '')
| -rw-r--r-- | src/parser.rs | 90 | 
1 files changed, 69 insertions, 21 deletions
diff --git a/src/parser.rs b/src/parser.rs index f882064..2acd579 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -30,9 +30,6 @@ struct Attribute<'s> {      value: &'s str,  } -/// Contains only latin characters or dash after first char -type EncName<'s> = &'s str; -  struct DoctypeDecl<'s> {      name: &'s str,      // TODO: doctype declaration parsing @@ -52,15 +49,6 @@ pub fn element(input: &str) -> IResult<&str, Element> {      todo!()  } -enum Misc<'s> { -    Comment(Comment<'s>), -    PI(PI<'s>), -} -/// Misc -pub fn misc(input: &str) -> IResult<&str, Misc> { -    todo!() -} -  type Document<'s> = (Prolog<'s>, Element<'s>, Vec<Misc<'s>>);  /// [1]   	document	   ::=   	prolog element Misc*  pub fn document(input: &str) -> IResult<&str, Document> { @@ -231,6 +219,7 @@ pub fn comment(input: &str) -> IResult<&str, Comment> {      )(input)  } +#[derive(Clone)]  struct PI<'s> {      target: &'s str,      instruction: Option<&'s str>, @@ -294,7 +283,7 @@ pub fn cd_end(input: &str) -> IResult<&str, CDEnd> {  }  type Prolog<'s> = ( -    Option<XMLDecl>, +    Option<XMLDecl<'s>>,      Vec<Misc<'s>>,      Option<(DoctypeDecl<'s>, Vec<Misc<'s>>)>,  ); @@ -307,25 +296,26 @@ pub fn prolog(input: &str) -> IResult<&str, Prolog> {      ))(input)  } -struct XMLDecl { +struct XMLDecl<'s> {      version_info: VersionInfo, -    // encoding_decl: Option<EncodingDecl>, -    // sd_decl: Option<SDDecl>, +    encoding_decl: Option<EncodingDecl<'s>>, +    sd_decl: Option<SDDecl>,  }  /// [23]   	XMLDecl	   ::=   	'<?xml' VersionInfo EncodingDecl? SDDecl? S? 
'?>'  pub fn xml_decl(input: &str) -> IResult<&str, XMLDecl> {      // (VersionInfo, Option<EncodingDecl>, Option<SDDecl>) -    let (leftover, (version_info /* encoding_decl, sd_decl */,)) = delimited( +    let (leftover, (version_info, encoding_decl, sd_decl)) = delimited(          tag("<?xml"), -        tuple((version_info /* opt(encoding_decl), opt(sd_decl) */,)), -        tag("?>"), +        tuple((version_info, opt(encoding_decl), opt(sd_decl))), +        pair(opt(s), tag("?>")),      )(input)?; +    // TODO: change to map      Ok((          leftover,          XMLDecl {              version_info, -            // encoding_decl, -            // sd_decl, +            encoding_decl, +            sd_decl,          },      ))  } @@ -363,6 +353,41 @@ pub fn version_num(input: &str) -> IResult<&str, VersionNum> {      )(input)  } +#[derive(Clone)] +enum Misc<'s> { +    Comment(Comment<'s>), +    PI(PI<'s>), +    S, +} +/// [27]   	Misc	   ::=   	Comment | PI | S +pub fn misc(input: &str) -> IResult<&str, Misc> { +    alt(( +        map(comment, |comment| Misc::Comment(comment)), +        map(pi, |pi| Misc::PI(pi)), +        value(Misc::S, s), +    ))(input) +} + +type SDDecl = bool; +/// [32]   	SDDecl	   ::=   	S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) +pub fn sd_decl(input: &str) -> IResult<&str, SDDecl> { +    preceded( +        tuple((s, tag("standalone"), eq)), +        alt(( +            delimited( +                char('\''), +                alt((value(true, tag("yes")), value(false, tag("no")))), +                char('\''), +            ), +            delimited( +                char('"'), +                alt((value(true, tag("yes")), value(false, tag("no")))), +                char('"'), +            ), +        )), +    )(input) +} +  pub fn reference(input: &str) -> IResult<&str, char> {      todo!()  } @@ -371,6 +396,29 @@ pub fn pe_reference(input: &str) -> IResult<&str, char> {      todo!()  } +type EncodingDecl<'s> = 
EncName<'s>; +/// [80]   	EncodingDecl	   ::=   	S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) +pub fn encoding_decl(input: &str) -> IResult<&str, EncodingDecl> { +    preceded( +        tuple((s, tag("encoding"), eq)), +        alt(( +            delimited(char('"'), enc_name, char('"')), +            delimited(char('\''), enc_name, char('\'')), +        )), +    )(input) +} + +type EncName<'s> = &'s str; +/// [81]   	EncName	   ::=   	[A-Za-z] ([A-Za-z0-9._] | '-')* +pub fn enc_name(input: &str) -> IResult<&str, EncName> { +    recognize(pair( +        satisfy(|c| matches!(c, 'A'..='Z' | 'a'..='z' )), +        many0(satisfy( +            |c| matches!(c, 'A'..='Z' | 'a'..='z' | '0'..='9' | '.' | '_' | '-' ), +        )), +    ))(input) +} +  #[cfg(test)]  mod tests {      use std::num::NonZero;  | 
