From b9d88e5c6f81fadcb4bcceb3326383fc2fb13eb5 Mon Sep 17 00:00:00 2001 From: cel 🌸 Date: Tue, 25 Jun 2024 22:44:47 +0100 Subject: WIP: conditional sections --- src/parser.rs | 247 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 243 insertions(+), 4 deletions(-) (limited to 'src/parser.rs') diff --git a/src/parser.rs b/src/parser.rs index bae9737..a37fc17 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -82,6 +82,7 @@ pub fn nmtokens(input: &str) -> IResult<&str, Nmtokens> { recognize(pair(nmtoken, many0(pair(char('\u{20}'), nmtoken))))(input) } +#[derive(Clone)] enum LiteralData<'s> { String(&'s str), PEReference(PEReference<'s>), @@ -731,8 +732,8 @@ pub fn choice(input: &str) -> IResult<&str, Choice> { pair(cp, many1(preceded(tuple((opt(s), tag("|"), opt(s))), cp))), pair(opt(s), tag(")")), ), - |(first, rest)| { - let choice = vec![vec![first], rest].concat(); + |(head, tail)| { + let choice = vec![vec![head], tail].concat(); Choice(choice) }, )(input) @@ -748,8 +749,8 @@ pub fn seq(input: &str) -> IResult<&str, Seq> { pair(cp, many0(preceded(tuple((opt(s), tag(","), opt(s))), cp))), pair(opt(s), tag(")")), ), - |(first, rest)| { - let seq = vec![vec![first], rest].concat(); + |(head, tail)| { + let seq = vec![vec![head], tail].concat(); Seq(seq) }, )(input) @@ -776,6 +777,243 @@ pub fn mixed(input: &str) -> IResult<&str, Mixed> { ))(input) } +struct AttlistDecl<'s> { + element_type: Name<'s>, + att_defs: Vec>, +} +/// [52] AttlistDecl ::= '' +pub fn attlist_decl(input: &str) -> IResult<&str, AttlistDecl> { + map( + delimited( + pair(tag("")), + ), + |(element_type, att_defs)| AttlistDecl { + element_type, + att_defs, + }, + )(input) +} + +struct AttDef<'s> { + name: Name<'s>, + att_type: AttType<'s>, + default_decl: DefaultDecl<'s>, +} +/// [53] AttDef ::= S Name S AttType S DefaultDecl +pub fn att_def(input: &str) -> IResult<&str, AttDef> { + map( + tuple(( + preceded(s, name), + preceded(s, att_type), + preceded(s, default_decl), + )), + |(name, att_type, default_decl)| AttDef { + name, + att_type, + default_decl, + }, + )(input) +} + +#[derive(Clone)] +enum AttType<'s> { + StringType, + TokenizedType(TokenizedType), + EnumeratedType(EnumeratedType<'s>), +} +/// [54] AttType ::= StringType | TokenizedType | EnumeratedType +pub fn att_type(input: &str) -> IResult<&str, AttType> { + alt(( + value(AttType::StringType, string_type), + map(tokenized_type, |tokenized_type| { + AttType::TokenizedType(tokenized_type) + }), + map(enumerated_type, |enumerated_type| { + AttType::EnumeratedType(enumerated_type) + }), + ))(input) +} + +type StringType<'s> = &'s str; +/// [55] StringType ::= 'CDATA' +pub fn string_type(input: &str) -> IResult<&str, StringType> { + tag("CDATA")(input) +} + +#[derive(Clone)] +enum TokenizedType { + ID, + IDRef, + IDRefs, + Entity, + Entities, + NMToken, + NMTokens, +} +/// [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' +pub fn tokenized_type(input: &str) -> IResult<&str, TokenizedType> { + alt(( + value(TokenizedType::ID, tag("ID")), + // TODO: check if this is required + // try idrefs first to avoid losing 'S' + value(TokenizedType::IDRefs, tag("IDREFS")), + value(TokenizedType::IDRef, tag("IDREF")), + value(TokenizedType::Entity, tag("ENTITY")), + value(TokenizedType::Entities, tag("ENTITIES")), + // same here + value(TokenizedType::NMTokens, tag("NMTOKENS")), + value(TokenizedType::NMToken, tag("NMTOKEN")), + ))(input) +} + +#[derive(Clone)] +enum EnumeratedType<'s> { + NotationType(NotationType<'s>), + Enumeration(Enumeration<'s>), +} +/// [57] EnumeratedType ::= NotationType | Enumeration +pub fn enumerated_type(input: &str) -> IResult<&str, EnumeratedType> { + alt(( + map(notation_type, |notation_type| { + EnumeratedType::NotationType(notation_type) + }), + map(enumeration, |enumeration| { + EnumeratedType::Enumeration(enumeration) + }), + ))(input) +} + +#[derive(Clone)] +struct NotationType<'s>(Vec>); +/// [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' +pub fn notation_type(input: &str) -> IResult<&str, NotationType> { + map( + delimited( + tuple((tag("NOTATION"), s, tag("("), opt(s))), + pair( + name, + many0(preceded(tuple((opt(s), tag("|"), opt(s))), name)), + ), + pair(opt(s), tag(")")), + ), + |(head, tail)| { + let notation_type = vec![vec![head], tail].concat(); + NotationType(notation_type) + }, + )(input) +} + +#[derive(Clone)] +struct Enumeration<'s>(Vec>); +/// [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' +pub fn enumeration(input: &str) -> IResult<&str, Enumeration> { + map( + delimited( + pair(tag("("), opt(s)), + pair( + nmtoken, + many0(preceded(tuple((opt(s), tag("|"), opt(s))), nmtoken)), + ), + pair(opt(s), tag(")")), + ), + |(head, tail)| { + let enumeration = vec![vec![head], tail].concat(); + Enumeration(enumeration) + }, + )(input) +} + +#[derive(Clone)] +enum DefaultDecl<'s> { + Required, + Implied, + Fixed(AttValue<'s>), +} +/// [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) +pub fn default_decl(input: &str) -> IResult<&str, DefaultDecl> { + alt(( + value(DefaultDecl::Required, tag("#REQUIRED")), + value(DefaultDecl::Implied, tag("#IMPLIED")), + map( + preceded(opt(pair(tag("#FIXED"), s)), att_value), + |att_value| DefaultDecl::Fixed(att_value), + ), + ))(input) +} + +enum ConditionalSect<'s> { + IncludeSect(IncludeSect<'s>), + IgnoreSect(IgnoreSect<'s>), +} +/// [61] conditionalSect ::= includeSect | ignoreSect +pub fn conditional_sect(input: &str) -> IResult<&str, ConditionalSect> { + alt(( + map(include_sect, |include_sect| { + ConditionalSect::IncludeSect(include_sect) + }), + map(ignore_sect, |ignore_sect| { + ConditionalSect::IgnoreSect(ignore_sect) + }), + ))(input) +} + +struct IncludeSect<'s>(ExtSubsetDecl<'s>); +/// [62] includeSect ::= '' +pub fn include_sect(input: &str) -> IResult<&str, IncludeSect> { + map( + delimited( + tuple((tag(""), + ), + |ext_subset_decl| IncludeSect(ext_subset_decl), + )(input) +} + +struct IgnoreSect<'s>(Vec>); +/// [63] ignoreSect ::= '' +pub fn ignore_sect(input: &str) -> IResult<&str, IgnoreSect> { + map( + delimited( + tuple((tag(""), + ), + |ignore_sect_contents| IgnoreSect(ignore_sect_contents), + )(input) +} + +struct IgnoreSectContents<'s> { + // TODO: what the fuck does this mean + ignore: Ignore<'s>, + ignore_list: Vec<(IgnoreSectContents<'s>, Ignore<'s>)>, +} +/// [64] ignoreSectContents ::= Ignore ('' Ignore)* +pub fn ignore_sect_contents(input: &str) -> IResult<&str, IgnoreSectContents> { + map( + pair( + ignore, + many0(tuple(( + delimited(tag("")), + ignore, + ))), + ), + |(ignore, ignore_list)| IgnoreSectContents { + ignore, + ignore_list, + }, + )(input) +} + +type Ignore<'s> = &'s str; +/// [65] Ignore ::= Char* - (Char* ('') Char*) +pub fn ignore(input: &str) -> IResult<&str, Ignore> { + recognize(many_till(xmlchar, peek(alt((tag(""))))))(input) +} + +#[derive(Clone)] enum CharRef<'s> { Decimal(&'s str), Hexadecimal(&'s str), @@ -801,6 +1039,7 @@ pub fn char_ref(input: &str) -> IResult<&str, CharRef> { ))(input) } +#[derive(Clone)] enum Reference<'s> { EntityRef(EntityRef<'s>), CharRef(CharRef<'s>), -- cgit