diff options
| author | 2024-06-25 22:44:47 +0100 | |
|---|---|---|
| committer | 2024-06-25 22:44:47 +0100 | |
| commit | b9d88e5c6f81fadcb4bcceb3326383fc2fb13eb5 (patch) | |
| tree | 7131c66f32d485b560fc61a97030acd6a6c950cb /src | |
| parent | 435b2af24cc6a1e8d382ca817bef6a94510127f3 (diff) | |
| download | peanuts-b9d88e5c6f81fadcb4bcceb3326383fc2fb13eb5.tar.gz peanuts-b9d88e5c6f81fadcb4bcceb3326383fc2fb13eb5.tar.bz2 peanuts-b9d88e5c6f81fadcb4bcceb3326383fc2fb13eb5.zip | |
WIP: conditional sections
Diffstat (limited to '')
| -rw-r--r-- | src/parser.rs | 247 | 
1 files changed, 243 insertions, 4 deletions
| diff --git a/src/parser.rs b/src/parser.rs index bae9737..a37fc17 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -82,6 +82,7 @@ pub fn nmtokens(input: &str) -> IResult<&str, Nmtokens> {      recognize(pair(nmtoken, many0(pair(char('\u{20}'), nmtoken))))(input)  } +#[derive(Clone)]  enum LiteralData<'s> {      String(&'s str),      PEReference(PEReference<'s>), @@ -731,8 +732,8 @@ pub fn choice(input: &str) -> IResult<&str, Choice> {              pair(cp, many1(preceded(tuple((opt(s), tag("|"), opt(s))), cp))),              pair(opt(s), tag(")")),          ), -        |(first, rest)| { -            let choice = vec![vec![first], rest].concat(); +        |(head, tail)| { +            let choice = vec![vec![head], tail].concat();              Choice(choice)          },      )(input) @@ -748,8 +749,8 @@ pub fn seq(input: &str) -> IResult<&str, Seq> {              pair(cp, many0(preceded(tuple((opt(s), tag(","), opt(s))), cp))),              pair(opt(s), tag(")")),          ), -        |(first, rest)| { -            let seq = vec![vec![first], rest].concat(); +        |(head, tail)| { +            let seq = vec![vec![head], tail].concat();              Seq(seq)          },      )(input) @@ -776,6 +777,243 @@ pub fn mixed(input: &str) -> IResult<&str, Mixed> {      ))(input)  } +struct AttlistDecl<'s> { +    element_type: Name<'s>, +    att_defs: Vec<AttDef<'s>>, +} +/// [52]   	AttlistDecl	   ::=   	'<!ATTLIST' S Name AttDef* S? '>' +pub fn attlist_decl(input: &str) -> IResult<&str, AttlistDecl> { +    map( +        delimited( +            pair(tag("<!ATTLIST"), s), +            pair(name, many0(att_def)), +            pair(opt(s), tag(">")), +        ), +        |(element_type, att_defs)| AttlistDecl { +            element_type, +            att_defs, +        }, +    )(input) +} + +struct AttDef<'s> { +    name: Name<'s>, +    att_type: AttType<'s>, +    default_decl: DefaultDecl<'s>, +} +/// [53]   	AttDef	   ::=   	S Name S AttType S DefaultDecl +pub fn att_def(input: &str) -> IResult<&str, AttDef> { +    map( +        tuple(( +            preceded(s, name), +            preceded(s, att_type), +            preceded(s, default_decl), +        )), +        |(name, att_type, default_decl)| AttDef { +            name, +            att_type, +            default_decl, +        }, +    )(input) +} + +#[derive(Clone)] +enum AttType<'s> { +    StringType, +    TokenizedType(TokenizedType), +    EnumeratedType(EnumeratedType<'s>), +} +/// [54]   	AttType	   ::=   	StringType | TokenizedType | EnumeratedType +pub fn att_type(input: &str) -> IResult<&str, AttType> { +    alt(( +        value(AttType::StringType, string_type), +        map(tokenized_type, |tokenized_type| { +            AttType::TokenizedType(tokenized_type) +        }), +        map(enumerated_type, |enumerated_type| { +            AttType::EnumeratedType(enumerated_type) +        }), +    ))(input) +} + +type StringType<'s> = &'s str; +/// [55]   	StringType	   ::=   	'CDATA' +pub fn string_type(input: &str) -> IResult<&str, StringType> { +    tag("CDATA")(input) +} + +#[derive(Clone)] +enum TokenizedType { +    ID, +    IDRef, +    IDRefs, +    Entity, +    Entities, +    NMToken, +    NMTokens, +} +/// [56]   	TokenizedType	   ::=   	'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' +pub fn tokenized_type(input: &str) -> IResult<&str, TokenizedType> { +    alt(( +        value(TokenizedType::ID, tag("ID")), +        // TODO: check if this is required +        // try idrefs first to avoid losing 'S' +        value(TokenizedType::IDRefs, tag("IDREFS")), +        value(TokenizedType::IDRef, tag("IDREF")), +        value(TokenizedType::Entity, tag("ENTITY")), +        value(TokenizedType::Entities, tag("ENTITIES")), +        // same here +        value(TokenizedType::NMTokens, tag("NMTOKENS")), +        value(TokenizedType::NMToken, tag("NMTOKEN")), +    ))(input) +} + +#[derive(Clone)] +enum EnumeratedType<'s> { +    NotationType(NotationType<'s>), +    Enumeration(Enumeration<'s>), +} +/// [57]   	EnumeratedType	   ::=   	NotationType | Enumeration +pub fn enumerated_type(input: &str) -> IResult<&str, EnumeratedType> { +    alt(( +        map(notation_type, |notation_type| { +            EnumeratedType::NotationType(notation_type) +        }), +        map(enumeration, |enumeration| { +            EnumeratedType::Enumeration(enumeration) +        }), +    ))(input) +} + +#[derive(Clone)] +struct NotationType<'s>(Vec<Name<'s>>); +/// [58]   	NotationType	   ::=   	'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' +pub fn notation_type(input: &str) -> IResult<&str, NotationType> { +    map( +        delimited( +            tuple((tag("NOTATION"), s, tag("("), opt(s))), +            pair( +                name, +                many0(preceded(tuple((opt(s), tag("|"), opt(s))), name)), +            ), +            pair(opt(s), tag(")")), +        ), +        |(head, tail)| { +            let notation_type = vec![vec![head], tail].concat(); +            NotationType(notation_type) +        }, +    )(input) +} + +#[derive(Clone)] +struct Enumeration<'s>(Vec<Nmtoken<'s>>); +/// [59]   	Enumeration	   ::=   	'(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' +pub fn enumeration(input: &str) -> IResult<&str, Enumeration> { +    map( +        delimited( +            pair(tag("("), opt(s)), +            pair( +                nmtoken, +                many0(preceded(tuple((opt(s), tag("|"), opt(s))), nmtoken)), +            ), +            pair(opt(s), tag(")")), +        ), +        |(head, tail)| { +            let enumeration = vec![vec![head], tail].concat(); +            Enumeration(enumeration) +        }, +    )(input) +} + +#[derive(Clone)] +enum DefaultDecl<'s> { +    Required, +    Implied, +    Fixed(AttValue<'s>), +} +/// [60]   	DefaultDecl	   ::=   	'#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) +pub fn default_decl(input: &str) -> IResult<&str, DefaultDecl> { +    alt(( +        value(DefaultDecl::Required, tag("#REQUIRED")), +        value(DefaultDecl::Implied, tag("#IMPLIED")), +        map( +            preceded(opt(pair(tag("#FIXED"), s)), att_value), +            |att_value| DefaultDecl::Fixed(att_value), +        ), +    ))(input) +} + +enum ConditionalSect<'s> { +    IncludeSect(IncludeSect<'s>), +    IgnoreSect(IgnoreSect<'s>), +} +/// [61]   	conditionalSect	   ::=   	includeSect | ignoreSect +pub fn conditional_sect(input: &str) -> IResult<&str, ConditionalSect> { +    alt(( +        map(include_sect, |include_sect| { +            ConditionalSect::IncludeSect(include_sect) +        }), +        map(ignore_sect, |ignore_sect| { +            ConditionalSect::IgnoreSect(ignore_sect) +        }), +    ))(input) +} + +struct IncludeSect<'s>(ExtSubsetDecl<'s>); +/// [62]   	includeSect	   ::=   	'<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' +pub fn include_sect(input: &str) -> IResult<&str, IncludeSect> { +    map( +        delimited( +            tuple((tag("<!["), opt(s), tag("INCLUDE"), opt(s), tag("["))), +            ext_subset_decl, +            tag("]]>"), +        ), +        |ext_subset_decl| IncludeSect(ext_subset_decl), +    )(input) +} + +struct IgnoreSect<'s>(Vec<IgnoreSectContents<'s>>); +/// [63]   	ignoreSect	   ::=   	'<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>' +pub fn ignore_sect(input: &str) -> IResult<&str, IgnoreSect> { +    map( +        delimited( +            tuple((tag("<!["), opt(s), tag("IGNORE"), opt(s), tag("["))), +            many0(ignore_sect_contents), +            tag("]]>"), +        ), +        |ignore_sect_contents| IgnoreSect(ignore_sect_contents), +    )(input) +} + +struct IgnoreSectContents<'s> { +    // TODO: what the fuck does this mean +    ignore: Ignore<'s>, +    ignore_list: Vec<(IgnoreSectContents<'s>, Ignore<'s>)>, +} +/// [64]   	ignoreSectContents	   ::=   	Ignore ('<![' ignoreSectContents ']]>' Ignore)* +pub fn ignore_sect_contents(input: &str) -> IResult<&str, IgnoreSectContents> { +    map( +        pair( +            ignore, +            many0(tuple(( +                delimited(tag("<!["), ignore_sect_contents, tag("]]>")), +                ignore, +            ))), +        ), +        |(ignore, ignore_list)| IgnoreSectContents { +            ignore, +            ignore_list, +        }, +    )(input) +} + +type Ignore<'s> = &'s str; +/// [65]   	Ignore	   ::=   	Char* - (Char* ('<![' | ']]>') Char*) +pub fn ignore(input: &str) -> IResult<&str, Ignore> { +    recognize(many_till(xmlchar, peek(alt((tag("<!["), tag("]]>"))))))(input) +} + +#[derive(Clone)]  enum CharRef<'s> {      Decimal(&'s str),      Hexadecimal(&'s str), @@ -801,6 +1039,7 @@ pub fn char_ref(input: &str) -> IResult<&str, CharRef> {      ))(input)  } +#[derive(Clone)]  enum Reference<'s> {      EntityRef(EntityRef<'s>),      CharRef(CharRef<'s>), | 
