diff options
Diffstat (limited to '')
| -rw-r--r-- | src/parser.rs | 208 | 
1 files changed, 188 insertions, 20 deletions
| diff --git a/src/parser.rs b/src/parser.rs index e689a53..882ebae 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2,7 +2,7 @@ use std::char;  use nom::{      branch::{alt, permutation}, -    bytes::streaming::{is_a, is_not, tag, tag_no_case, take, take_till, take_until}, +    bytes::streaming::{is_a, is_not, tag, tag_no_case, take, take_till, take_until, take_while},      character::{          complete::one_of,          streaming::{alpha1, char, digit1, none_of, satisfy}, @@ -16,6 +16,8 @@ use nom::{  // parser: parses tokens from lexer into events  // no well formedness, validity, or data model, simple translation of input into rust types +// output is a rust representation of the input xml +// types could be used for xml production too?  enum ContentItem<'s> {      CharData(&'s str), @@ -89,37 +91,73 @@ pub fn nmtokens(input: &str) -> IResult<&str, Nmtokens> {      recognize(pair(nmtoken, many0(pair(char('\u{20}'), nmtoken))))(input)  } -type EntityValue<'s> = &'s str; +enum LiteralData<'s> { +    String(&'s str), +    PEReference(PEReference<'s>), +    Reference(Reference<'s>), +} + +type EntityValue<'s> = Vec<LiteralData<'s>>;  /// [9]   	EntityValue	   ::=   	'"' ([^%&"] | PEReference | Reference)* '"'  ///			|  "'" ([^%&'] | PEReference | Reference)* "'"  pub fn entity_value(input: &str) -> IResult<&str, EntityValue> {      alt((          delimited(              char('"'), -            recognize(many0(alt((none_of("%&\""), pe_reference, reference)))), +            many0(alt(( +                map( +                    recognize(many_till(take(1usize), peek(one_of("%&\"")))), +                    |string| LiteralData::String(string), +                ), +                map(pe_reference, |pe_reference| { +                    LiteralData::PEReference(pe_reference) +                }), +                map(reference, |reference| LiteralData::Reference(reference)), +            ))),              char('"'),          ),          delimited(              char('\''), -            recognize(many0(alt((none_of("%&'"), pe_reference, reference)))), +            many0(alt(( +                map( +                    recognize(many_till(take(1usize), peek(one_of("%&'")))), +                    |string| LiteralData::String(string), +                ), +                map(pe_reference, |pe_reference| { +                    LiteralData::PEReference(pe_reference) +                }), +                map(reference, |reference| LiteralData::Reference(reference)), +            ))),              char('\''),          ),      ))(input)  } -type AttValue<'s> = &'s str; +type AttValue<'s> = Vec<LiteralData<'s>>;  /// [10]   	AttValue	   ::=   	'"' ([^<&"] | Reference)* '"'  /// 			|  "'" ([^<&'] | Reference)* "'"  pub fn att_value(input: &str) -> IResult<&str, AttValue> {      alt((          delimited(              char('"'), -            recognize(many0(alt((none_of("<&\""), reference)))), +            many0(alt(( +                map( +                    recognize(many_till(take(1usize), peek(one_of("%&\"")))), +                    |string| LiteralData::String(string), +                ), +                map(reference, |reference| LiteralData::Reference(reference)), +            ))),              char('"'),          ),          delimited(              char('\''), -            recognize(many0(alt((none_of("<&'"), reference)))), +            many0(alt(( +                map( +                    recognize(many_till(take(1usize), peek(one_of("%&'")))), +                    |string| LiteralData::String(string), +                ), +                map(reference, |reference| LiteralData::Reference(reference)), +            ))),              char('\''),          ),      ))(input) @@ -389,18 +427,18 @@ pub fn decl_sep(input: &str) -> IResult<&str, DeclSep> {      ))(input)  } -enum IntSubsetItem<'s> { +enum IntSubsetDeclaration<'s> {      MarkupDecl(MarkupDecl<'s>),      DeclSep(DeclSep<'s>),  } -type IntSubset<'s> = Vec<IntSubsetItem<'s>>; +type IntSubset<'s> = Vec<IntSubsetDeclaration<'s>>;  /// [28b]   	intSubset	   ::=   	(markupdecl | DeclSep)*  pub fn int_subset(input: &str) -> IResult<&str, IntSubset> {      many0(alt((          map(markup_decl, |markup_decl| { -            IntSubsetItem::MarkupDecl(markup_decl) +            IntSubsetDeclaration::MarkupDecl(markup_decl)          }), -        map(decl_sep, |decl_sep| IntSubsetItem::DeclSep(decl_sep)), +        map(decl_sep, |decl_sep| IntSubsetDeclaration::DeclSep(decl_sep)),      )))(input)  } @@ -432,9 +470,39 @@ pub fn markup_decl(input: &str) -> IResult<&str, MarkupDecl> {      ))(input)  } +struct ExtSubset<'s> { +    text_decl: Option<TextDecl<'s>>, +    ext_subset_decl: ExtSubsetDecl<'s>, +}  /// [30]   	extSubset	   ::=   	TextDecl? extSubsetDecl +pub fn ext_subset(input: &str) -> IResult<&str, ExtSubset> { +    map( +        pair(opt(text_decl), ext_subset_decl), +        |(text_decl, ext_subset_decl)| ExtSubset { +            text_decl, +            ext_subset_decl, +        }, +    )(input) +} +enum ExtSubsetDeclaration<'s> { +    MarkupDecl(MarkupDecl<'s>), +    ConditionalSect(ConditionalSect<'s>), +    DeclSep(DeclSep<'s>), +} +type ExtSubsetDecl<'s> = Vec<ExtSubsetDeclaration<'s>>;  /// [31]   	extSubsetDecl	   ::=   	( markupdecl | conditionalSect | DeclSep)* +pub fn ext_subset_decl(input: &str) -> IResult<&str, ExtSubsetDecl> { +    many0(alt(( +        map(markup_decl, |markup_decl| { +            ExtSubsetDeclaration::MarkupDecl(markup_decl) +        }), +        map(conditional_sect, |conditional_sect| { +            ExtSubsetDeclaration::ConditionalSect(conditional_sect) +        }), +        map(decl_sep, |decl_sep| ExtSubsetDeclaration::DeclSep(decl_sep)), +    )))(input) +}  type SDDecl = bool;  /// [32]   	SDDecl	   ::=   	S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) @@ -458,10 +526,9 @@ pub fn sd_decl(input: &str) -> IResult<&str, SDDecl> {  // (Productions 33 through 38 have been removed.) -struct Element<'s> { -    name: &'s str, -    attributes: Vec<Attribute<'s>>, -    content: Content<'s>, +enum Element<'s> { +    Empty(EmptyElemTag<'s>), +    NotEmpty(STag<'s>, Content<'s>, ETag<'s>),  }  /// [39]   	element	   ::=   	EmptyElemTag | STag content ETag  pub fn element(input: &str) -> IResult<&str, Element> { @@ -480,10 +547,29 @@ pub fn attribute(input: &str) -> IResult<&str, Attribute> {      separated_pair(name, eq, att_value)(input)  } -type CharRef<'s> = &'s str; +enum CharRef<'s> { +    Decimal(&'s str), +    Hexadecimal(&'s str), +}  /// [66]   	CharRef	   ::=   	'&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'  pub fn char_ref(input: &str) -> IResult<&str, CharRef> { -    todo!() +    alt(( +        delimited( +            tag("&#"), +            map(take_while(|c| matches!(c, '0'..='9')), |decimal| { +                CharRef::Decimal(decimal) +            }), +            tag(";"), +        ), +        delimited( +            tag("&#x"), +            map( +                take_while(|c| matches!(c, '0'..='9' | 'a'..='f' | 'A'..='F' )), +                |hexadecimal| CharRef::Hexadecimal(hexadecimal), +            ), +            tag(";"), +        ), +    ))(input)  }  enum Reference<'s> { @@ -510,7 +596,86 @@ pub fn pe_reference(input: &str) -> IResult<&str, PEReference> {      delimited(tag("%"), name, tag(";"))(input)  } -/// TODO: entity declarations +enum EntityDecl<'s> { +    GEDecl(GEDecl<'s>), +    PEDecl(PEDecl<'s>), +} +/// [70]   	EntityDecl	   ::=   	GEDecl | PEDecl +pub fn entity_decl(input: &str) -> IResult<&str, EntityDecl> { +    alt(( +        map(ge_decl, |ge_decl| EntityDecl::GEDecl(ge_decl)), +        map(pe_decl, |pe_decl| EntityDecl::PEDecl(pe_decl)), +    ))(input) +} + +struct GEDecl<'s> { +    name: Name<'s>, +    entity_def: EntityDef<'s>, +} +/// [71]   	GEDecl	   ::=   	'<!ENTITY' S Name S EntityDef S? '>' +pub fn ge_decl(input: &str) -> IResult<&str, GEDecl> { +    map( +        delimited( +            pair(tag("<!ENTITY"), s), +            separated_pair(name, s, entity_def), +            pair(opt(s), tag(">")), +        ), +        |(name, entity_def)| GEDecl { name, entity_def }, +    )(input) +} + +struct PEDecl<'s> { +    name: Name<'s>, +    pe_def: PEDef<'s>, +} +/// [72]   	PEDecl	   ::=   	'<!ENTITY' S '%' S Name S PEDef S? '>' +pub fn pe_decl(input: &str) -> IResult<&str, PEDecl> { +    map( +        delimited( +            tuple((tag("<!ENTITY"), s, tag("%"), s)), +            separated_pair(name, s, pe_def), +            pair(opt(s), tag(">")), +        ), +        |(name, pe_def)| PEDecl { name, pe_def }, +    )(input) +} + +enum EntityDef<'s> { +    EntityValue(EntityValue<'s>), +    ExternalID { +        external_id: ExternalID<'s>, +        ndata_decl: Option<NDataDecl<'s>>, +    }, +} +/// [73]   	EntityDef	   ::=   	EntityValue | (ExternalID NDataDecl?) +pub fn entity_def(input: &str) -> IResult<&str, EntityDef> { +    alt(( +        map(entity_value, |entity_value| { +            EntityDef::EntityValue(entity_value) +        }), +        map( +            pair(external_id, opt(ndata_decl)), +            |(external_id, ndata_decl)| EntityDef::ExternalID { +                external_id, +                ndata_decl, +            }, +        ), +    ))(input) +} + +enum PEDef<'s> { +    EntityValue(EntityValue<'s>), +    ExternalID(ExternalID<'s>), +} +/// [74]   	PEDef	   ::=   	EntityValue | ExternalID +pub fn pe_def(input: &str) -> IResult<&str, PEDef> { +    alt(( +        map(entity_value, |entity_value| { +            PEDef::EntityValue(entity_value) +        }), +        map(external_id, |external_id| PEDef::ExternalID(external_id)), +    ))(input) +}  enum ExternalID<'s> {      SYSTEM { @@ -567,9 +732,12 @@ pub fn text_decl(input: &str) -> IResult<&str, TextDecl> {      )(input)  } -type extParsedEnt<'s> = (Option<TextDecl<'s>>, Content<'s>); +struct ExtParsedEnt<'s> { +    text_decl: Option<TextDecl<'s>>, +    content: Content<'s>, +}  /// [78]   	extParsedEnt	   ::=   	TextDecl? content -pub fn ext_parsed_ent(input: &str) -> IResult<&str, extParsedEnt> { +pub fn ext_parsed_ent(input: &str) -> IResult<&str, ExtParsedEnt> {      pair(opt(text_decl), content)(input)  } | 
