diff options
| author | 2024-06-25 01:04:52 +0100 | |
|---|---|---|
| committer | 2024-06-25 01:04:52 +0100 | |
| commit | 94b716753d08ec6fea53aa1942ca5ccfeda96fd7 (patch) | |
| tree | 21b40863314fe91d13059ba1a34a660caded4bc5 /src | |
| parent | 0b11cbbfd8904c11f425eb43aa10ebe3e69a758c (diff) | |
| download | peanuts-94b716753d08ec6fea53aa1942ca5ccfeda96fd7.tar.gz peanuts-94b716753d08ec6fea53aa1942ca5ccfeda96fd7.tar.bz2 peanuts-94b716753d08ec6fea53aa1942ca5ccfeda96fd7.zip | |
WIP: element parsing
Diffstat (limited to '')
| -rw-r--r-- | src/parser.rs | 94 | 
1 files changed, 81 insertions, 13 deletions
```diff
diff --git a/src/parser.rs b/src/parser.rs
index 882ebae..d86516a 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -19,15 +19,6 @@ use nom::{
 
 // output is a rust representation of the input xml
 // types could be used for xml production too?
-enum ContentItem<'s> {
-    CharData(&'s str),
-    Element(Element<'s>),
-    // Reference(Reference<'s>),
-    // CDSect(CDSect<'s>),
-}
-
-type Content<'s> = Option<Vec<ContentItem<'s>>>;
-
 type Document<'s> = (Prolog<'s>, Element<'s>, Vec<Misc<'s>>);
 /// [1]   	document	   ::=   	prolog element Misc*
 pub fn document(input: &str) -> IResult<&str, Document> {
@@ -533,13 +524,30 @@ enum Element<'s> {
 /// [39]   	element	   ::=   	EmptyElemTag | STag content ETag
 pub fn element(input: &str) -> IResult<&str, Element> {
     alt((
-        empty_elem_tag,
-        map(tuple((s_tag, content, e_tag)), |(start, content, end)| {}),
+        map(empty_elem_tag, |empty_elem_tag| {
+            Element::Empty(empty_elem_tag)
+        }),
+        map(tuple((s_tag, content, e_tag)), |(s_tag, content, e_tag)| {
+            Element::NotEmpty(s_tag, content, e_tag)
+        }),
     ))(input)
 }
 
-// let STag<'s> = (Name<'s>, );
+struct STag<'s> {
+    name: Name<'s>,
+    attributes: Vec<Attribute<'s>>,
+}
 /// [40]   	STag	   ::=   	'<' Name (S Attribute)* S? '>'
+pub fn s_tag(input: &str) -> IResult<&str, STag> {
+    map(
+        delimited(
+            tag("<"),
+            pair(name, many0(preceded(s, attribute))),
+            pair(opt(s), tag(">")),
+        ),
+        |(name, attributes)| STag { name, attributes },
+    )(input)
+}
 
 type Attribute<'s> = (Name<'s>, AttValue<'s>);
 /// [41]   	Attribute	   ::=   	Name Eq AttValue
@@ -547,6 +555,64 @@ pub fn attribute(input: &str) -> IResult<&str, Attribute> {
     separated_pair(name, eq, att_value)(input)
 }
 
+struct ETag<'s> {
+    name: Name<'s>,
+}
+/// [42]   	ETag	   ::=   	'</' Name S? '>'
+pub fn e_tag(input: &str) -> IResult<&str, ETag> {
+    map(delimited(tag("</"), name, pair(opt(s), tag(">"))), |name| {
+        ETag { name }
+    })(input)
+}
+
+enum ContentItem<'s> {
+    // CharData(&'s str),
+    Element(Element<'s>),
+    Reference(Reference<'s>),
+    CDSect(CDSect<'s>),
+    PI(PI<'s>),
+    Comment(Comment<'s>),
+}
+struct Content<'s> {
+    char_data: Option<CharData<'s>>,
+    content: Vec<(ContentItem<'s>, Option<CharData<'s>>)>,
+}
+/// [43]   	content	   ::=   	CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
+pub fn content(input: &str) -> IResult<&str, Content> {
+    map(
+        pair(
+            opt(char_data),
+            many0(pair(
+                alt((
+                    map(element, |element| ContentItem::Element(element)),
+                    map(reference, |reference| ContentItem::Reference(reference)),
+                    map(cd_sect, |cd_sect| ContentItem::CDSect(cd_sect)),
+                    map(pi, |pi| ContentItem::PI(pi)),
+                    map(comment, |comment| ContentItem::Comment(comment)),
+                )),
+                opt(char_data),
+            )),
+        ),
+        |(char_data, content)| Content { char_data, content },
+    )(input)
+}
+
+struct EmptyElemTag<'s> {
+    name: Name<'s>,
+    attributes: Vec<Attribute<'s>>,
+}
+/// [44]   	EmptyElemTag	   ::=   	'<' Name (S Attribute)* S? '/>'	[WFC: Unique Att Spec]
+pub fn empty_elem_tag(input: &str) -> IResult<&str, EmptyElemTag> {
+    map(
+        delimited(
+            tag("<"),
+            pair(name, many0(preceded(s, attribute))),
+            pair(opt(s), tag("/>")),
+        ),
+        |(name, attributes)| EmptyElemTag { name, attributes },
+    )(input)
+}
+
 enum CharRef<'s> {
     Decimal(&'s str),
     Hexadecimal(&'s str),
@@ -738,7 +804,9 @@ struct ExtParsedEnt<'s> {
 }
 /// [78]   	extParsedEnt	   ::=   	TextDecl? content
 pub fn ext_parsed_ent(input: &str) -> IResult<&str, ExtParsedEnt> {
-    pair(opt(text_decl), content)(input)
+    map(pair(opt(text_decl), content), |(text_decl, content)| {
+        ExtParsedEnt { text_decl, content }
+    })(input)
 }
 
 type EncodingDecl<'s> = EncName<'s>;
```
