diff options
author | cel 🌸 <cel@blos.sm> | 2024-06-25 01:04:52 +0100 |
---|---|---|
committer | cel 🌸 <cel@blos.sm> | 2024-06-25 01:04:52 +0100 |
commit | 94b716753d08ec6fea53aa1942ca5ccfeda96fd7 (patch) | |
tree | 21b40863314fe91d13059ba1a34a660caded4bc5 /src | |
parent | 0b11cbbfd8904c11f425eb43aa10ebe3e69a758c (diff) | |
download | peanuts-94b716753d08ec6fea53aa1942ca5ccfeda96fd7.tar.gz peanuts-94b716753d08ec6fea53aa1942ca5ccfeda96fd7.tar.bz2 peanuts-94b716753d08ec6fea53aa1942ca5ccfeda96fd7.zip |
WIP: element parsing
Diffstat (limited to 'src')
-rw-r--r-- | src/parser.rs | 94 |
1 files changed, 81 insertions, 13 deletions
```diff
diff --git a/src/parser.rs b/src/parser.rs
index 882ebae..d86516a 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -19,15 +19,6 @@ use nom::{
 // output is a rust representation of the input xml
 // types could be used for xml production too?
 
-enum ContentItem<'s> {
-    CharData(&'s str),
-    Element(Element<'s>),
-    // Reference(Reference<'s>),
-    // CDSect(CDSect<'s>),
-}
-
-type Content<'s> = Option<Vec<ContentItem<'s>>>;
-
 type Document<'s> = (Prolog<'s>, Element<'s>, Vec<Misc<'s>>);
 /// [1] document ::= prolog element Misc*
 pub fn document(input: &str) -> IResult<&str, Document> {
@@ -533,13 +524,30 @@ enum Element<'s> {
 /// [39] element ::= EmptyElemTag | STag content ETag
 pub fn element(input: &str) -> IResult<&str, Element> {
     alt((
-        empty_elem_tag,
-        map(tuple((s_tag, content, e_tag)), |(start, content, end)| {}),
+        map(empty_elem_tag, |empty_elem_tag| {
+            Element::Empty(empty_elem_tag)
+        }),
+        map(tuple((s_tag, content, e_tag)), |(s_tag, content, e_tag)| {
+            Element::NotEmpty(s_tag, content, e_tag)
+        }),
     ))(input)
 }
 
-// let STag<'s> = (Name<'s>, );
+struct STag<'s> {
+    name: Name<'s>,
+    attributes: Vec<Attribute<'s>>,
+}
 /// [40] STag ::= '<' Name (S Attribute)* S? '>'
+pub fn s_tag(input: &str) -> IResult<&str, STag> {
+    map(
+        delimited(
+            tag("<"),
+            pair(name, many0(preceded(s, attribute))),
+            pair(opt(s), tag(">")),
+        ),
+        |(name, attributes)| STag { name, attributes },
+    )(input)
+}
 
 type Attribute<'s> = (Name<'s>, AttValue<'s>);
 /// [41] Attribute ::= Name Eq AttValue
@@ -547,6 +555,64 @@ pub fn attribute(input: &str) -> IResult<&str, Attribute> {
     separated_pair(name, eq, att_value)(input)
 }
 
+struct ETag<'s> {
+    name: Name<'s>,
+}
+/// [42] ETag ::= '</' Name S? '>'
+pub fn e_tag(input: &str) -> IResult<&str, ETag> {
+    map(delimited(tag("</"), name, pair(opt(s), tag(">"))), |name| {
+        ETag { name }
+    })(input)
+}
+
+enum ContentItem<'s> {
+    // CharData(&'s str),
+    Element(Element<'s>),
+    Reference(Reference<'s>),
+    CDSect(CDSect<'s>),
+    PI(PI<'s>),
+    Comment(Comment<'s>),
+}
+struct Content<'s> {
+    char_data: Option<CharData<'s>>,
+    content: Vec<(ContentItem<'s>, Option<CharData<'s>>)>,
+}
+/// [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
+pub fn content(input: &str) -> IResult<&str, Content> {
+    map(
+        pair(
+            opt(char_data),
+            many0(pair(
+                alt((
+                    map(element, |element| ContentItem::Element(element)),
+                    map(reference, |reference| ContentItem::Reference(reference)),
+                    map(cd_sect, |cd_sect| ContentItem::CDSect(cd_sect)),
+                    map(pi, |pi| ContentItem::PI(pi)),
+                    map(comment, |comment| ContentItem::Comment(comment)),
+                )),
+                opt(char_data),
+            )),
+        ),
+        |(char_data, content)| Content { char_data, content },
+    )(input)
+}
+
+struct EmptyElemTag<'s> {
+    name: Name<'s>,
+    attributes: Vec<Attribute<'s>>,
+}
+/// [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' [WFC: Unique Att Spec]
+pub fn empty_elem_tag(input: &str) -> IResult<&str, EmptyElemTag> {
+    map(
+        delimited(
+            tag("<"),
+            pair(name, many0(preceded(s, attribute))),
+            pair(opt(s), tag("/>")),
+        ),
+        |(name, attributes)| EmptyElemTag { name, attributes },
+    )(input)
+}
+
 enum CharRef<'s> {
     Decimal(&'s str),
     Hexadecimal(&'s str),
@@ -738,7 +804,9 @@ struct ExtParsedEnt<'s> {
 }
 /// [78] extParsedEnt ::= TextDecl? content
 pub fn ext_parsed_ent(input: &str) -> IResult<&str, ExtParsedEnt> {
-    pair(opt(text_decl), content)(input)
+    map(pair(opt(text_decl), content), |(text_decl, content)| {
+        ExtParsedEnt { text_decl, content }
+    })(input)
 }
 
 type EncodingDecl<'s> = EncName<'s>;
```