about | summary | refs | log | tree | commit | diff | stats
path: root/src/parser.rs
diff options
context:
space:
mode:
author: cel 🌸 <cel@blos.sm>, 2024-06-25 01:04:52 +0100
committer: cel 🌸 <cel@blos.sm>, 2024-06-25 01:04:52 +0100
commit: 94b716753d08ec6fea53aa1942ca5ccfeda96fd7 (patch)
tree: 21b40863314fe91d13059ba1a34a660caded4bc5 /src/parser.rs
parent: 0b11cbbfd8904c11f425eb43aa10ebe3e69a758c (diff)
download: peanuts-94b716753d08ec6fea53aa1942ca5ccfeda96fd7.tar.gz
peanuts-94b716753d08ec6fea53aa1942ca5ccfeda96fd7.tar.bz2
peanuts-94b716753d08ec6fea53aa1942ca5ccfeda96fd7.zip
WIP: element parsing
Diffstat (limited to '')
-rw-r--r-- src/parser.rs 94
1 files changed, 81 insertions, 13 deletions
diff --git a/src/parser.rs b/src/parser.rs
index 882ebae..d86516a 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -19,15 +19,6 @@ use nom::{
// output is a rust representation of the input xml
// types could be used for xml production too?
-enum ContentItem<'s> {
- CharData(&'s str),
- Element(Element<'s>),
- // Reference(Reference<'s>),
- // CDSect(CDSect<'s>),
-}
-
-type Content<'s> = Option<Vec<ContentItem<'s>>>;
-
type Document<'s> = (Prolog<'s>, Element<'s>, Vec<Misc<'s>>);
/// [1] document ::= prolog element Misc*
pub fn document(input: &str) -> IResult<&str, Document> {
@@ -533,13 +524,30 @@ enum Element<'s> {
/// [39] element ::= EmptyElemTag | STag content ETag
pub fn element(input: &str) -> IResult<&str, Element> {
alt((
- empty_elem_tag,
- map(tuple((s_tag, content, e_tag)), |(start, content, end)| {}),
+ map(empty_elem_tag, |empty_elem_tag| {
+ Element::Empty(empty_elem_tag)
+ }),
+ map(tuple((s_tag, content, e_tag)), |(s_tag, content, e_tag)| {
+ Element::NotEmpty(s_tag, content, e_tag)
+ }),
))(input)
}
-// let STag<'s> = (Name<'s>, );
+struct STag<'s> {
+ name: Name<'s>,
+ attributes: Vec<Attribute<'s>>,
+}
/// [40] STag ::= '<' Name (S Attribute)* S? '>'
+pub fn s_tag(input: &str) -> IResult<&str, STag> {
+ map(
+ delimited(
+ tag("<"),
+ pair(name, many0(preceded(s, attribute))),
+ pair(opt(s), tag(">")),
+ ),
+ |(name, attributes)| STag { name, attributes },
+ )(input)
+}
type Attribute<'s> = (Name<'s>, AttValue<'s>);
/// [41] Attribute ::= Name Eq AttValue
@@ -547,6 +555,64 @@ pub fn attribute(input: &str) -> IResult<&str, Attribute> {
separated_pair(name, eq, att_value)(input)
}
+struct ETag<'s> {
+ name: Name<'s>,
+}
+/// [42] ETag ::= '</' Name S? '>'
+pub fn e_tag(input: &str) -> IResult<&str, ETag> {
+ map(delimited(tag("</"), name, pair(opt(s), tag(">"))), |name| {
+ ETag { name }
+ })(input)
+}
+
+enum ContentItem<'s> {
+ // CharData(&'s str),
+ Element(Element<'s>),
+ Reference(Reference<'s>),
+ CDSect(CDSect<'s>),
+ PI(PI<'s>),
+ Comment(Comment<'s>),
+}
+struct Content<'s> {
+ char_data: Option<CharData<'s>>,
+ content: Vec<(ContentItem<'s>, Option<CharData<'s>>)>,
+}
+/// [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
+pub fn content(input: &str) -> IResult<&str, Content> {
+ map(
+ pair(
+ opt(char_data),
+ many0(pair(
+ alt((
+ map(element, |element| ContentItem::Element(element)),
+ map(reference, |reference| ContentItem::Reference(reference)),
+ map(cd_sect, |cd_sect| ContentItem::CDSect(cd_sect)),
+ map(pi, |pi| ContentItem::PI(pi)),
+ map(comment, |comment| ContentItem::Comment(comment)),
+ )),
+ opt(char_data),
+ )),
+ ),
+ |(char_data, content)| Content { char_data, content },
+ )(input)
+}
+
+struct EmptyElemTag<'s> {
+ name: Name<'s>,
+ attributes: Vec<Attribute<'s>>,
+}
+/// [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' [WFC: Unique Att Spec]
+pub fn empty_elem_tag(input: &str) -> IResult<&str, EmptyElemTag> {
+ map(
+ delimited(
+ tag("<"),
+ pair(name, many0(preceded(s, attribute))),
+ pair(opt(s), tag("/>")),
+ ),
+ |(name, attributes)| EmptyElemTag { name, attributes },
+ )(input)
+}
+
enum CharRef<'s> {
Decimal(&'s str),
Hexadecimal(&'s str),
@@ -738,7 +804,9 @@ struct ExtParsedEnt<'s> {
}
/// [78] extParsedEnt ::= TextDecl? content
pub fn ext_parsed_ent(input: &str) -> IResult<&str, ExtParsedEnt> {
- pair(opt(text_decl), content)(input)
+ map(pair(opt(text_decl), content), |(text_decl, content)| {
+ ExtParsedEnt { text_decl, content }
+ })(input)
}
type EncodingDecl<'s> = EncName<'s>;