aboutsummaryrefslogtreecommitdiffstats
path: root/src/parser.rs
diff options
context:
space:
mode:
authorLibravatar cel 🌸 <cel@blos.sm>2024-06-25 22:44:47 +0100
committerLibravatar cel 🌸 <cel@blos.sm>2024-06-25 22:44:47 +0100
commitb9d88e5c6f81fadcb4bcceb3326383fc2fb13eb5 (patch)
tree7131c66f32d485b560fc61a97030acd6a6c950cb /src/parser.rs
parent435b2af24cc6a1e8d382ca817bef6a94510127f3 (diff)
downloadpeanuts-b9d88e5c6f81fadcb4bcceb3326383fc2fb13eb5.tar.gz
peanuts-b9d88e5c6f81fadcb4bcceb3326383fc2fb13eb5.tar.bz2
peanuts-b9d88e5c6f81fadcb4bcceb3326383fc2fb13eb5.zip
WIP: conditional sections
Diffstat (limited to 'src/parser.rs')
-rw-r--r--src/parser.rs247
1 files changed, 243 insertions, 4 deletions
diff --git a/src/parser.rs b/src/parser.rs
index bae9737..a37fc17 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -82,6 +82,7 @@ pub fn nmtokens(input: &str) -> IResult<&str, Nmtokens> {
recognize(pair(nmtoken, many0(pair(char('\u{20}'), nmtoken))))(input)
}
+#[derive(Clone)]
enum LiteralData<'s> {
String(&'s str),
PEReference(PEReference<'s>),
@@ -731,8 +732,8 @@ pub fn choice(input: &str) -> IResult<&str, Choice> {
pair(cp, many1(preceded(tuple((opt(s), tag("|"), opt(s))), cp))),
pair(opt(s), tag(")")),
),
- |(first, rest)| {
- let choice = vec![vec![first], rest].concat();
+ |(head, tail)| {
+ let choice = vec![vec![head], tail].concat();
Choice(choice)
},
)(input)
@@ -748,8 +749,8 @@ pub fn seq(input: &str) -> IResult<&str, Seq> {
pair(cp, many0(preceded(tuple((opt(s), tag(","), opt(s))), cp))),
pair(opt(s), tag(")")),
),
- |(first, rest)| {
- let seq = vec![vec![first], rest].concat();
+ |(head, tail)| {
+ let seq = vec![vec![head], tail].concat();
Seq(seq)
},
)(input)
@@ -776,6 +777,243 @@ pub fn mixed(input: &str) -> IResult<&str, Mixed> {
))(input)
}
+/// Parsed form of an `<!ATTLIST ...>` declaration.
+struct AttlistDecl<'s> {
+    /// Name of the element type the attribute definitions belong to.
+    element_type: Name<'s>,
+    /// Attribute definitions in the order they were parsed; may be empty.
+    att_defs: Vec<AttDef<'s>>,
+}
+/// [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
+///
+/// On success returns the remaining input together with the parsed declaration.
+pub fn attlist_decl(input: &str) -> IResult<&str, AttlistDecl> {
+    // Opening keyword followed by mandatory whitespace.
+    let (input, _) = pair(tag("<!ATTLIST"), s)(input)?;
+    let (input, element_type) = name(input)?;
+    let (input, att_defs) = many0(att_def)(input)?;
+    // Optional trailing whitespace, then the closing bracket.
+    let (input, _) = pair(opt(s), tag(">"))(input)?;
+    Ok((
+        input,
+        AttlistDecl {
+            element_type,
+            att_defs,
+        },
+    ))
+}
+
+/// One attribute definition inside an attribute-list declaration.
+struct AttDef<'s> {
+    /// Attribute name.
+    name: Name<'s>,
+    /// Declared attribute type.
+    att_type: AttType<'s>,
+    /// Default declaration for the attribute.
+    default_decl: DefaultDecl<'s>,
+}
+/// [53] AttDef ::= S Name S AttType S DefaultDecl
+///
+/// Each of the three components must be introduced by whitespace.
+pub fn att_def(input: &str) -> IResult<&str, AttDef> {
+    let (input, attr_name) = preceded(s, name)(input)?;
+    let (input, attr_type) = preceded(s, att_type)(input)?;
+    let (input, attr_default) = preceded(s, default_decl)(input)?;
+    Ok((
+        input,
+        AttDef {
+            name: attr_name,
+            att_type: attr_type,
+            default_decl: attr_default,
+        },
+    ))
+}
+
+/// The three kinds of attribute type an `AttDef` can declare.
+#[derive(Clone)]
+enum AttType<'s> {
+    /// The `CDATA` string type.
+    StringType,
+    /// One of the tokenized keyword types.
+    TokenizedType(TokenizedType),
+    /// A notation type or an enumeration of name tokens.
+    EnumeratedType(EnumeratedType<'s>),
+}
+/// [54] AttType ::= StringType | TokenizedType | EnumeratedType
+///
+/// Alternatives are tried in the order listed in the production.
+pub fn att_type(input: &str) -> IResult<&str, AttType> {
+    alt((
+        // The matched keyword slice is discarded; only the variant matters.
+        map(string_type, |_| AttType::StringType),
+        map(tokenized_type, AttType::TokenizedType),
+        map(enumerated_type, AttType::EnumeratedType),
+    ))(input)
+}
+
+/// Output of [`string_type`]: the matched keyword slice.
+type StringType<'s> = &'s str;
+/// [55] StringType ::= 'CDATA'
+///
+/// Matches the literal keyword `CDATA` at the start of `input`.
+pub fn string_type(input: &str) -> IResult<&str, StringType> {
+    let cdata_keyword = tag("CDATA");
+    cdata_keyword(input)
+}
+
+/// Keyword attribute types from production [56].
+#[derive(Clone)]
+enum TokenizedType {
+    ID,
+    IDRef,
+    IDRefs,
+    Entity,
+    Entities,
+    NMToken,
+    NMTokens,
+}
+/// [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
+///
+/// `alt` commits to the first alternative that matches, so any keyword that is
+/// a prefix of another must be tried after the longer one. Otherwise the
+/// shorter keyword wins and the rest of the longer keyword is left unparsed.
+pub fn tokenized_type(input: &str) -> IResult<&str, TokenizedType> {
+    alt((
+        // Longest first: "ID" is a prefix of "IDREF", which is a prefix of "IDREFS".
+        value(TokenizedType::IDRefs, tag("IDREFS")),
+        value(TokenizedType::IDRef, tag("IDREF")),
+        value(TokenizedType::ID, tag("ID")),
+        // Neither "ENTITY" nor "ENTITIES" is a prefix of the other, so order is free.
+        value(TokenizedType::Entity, tag("ENTITY")),
+        value(TokenizedType::Entities, tag("ENTITIES")),
+        // "NMTOKEN" is a prefix of "NMTOKENS", so the plural goes first.
+        value(TokenizedType::NMTokens, tag("NMTOKENS")),
+        value(TokenizedType::NMToken, tag("NMTOKEN")),
+    ))(input)
+}
+
+/// An enumerated attribute type: either a notation type or a plain enumeration.
+#[derive(Clone)]
+enum EnumeratedType<'s> {
+    NotationType(NotationType<'s>),
+    Enumeration(Enumeration<'s>),
+}
+/// [57] EnumeratedType ::= NotationType | Enumeration
+///
+/// The notation form is attempted first, then the plain enumeration.
+pub fn enumerated_type(input: &str) -> IResult<&str, EnumeratedType> {
+    alt((
+        map(notation_type, EnumeratedType::NotationType),
+        map(enumeration, EnumeratedType::Enumeration),
+    ))(input)
+}
+
+/// Names listed in a `NOTATION (...)` attribute type, in source order.
+#[derive(Clone)]
+struct NotationType<'s>(Vec<Name<'s>>);
+/// [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
+///
+/// At least one name is required; further names are separated by `|` with
+/// optional whitespace on either side.
+pub fn notation_type(input: &str) -> IResult<&str, NotationType> {
+    let (input, _) = tuple((tag("NOTATION"), s, tag("("), opt(s)))(input)?;
+    let (input, first) = name(input)?;
+    let (input, others) = many0(preceded(tuple((opt(s), tag("|"), opt(s))), name))(input)?;
+    let (input, _) = pair(opt(s), tag(")"))(input)?;
+    // Mandatory first name followed by the `|`-separated remainder.
+    let mut names = vec![first];
+    names.extend(others);
+    Ok((input, NotationType(names)))
+}
+
+/// Name tokens listed in a parenthesised enumeration, in source order.
+#[derive(Clone)]
+struct Enumeration<'s>(Vec<Nmtoken<'s>>);
+/// [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
+///
+/// At least one name token is required; further tokens are separated by `|`
+/// with optional whitespace on either side.
+pub fn enumeration(input: &str) -> IResult<&str, Enumeration> {
+    let (input, _) = pair(tag("("), opt(s))(input)?;
+    let (input, first) = nmtoken(input)?;
+    let (input, others) = many0(preceded(tuple((opt(s), tag("|"), opt(s))), nmtoken))(input)?;
+    let (input, _) = pair(opt(s), tag(")"))(input)?;
+    // Mandatory first token followed by the `|`-separated remainder.
+    let mut tokens = vec![first];
+    tokens.extend(others);
+    Ok((input, Enumeration(tokens)))
+}
+
+/// Default declaration of an attribute.
+#[derive(Clone)]
+enum DefaultDecl<'s> {
+    /// `#REQUIRED`
+    Required,
+    /// `#IMPLIED`
+    Implied,
+    /// An attribute value, with or without a leading `#FIXED` keyword.
+    Fixed(AttValue<'s>),
+}
+/// [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
+///
+/// NOTE(review): a plain default value and a `#FIXED` value both end up as
+/// `DefaultDecl::Fixed`, so the presence of the keyword is not recorded —
+/// confirm whether callers need to tell the two apart.
+pub fn default_decl(input: &str) -> IResult<&str, DefaultDecl> {
+    alt((
+        map(tag("#REQUIRED"), |_| DefaultDecl::Required),
+        map(tag("#IMPLIED"), |_| DefaultDecl::Implied),
+        // The `#FIXED` keyword (and its whitespace) is optional and discarded.
+        map(
+            preceded(opt(pair(tag("#FIXED"), s)), att_value),
+            DefaultDecl::Fixed,
+        ),
+    ))(input)
+}
+
+/// A conditional section: either included or ignored.
+enum ConditionalSect<'s> {
+    IncludeSect(IncludeSect<'s>),
+    IgnoreSect(IgnoreSect<'s>),
+}
+/// [61] conditionalSect ::= includeSect | ignoreSect
+///
+/// The include form is attempted first, then the ignore form.
+pub fn conditional_sect(input: &str) -> IResult<&str, ConditionalSect> {
+    alt((
+        map(include_sect, ConditionalSect::IncludeSect),
+        map(ignore_sect, ConditionalSect::IgnoreSect),
+    ))(input)
+}
+
+/// Body of an `<![INCLUDE[ ... ]]>` section.
+struct IncludeSect<'s>(ExtSubsetDecl<'s>);
+/// [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
+///
+/// Whitespace around the `INCLUDE` keyword is optional.
+pub fn include_sect(input: &str) -> IResult<&str, IncludeSect> {
+    // Section opener up to and including the second '['.
+    let (input, _) = tuple((tag("<!["), opt(s), tag("INCLUDE"), opt(s), tag("[")))(input)?;
+    let (input, body) = ext_subset_decl(input)?;
+    let (input, _) = tag("]]>")(input)?;
+    Ok((input, IncludeSect(body)))
+}
+
+/// Contents of an `<![IGNORE[ ... ]]>` section.
+struct IgnoreSect<'s>(Vec<IgnoreSectContents<'s>>);
+/// [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
+///
+/// `ignore_sect_contents` succeeds without consuming anything once it is
+/// positioned at `]]>`, and nom's `many0` reports an error when its inner
+/// parser succeeds on zero input. Wrapping it in `many0` therefore rejects
+/// every well-formed section. A single `ignore_sect_contents` already consumes
+/// everything up to the closing `]]>`, including nested sections, so it is
+/// parsed exactly once and stored as a one-element list.
+pub fn ignore_sect(input: &str) -> IResult<&str, IgnoreSect> {
+    map(
+        delimited(
+            tuple((tag("<!["), opt(s), tag("IGNORE"), opt(s), tag("["))),
+            ignore_sect_contents,
+            tag("]]>"),
+        ),
+        |contents| IgnoreSect(vec![contents]),
+    )(input)
+}
+
+/// Text inside an ignored section, split around nested `<![ ... ]]>` sections.
+struct IgnoreSectContents<'s> {
+    /// Text before the first nested section (possibly empty).
+    ignore: Ignore<'s>,
+    /// For each nested section: its contents, then the text that follows its
+    /// closing `]]>`.
+    ignore_list: Vec<(IgnoreSectContents<'s>, Ignore<'s>)>,
+}
+/// [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
+///
+/// Recurses into itself for every nested `<![ ... ]]>` pair.
+pub fn ignore_sect_contents(input: &str) -> IResult<&str, IgnoreSectContents> {
+    let (input, leading) = ignore(input)?;
+    let (input, nested) = many0(pair(
+        delimited(tag("<!["), ignore_sect_contents, tag("]]>")),
+        ignore,
+    ))(input)?;
+    Ok((
+        input,
+        IgnoreSectContents {
+            ignore: leading,
+            ignore_list: nested,
+        },
+    ))
+}
+
+/// Output of [`ignore`]: the slice of ignored text.
+type Ignore<'s> = &'s str;
+/// [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
+///
+/// Takes characters up to, but not including, the next `<![` or `]]>`. The
+/// returned slice is empty when the input already starts with one of them.
+pub fn ignore(input: &str) -> IResult<&str, Ignore> {
+    // Either marker ends the run; `peek` leaves it in the input.
+    let section_boundary = alt((tag("<!["), tag("]]>")));
+    recognize(many_till(xmlchar, peek(section_boundary)))(input)
+}
+
+#[derive(Clone)]
enum CharRef<'s> {
Decimal(&'s str),
Hexadecimal(&'s str),
@@ -801,6 +1039,7 @@ pub fn char_ref(input: &str) -> IResult<&str, CharRef> {
))(input)
}
+#[derive(Clone)]
enum Reference<'s> {
EntityRef(EntityRef<'s>),
CharRef(CharRef<'s>),